Merge branch 'for-linus' of git://github.com/tiwai/sound

author Linus Torvalds <torvalds@linux-foundation.org>

Tue, 20 Sep 2011 17:21:52 +0000 (10:21 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 20 Sep 2011 17:21:52 +0000 (10:21 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 20 Sep 2011 17:21:52 +0000 (10:21 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 20 Sep 2011 17:21:52 +0000 (10:21 -0700)
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX

index 1f89424c36a6036a9daecd3e0b2d0aca93bae3e9..65bbd26223962209b7ea378eec541c3ac6479320 100644 (file)
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -272,6 +272,8 @@ printk-formats.txt
         - how to get printk format specifiers right
  prio_tree.txt
         - info on radix-priority-search-tree use for indexing vmas.
+ramoops.txt
+       - documentation of the ramoops oops/panic logging module.
  rbtree.txt
         - info on what red-black trees are and what they are for.
  robust-futex-ABI.txt
diff --git a/Documentation/ABI/testing/sysfs-class-scsi_host b/Documentation/ABI/testing/sysfs-class-scsi_host

new file mode 100644 (file)

index 0000000..29a4f89
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-scsi_host
@@ -0,0 +1,13 @@
+What:          /sys/class/scsi_host/hostX/isci_id
+Date:          June 2011
+Contact:       Dave Jiang <dave.jiang@intel.com>
+Description:
+               This file contains the enumerated host ID for the Intel
+               SCU controller. The Intel(R) C600 Series Chipset SATA/SAS
+               Storage Control Unit embeds up to two 4-port controllers in
+               a single PCI device.  The controllers are enumerated in order
+               which usually means the lowest number scsi_host corresponds
+               with the first controller, but this association is not
+               guaranteed.  The 'isci_id' attribute unambiguously identifies
+               the controller index: '0' for the first controller,
+               '1' for the second.
diff --git a/Documentation/DocBook/media/v4l/controls.xml b/Documentation/DocBook/media/v4l/controls.xml

index 85164016ed26c28e736651cd4c7199224ed503a6..23fdf79f8cf366fbecb9e10b1fd2ad70b2905e21 100644 (file)
--- a/Documentation/DocBook/media/v4l/controls.xml
+++ b/Documentation/DocBook/media/v4l/controls.xml
@@ -1455,7 +1455,7 @@ Applicable to the H264 encoder.</entry>
               </row>
  
               <row><entry></entry></row>
-             <row>
+             <row id="v4l2-mpeg-video-h264-vui-sar-idc">
                 <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC</constant>&nbsp;</entry>
                 <entry>enum&nbsp;v4l2_mpeg_video_h264_vui_sar_idc</entry>
               </row>
@@ -1561,7 +1561,7 @@ Applicable to the H264 encoder.</entry>
               </row>
  
               <row><entry></entry></row>
-             <row>
+             <row id="v4l2-mpeg-video-h264-level">
                 <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_LEVEL</constant>&nbsp;</entry>
                 <entry>enum&nbsp;v4l2_mpeg_video_h264_level</entry>
               </row>
@@ -1641,7 +1641,7 @@ Possible values are:</entry>
               </row>
  
               <row><entry></entry></row>
-             <row>
+             <row id="v4l2-mpeg-video-mpeg4-level">
                 <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL</constant>&nbsp;</entry>
                 <entry>enum&nbsp;v4l2_mpeg_video_mpeg4_level</entry>
               </row>
@@ -1689,9 +1689,9 @@ Possible values are:</entry>
               </row>
  
               <row><entry></entry></row>
-             <row>
+             <row id="v4l2-mpeg-video-h264-profile">
                 <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_PROFILE</constant>&nbsp;</entry>
-               <entry>enum&nbsp;v4l2_mpeg_h264_profile</entry>
+               <entry>enum&nbsp;v4l2_mpeg_video_h264_profile</entry>
               </row>
               <row><entry spanname="descr">The profile information for H264.
  Applicable to the H264 encoder.
@@ -1774,9 +1774,9 @@ Possible values are:</entry>
               </row>
  
               <row><entry></entry></row>
-             <row>
+             <row id="v4l2-mpeg-video-mpeg4-profile">
                 <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE</constant>&nbsp;</entry>
-               <entry>enum&nbsp;v4l2_mpeg_mpeg4_profile</entry>
+               <entry>enum&nbsp;v4l2_mpeg_video_mpeg4_profile</entry>
               </row>
               <row><entry spanname="descr">The profile information for MPEG4.
  Applicable to the MPEG4 encoder.
@@ -1820,9 +1820,9 @@ Applicable to the encoder.
               </row>
  
               <row><entry></entry></row>
-             <row>
+             <row id="v4l2-mpeg-video-multi-slice-mode">
                 <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE</constant>&nbsp;</entry>
-               <entry>enum&nbsp;v4l2_mpeg_multi_slice_mode</entry>
+               <entry>enum&nbsp;v4l2_mpeg_video_multi_slice_mode</entry>
               </row>
               <row><entry spanname="descr">Determines how the encoder should handle division of frame into slices.
  Applicable to the encoder.
@@ -1868,9 +1868,9 @@ Applicable to the encoder.</entry>
               </row>
  
               <row><entry></entry></row>
-             <row>
+             <row id="v4l2-mpeg-video-h264-loop-filter-mode">
                 <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE</constant>&nbsp;</entry>
-               <entry>enum&nbsp;v4l2_mpeg_h264_loop_filter_mode</entry>
+               <entry>enum&nbsp;v4l2_mpeg_video_h264_loop_filter_mode</entry>
               </row>
               <row><entry spanname="descr">Loop filter mode for H264 encoder.
  Possible values are:</entry>
@@ -1913,9 +1913,9 @@ Applicable to the H264 encoder.</entry>
               </row>
  
               <row><entry></entry></row>
-             <row>
+             <row id="v4l2-mpeg-video-h264-entropy-mode">
                 <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE</constant>&nbsp;</entry>
-               <entry>enum&nbsp;v4l2_mpeg_h264_symbol_mode</entry>
+               <entry>enum&nbsp;v4l2_mpeg_video_h264_entropy_mode</entry>
               </row>
               <row><entry spanname="descr">Entropy coding mode for H264 - CABAC/CAVALC.
  Applicable to the H264 encoder.
@@ -2140,9 +2140,9 @@ previous frames. Applicable to the H264 encoder.</entry>
               </row>
  
               <row><entry></entry></row>
-             <row>
+             <row id="v4l2-mpeg-video-header-mode">
                 <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_HEADER_MODE</constant>&nbsp;</entry>
-               <entry>enum&nbsp;v4l2_mpeg_header_mode</entry>
+               <entry>enum&nbsp;v4l2_mpeg_video_header_mode</entry>
               </row>
               <row><entry spanname="descr">Determines whether the header is returned as the first buffer or is
  it returned together with the first frame. Applicable to encoders.
@@ -2320,9 +2320,9 @@ Valid only when H.264 and macroblock level RC is enabled (<constant>V4L2_CID_MPE
  Applicable to the H264 encoder.</entry>
               </row>
               <row><entry></entry></row>
-             <row>
+             <row id="v4l2-mpeg-mfc51-video-frame-skip-mode">
                 <entry spanname="id"><constant>V4L2_CID_MPEG_MFC51_VIDEO_FRAME_SKIP_MODE</constant>&nbsp;</entry>
-               <entry>enum&nbsp;v4l2_mpeg_mfc51_frame_skip_mode</entry>
+               <entry>enum&nbsp;v4l2_mpeg_mfc51_video_frame_skip_mode</entry>
               </row>
               <row><entry spanname="descr">
  Indicates in what conditions the encoder should skip frames. If encoding a frame would cause the encoded stream to be larger then
@@ -2361,9 +2361,9 @@ the stream will meet tight bandwidth contraints. Applicable to encoders.
  </entry>
               </row>
               <row><entry></entry></row>
-             <row>
+             <row id="v4l2-mpeg-mfc51-video-force-frame-type">
                 <entry spanname="id"><constant>V4L2_CID_MPEG_MFC51_VIDEO_FORCE_FRAME_TYPE</constant>&nbsp;</entry>
-               <entry>enum&nbsp;v4l2_mpeg_mfc51_force_frame_type</entry>
+               <entry>enum&nbsp;v4l2_mpeg_mfc51_video_force_frame_type</entry>
               </row>
               <row><entry spanname="descr">Force a frame type for the next queued buffer. Applicable to encoders.
  Possible values are:</entry>
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt

index 3f5e0b09bed5b29efe2493a3c05857e8c7a56901..53e6fca146d73919e9b7dbc4fd32631a18141795 100644 (file)
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -45,7 +45,7 @@ arrived in memory (this becomes more likely with devices behind PCI-PCI
  bridges).  In order to ensure that all the data has arrived in memory,
  the interrupt handler must read a register on the device which raised
  the interrupt.  PCI transaction ordering rules require that all the data
-arrives in memory before the value can be returned from the register.
+arrive in memory before the value may be returned from the register.
  Using MSIs avoids this problem as the interrupt-generating write cannot
  pass the data writes, so by the time the interrupt is raised, the driver
  knows that all the data has arrived in memory.
@@ -86,13 +86,13 @@ device.
  
  int pci_enable_msi(struct pci_dev *dev)
  
-A successful call will allocate ONE interrupt to the device, regardless
-of how many MSIs the device supports.  The device will be switched from
+A successful call allocates ONE interrupt to the device, regardless
+of how many MSIs the device supports.  The device is switched from
  pin-based interrupt mode to MSI mode.  The dev->irq number is changed
-to a new number which represents the message signaled interrupt.
-This function should be called before the driver calls request_irq()
-since enabling MSIs disables the pin-based IRQ and the driver will not
-receive interrupts on the old interrupt.
+to a new number which represents the message signaled interrupt;
+consequently, this function should be called before the driver calls
+request_irq(), because an MSI is delivered via a vector that is
+different from the vector of a pin-based interrupt.
  
  4.2.2 pci_enable_msi_block
  
@@ -111,20 +111,20 @@ the device are in the range dev->irq to dev->irq + count - 1.
  
  If this function returns a negative number, it indicates an error and
  the driver should not attempt to request any more MSI interrupts for
-this device.  If this function returns a positive number, it will be
-less than 'count' and indicate the number of interrupts that could have
-been allocated.  In neither case will the irq value have been
-updated, nor will the device have been switched into MSI mode.
+this device.  If this function returns a positive number, it is
+less than 'count' and indicates the number of interrupts that could have
+been allocated.  In neither case is the irq value updated or the device
+switched into MSI mode.
  
  The device driver must decide what action to take if
-pci_enable_msi_block() returns a value less than the number asked for.
-Some devices can make use of fewer interrupts than the maximum they
-request; in this case the driver should call pci_enable_msi_block()
+pci_enable_msi_block() returns a value less than the number requested.
+For instance, the driver could still make use of fewer interrupts;
+in this case the driver should call pci_enable_msi_block()
  again.  Note that it is not guaranteed to succeed, even when the
  'count' has been reduced to the value returned from a previous call to
  pci_enable_msi_block().  This is because there are multiple constraints
  on the number of vectors that can be allocated; pci_enable_msi_block()
-will return as soon as it finds any constraint that doesn't allow the
+returns as soon as it finds any constraint that doesn't allow the
  call to succeed.
  
  4.2.3 pci_disable_msi
@@ -137,10 +137,10 @@ interrupt number and frees the previously allocated message signaled
  interrupt(s).  The interrupt may subsequently be assigned to another
  device, so drivers should not cache the value of dev->irq.
  
-A device driver must always call free_irq() on the interrupt(s)
-for which it has called request_irq() before calling this function.
-Failure to do so will result in a BUG_ON(), the device will be left with
-MSI enabled and will leak its vector.
+Before calling this function, a device driver must always call free_irq()
+on any interrupt for which it previously called request_irq().
+Failure to do so results in a BUG_ON(), leaving the device with
+MSI enabled and thus leaking its vector.
  
  4.3 Using MSI-X
  
@@ -155,10 +155,10 @@ struct msix_entry {
  };
  
  This allows for the device to use these interrupts in a sparse fashion;
-for example it could use interrupts 3 and 1027 and allocate only a
+for example, it could use interrupts 3 and 1027 and yet allocate only a
  two-element array.  The driver is expected to fill in the 'entry' value
-in each element of the array to indicate which entries it wants the kernel
-to assign interrupts for.  It is invalid to fill in two entries with the
+in each element of the array to indicate for which entries the kernel
+should assign interrupts; it is invalid to fill in two entries with the
  same number.
  
  4.3.1 pci_enable_msix
@@ -168,10 +168,11 @@ int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
  Calling this function asks the PCI subsystem to allocate 'nvec' MSIs.
  The 'entries' argument is a pointer to an array of msix_entry structs
  which should be at least 'nvec' entries in size.  On success, the
-function will return 0 and the device will have been switched into
-MSI-X interrupt mode.  The 'vector' elements in each entry will have
-been filled in with the interrupt number.  The driver should then call
-request_irq() for each 'vector' that it decides to use.
+device is switched into MSI-X mode and the function returns 0.
+The 'vector' member in each entry is populated with the interrupt number;
+the driver should then call request_irq() for each 'vector' that it
+decides to use.  The device driver is responsible for keeping track of the
+interrupts assigned to the MSI-X vectors so it can free them again later.
  
  If this function returns a negative number, it indicates an error and
  the driver should not attempt to allocate any more MSI-X interrupts for
@@ -181,16 +182,14 @@ below.
  
  This function, in contrast with pci_enable_msi(), does not adjust
  dev->irq.  The device will not generate interrupts for this interrupt
-number once MSI-X is enabled.  The device driver is responsible for
-keeping track of the interrupts assigned to the MSI-X vectors so it can
-free them again later.
+number once MSI-X is enabled.
  
  Device drivers should normally call this function once per device
  during the initialization phase.
  
-It is ideal if drivers can cope with a variable number of MSI-X interrupts,
+It is ideal if drivers can cope with a variable number of MSI-X interrupts;
  there are many reasons why the platform may not be able to provide the
-exact number a driver asks for.
+exact number that a driver asks for.
  
  A request loop to achieve that might look like:
  
@@ -212,15 +211,15 @@ static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec)
  
  void pci_disable_msix(struct pci_dev *dev)
  
-This API should be used to undo the effect of pci_enable_msix().  It frees
+This function should be used to undo the effect of pci_enable_msix().  It frees
  the previously allocated message signaled interrupts.  The interrupts may
  subsequently be assigned to another device, so drivers should not cache
  the value of the 'vector' elements over a call to pci_disable_msix().
  
-A device driver must always call free_irq() on the interrupt(s)
-for which it has called request_irq() before calling this function.
-Failure to do so will result in a BUG_ON(), the device will be left with
-MSI enabled and will leak its vector.
+Before calling this function, a device driver must always call free_irq()
+on any interrupt for which it previously called request_irq().
+Failure to do so results in a BUG_ON(), leaving the device with
+MSI-X enabled and thus leaking its vector.
  
  4.3.3 The MSI-X Table
  
@@ -232,10 +231,10 @@ mask or unmask an interrupt, it should call disable_irq() / enable_irq().
  4.4 Handling devices implementing both MSI and MSI-X capabilities
  
  If a device implements both MSI and MSI-X capabilities, it can
-run in either MSI mode or MSI-X mode but not both simultaneously.
+run in either MSI mode or MSI-X mode, but not both simultaneously.
  This is a requirement of the PCI spec, and it is enforced by the
  PCI layer.  Calling pci_enable_msi() when MSI-X is already enabled or
-pci_enable_msix() when MSI is already enabled will result in an error.
+pci_enable_msix() when MSI is already enabled results in an error.
  If a device driver wishes to switch between MSI and MSI-X at runtime,
  it must first quiesce the device, then switch it back to pin-interrupt
  mode, before calling pci_enable_msi() or pci_enable_msix() and resuming
@@ -251,7 +250,7 @@ the MSI-X facilities in preference to the MSI facilities.  As mentioned
  above, MSI-X supports any number of interrupts between 1 and 2048.
  In constrast, MSI is restricted to a maximum of 32 interrupts (and
  must be a power of two).  In addition, the MSI interrupt vectors must
-be allocated consecutively, so the system may not be able to allocate
+be allocated consecutively, so the system might not be able to allocate
  as many vectors for MSI as it could for MSI-X.  On some platforms, MSI
  interrupts must all be targeted at the same set of CPUs whereas MSI-X
  interrupts can all be targeted at different CPUs.
@@ -281,7 +280,7 @@ disabled to enabled and back again.
  
  Using 'lspci -v' (as root) may show some devices with "MSI", "Message
  Signalled Interrupts" or "MSI-X" capabilities.  Each of these capabilities
-has an 'Enable' flag which will be followed with either "+" (enabled)
+has an 'Enable' flag which is followed with either "+" (enabled)
  or "-" (disabled).
  
  
@@ -298,7 +297,7 @@ The PCI stack provides three ways to disable MSIs:
  
  Some host chipsets simply don't support MSIs properly.  If we're
  lucky, the manufacturer knows this and has indicated it in the ACPI
-FADT table.  In this case, Linux will automatically disable MSIs.
+FADT table.  In this case, Linux automatically disables MSIs.
  Some boards don't include this information in the table and so we have
  to detect them ourselves.  The complete list of these is found near the
  quirk_disable_all_msi() function in drivers/pci/quirks.c.
@@ -317,7 +316,7 @@ Some bridges allow you to enable MSIs by changing some bits in their
  PCI configuration space (especially the Hypertransport chipsets such
  as the nVidia nForce and Serverworks HT2000).  As with host chipsets,
  Linux mostly knows about them and automatically enables MSIs if it can.
-If you have a bridge which Linux doesn't yet know about, you can enable
+If you have a bridge unknown to Linux, you can enable
  MSIs in configuration space using whatever method you know works, then
  enable MSIs on that bridge by doing:
  
@@ -327,7 +326,7 @@ where $bridge is the PCI address of the bridge you've enabled (eg
  0000:00:0e.0).
  
  To disable MSIs, echo 0 instead of 1.  Changing this value should be
-done with caution as it can break interrupt handling for all devices
+done with caution as it could break interrupt handling for all devices
  below this bridge.
  
  Again, please notify linux-pci@vger.kernel.org of any bridges that need
@@ -336,7 +335,7 @@ special handling.
  5.3. Disabling MSIs on a single device
  
  Some devices are known to have faulty MSI implementations.  Usually this
-is handled in the individual device driver but occasionally it's necessary
+is handled in the individual device driver, but occasionally it's necessary
  to handle this with a quirk.  Some drivers have an option to disable use
  of MSI.  While this is a convenient workaround for the driver author,
  it is not good practise, and should not be emulated.
@@ -350,7 +349,7 @@ for your machine.  You should also check your .config to be sure you
  have enabled CONFIG_PCI_MSI.
  
  Then, 'lspci -t' gives the list of bridges above a device.  Reading
-/sys/bus/pci/devices/*/msi_bus will tell you whether MSI are enabled (1)
+/sys/bus/pci/devices/*/msi_bus will tell you whether MSIs are enabled (1)
  or disabled (0).  If 0 is found in any of the msi_bus files belonging
  to bridges between the PCI root and the device, MSIs are disabled.
  
diff --git a/Documentation/SubmittingDrivers b/Documentation/SubmittingDrivers

index 319baa8b60dd727d82c5e8fa062c13e3bb3126d5..36d16bbf72c6160ac3f679afbe91854978e8194a 100644 (file)
--- a/Documentation/SubmittingDrivers
+++ b/Documentation/SubmittingDrivers
@@ -130,7 +130,7 @@ Linux kernel master tree:
         ftp.??.kernel.org:/pub/linux/kernel/...
         ?? == your country code, such as "us", "uk", "fr", etc.
  
-       http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git
+       http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git
  
  Linux kernel mailing list:
         linux-kernel@vger.kernel.org
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches

index 569f3532e1383cb3e67dcba7b77990b5f18a1411..4468ce24427cb011e7991d8f1ae2560764b170b8 100644 (file)
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -303,7 +303,7 @@ patches that are being emailed around.
  
  The sign-off is a simple line at the end of the explanation for the
  patch, which certifies that you wrote it or otherwise have the right to
-pass it on as a open-source patch.  The rules are pretty simple: if you
+pass it on as an open-source patch.  The rules are pretty simple: if you
  can certify the below:
  
          Developer's Certificate of Origin 1.1
diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt

index e578feed6d818f13b148f16cba3dbca8505362fc..6d670f570451a14c1ce4f8c2eb1c69d52736ee01 100644 (file)
--- a/Documentation/block/cfq-iosched.txt
+++ b/Documentation/block/cfq-iosched.txt
@@ -43,3 +43,74 @@ If one sets slice_idle=0 and if storage supports NCQ, CFQ internally switches
  to IOPS mode and starts providing fairness in terms of number of requests
  dispatched. Note that this mode switching takes effect only for group
  scheduling. For non-cgroup users nothing should change.
+
+CFQ IO scheduler Idling Theory
+===============================
+Idling on a queue is primarily about waiting for the next request to come
+on same queue after completion of a request. In this process CFQ will not
+dispatch requests from other cfq queues even if requests are pending there.
+
+The rationale behind idling is that it can cut down on number of seeks
+on rotational media. For example, if a process is doing dependent
+sequential reads (next read will come on only after completion of previous
+one), then not dispatching request from other queue should help as we
+did not move the disk head and kept on dispatching sequential IO from
+one queue.
+
+CFQ has following service trees and various queues are put on these trees.
+
+       sync-idle       sync-noidle     async
+
+All cfq queues doing synchronous sequential IO go on to sync-idle tree.
+On this tree we idle on each queue individually.
+
+All synchronous non-sequential queues go on sync-noidle tree. Also any
+request which are marked with REQ_NOIDLE go on this service tree. On this
+tree we do not idle on individual queues instead idle on the whole group
+of queues or the tree. So if there are 4 queues waiting for IO to dispatch
+we will idle only once last queue has dispatched the IO and there is
+no more IO on this service tree.
+
+All async writes go on async service tree. There is no idling on async
+queues.
+
+CFQ has some optimizations for SSDs and if it detects a non-rotational
+media which can support higher queue depth (multiple requests at in
+flight at a time), then it cuts down on idling of individual queues and
+all the queues move to sync-noidle tree and only tree idle remains. This
+tree idling provides isolation with buffered write queues on async tree.
+
+FAQ
+===
+Q1. Why to idle at all on queues marked with REQ_NOIDLE.
+
+A1. We only do tree idle (all queues on sync-noidle tree) on queues marked
+    with REQ_NOIDLE. This helps in providing isolation with all the sync-idle
+    queues. Otherwise in presence of many sequential readers, other
+    synchronous IO might not get fair share of disk.
+
+    For example, if there are 10 sequential readers doing IO and they get
+    100ms each. If a REQ_NOIDLE request comes in, it will be scheduled
+    roughly after 1 second. If after completion of REQ_NOIDLE request we
+    do not idle, and after a couple of milli seconds a another REQ_NOIDLE
+    request comes in, again it will be scheduled after 1second. Repeat it
+    and notice how a workload can lose its disk share and suffer due to
+    multiple sequential readers.
+
+    fsync can generate dependent IO where bunch of data is written in the
+    context of fsync, and later some journaling data is written. Journaling
+    data comes in only after fsync has finished its IO (atleast for ext4
+    that seemed to be the case). Now if one decides not to idle on fsync
+    thread due to REQ_NOIDLE, then next journaling write will not get
+    scheduled for another second. A process doing small fsync, will suffer
+    badly in presence of multiple sequential readers.
+
+    Hence doing tree idling on threads using REQ_NOIDLE flag on requests
+    provides isolation from multiple sequential readers and at the same
+    time we do not idle on individual threads.
+
+Q2. When to specify REQ_NOIDLE
+A2. I would think whenever one is doing synchronous write and not expecting
+    more writes to be dispatched from same context soon, should be able
+    to specify REQ_NOIDLE on writes and that probably should work well for
+    most of the cases.
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt

index 6f3c598971fc3ef05d2ebfb0e6e8879b3047d839..06eb6d957c83097b85fd15e87e94b8ed7edfe1cf 100644 (file)
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -380,7 +380,7 @@ will be charged as a new owner of it.
  
  5.2 stat file
  
-5.2.1 memory.stat file includes following statistics
+memory.stat file includes following statistics
  
  # per-memory cgroup local status
  cache          - # of bytes of page cache memory.
@@ -438,89 +438,6 @@ Note:
          file_mapped is accounted only when the memory cgroup is owner of page
          cache.)
  
-5.2.2 memory.vmscan_stat
-
-memory.vmscan_stat includes statistics information for memory scanning and
-freeing, reclaiming. The statistics shows memory scanning information since
-memory cgroup creation and can be reset to 0 by writing 0 as
-
- #echo 0 > ../memory.vmscan_stat
-
-This file contains following statistics.
-
-[param]_[file_or_anon]_pages_by_[reason]_[under_heararchy]
-[param]_elapsed_ns_by_[reason]_[under_hierarchy]
-
-For example,
-
-  scanned_file_pages_by_limit indicates the number of scanned
-  file pages at vmscan.
-
-Now, 3 parameters are supported
-
-  scanned - the number of pages scanned by vmscan
-  rotated - the number of pages activated at vmscan
-  freed   - the number of pages freed by vmscan
-
-If "rotated" is high against scanned/freed, the memcg seems busy.
-
-Now, 2 reason are supported
-
-  limit - the memory cgroup's limit
-  system - global memory pressure + softlimit
-           (global memory pressure not under softlimit is not handled now)
-
-When under_hierarchy is added in the tail, the number indicates the
-total memcg scan of its children and itself.
-
-elapsed_ns is a elapsed time in nanosecond. This may include sleep time
-and not indicates CPU usage. So, please take this as just showing
-latency.
-
-Here is an example.
-
-# cat /cgroup/memory/A/memory.vmscan_stat
-scanned_pages_by_limit 9471864
-scanned_anon_pages_by_limit 6640629
-scanned_file_pages_by_limit 2831235
-rotated_pages_by_limit 4243974
-rotated_anon_pages_by_limit 3971968
-rotated_file_pages_by_limit 272006
-freed_pages_by_limit 2318492
-freed_anon_pages_by_limit 962052
-freed_file_pages_by_limit 1356440
-elapsed_ns_by_limit 351386416101
-scanned_pages_by_system 0
-scanned_anon_pages_by_system 0
-scanned_file_pages_by_system 0
-rotated_pages_by_system 0
-rotated_anon_pages_by_system 0
-rotated_file_pages_by_system 0
-freed_pages_by_system 0
-freed_anon_pages_by_system 0
-freed_file_pages_by_system 0
-elapsed_ns_by_system 0
-scanned_pages_by_limit_under_hierarchy 9471864
-scanned_anon_pages_by_limit_under_hierarchy 6640629
-scanned_file_pages_by_limit_under_hierarchy 2831235
-rotated_pages_by_limit_under_hierarchy 4243974
-rotated_anon_pages_by_limit_under_hierarchy 3971968
-rotated_file_pages_by_limit_under_hierarchy 272006
-freed_pages_by_limit_under_hierarchy 2318492
-freed_anon_pages_by_limit_under_hierarchy 962052
-freed_file_pages_by_limit_under_hierarchy 1356440
-elapsed_ns_by_limit_under_hierarchy 351386416101
-scanned_pages_by_system_under_hierarchy 0
-scanned_anon_pages_by_system_under_hierarchy 0
-scanned_file_pages_by_system_under_hierarchy 0
-rotated_pages_by_system_under_hierarchy 0
-rotated_anon_pages_by_system_under_hierarchy 0
-rotated_file_pages_by_system_under_hierarchy 0
-freed_pages_by_system_under_hierarchy 0
-freed_anon_pages_by_system_under_hierarchy 0
-freed_file_pages_by_system_under_hierarchy 0
-elapsed_ns_by_system_under_hierarchy 0
-
  5.3 swappiness
  
  Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
diff --git a/Documentation/email-clients.txt b/Documentation/email-clients.txt

index a0b58e29f91171cf8616fd2aaa93cc856c985dd9..860c29a472adfc13af02641001bd03dfe04b9cb6 100644 (file)
--- a/Documentation/email-clients.txt
+++ b/Documentation/email-clients.txt
@@ -199,18 +199,16 @@ to coerce it into behaving.
  
  To beat some sense out of the internal editor, do this:
  
-- Under account settings, composition and addressing, uncheck "Compose
-  messages in HTML format".
-
  - Edit your Thunderbird config settings so that it won't use format=flowed.
    Go to "edit->preferences->advanced->config editor" to bring up the
    thunderbird's registry editor, and set "mailnews.send_plaintext_flowed" to
    "false".
  
-- Enable "preformat" mode: Shft-click on the Write icon to bring up the HTML
-  composer, select "Preformat" from the drop-down box just under the subject
-  line, then close the message without saving.  (This setting also applies to
-  the text composer, but the only control for it is in the HTML composer.)
+- Disable HTML Format: Set "mail.identity.id1.compose_html" to "false".
+
+- Enable "preformat" mode: Set "editor.quotesPreformatted" to "true".
+
+- Enable UTF8: Set "prefs.converted-to-utf8" to "true".
  
  - Install the "toggle wordwrap" extension.  Download the file from:
      https://addons.mozilla.org/thunderbird/addon/2351/
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt

index c4a6e148732ae549be7e25b8863912a2f924d254..4dc4654776657b68f35161ac642e9caa187d15e4 100644 (file)
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -592,3 +592,11 @@ Why:    In 3.0, we can now autodetect internal 3G device and already have
         interface that was used by acer-wmi driver. It will replaced by
         information log when acer-wmi initial.
  Who:    Lee, Chun-Yi <jlee@novell.com>
+
+----------------------------
+What:  The XFS nodelaylog mount option
+When:  3.3
+Why:   The delaylog mode that has been the default since 2.6.39 has proven
+       stable, and the old code is in the way of additional improvements in
+       the log code.
+Who:   Christoph Hellwig <hch@lst.de>
diff --git a/Documentation/filesystems/befs.txt b/Documentation/filesystems/befs.txt

index 6e49c363938e8b9e76833ca1c0da3106912a4449..da45e6c842b889189fffbb05720a5605b51902ba 100644 (file)
--- a/Documentation/filesystems/befs.txt
+++ b/Documentation/filesystems/befs.txt
@@ -27,7 +27,7 @@ His original code can still be found at:
  Does anyone know of a more current email address for Makoto? He doesn't
  respond to the address given above...
  
-Current maintainer: Sergey S. Kostyliov <rathamahata@php4.ru>
+This filesystem doesn't have a maintainer.
  
  WHAT IS THIS DRIVER?
  ==================
diff --git a/Documentation/hwmon/max16065 b/Documentation/hwmon/max16065

index 44b4f61e04f9e3195c32602f77b2fd310dbbf980..c11f64a1f2adb61077f5910215981d589503aa0f 100644 (file)
--- a/Documentation/hwmon/max16065
+++ b/Documentation/hwmon/max16065
@@ -62,6 +62,13 @@ can be safely used to identify the chip. You will have to instantiate
  the devices explicitly. Please see Documentation/i2c/instantiating-devices for
  details.
  
+WARNING: Do not access chip registers using the i2cdump command, and do not use
+any of the i2ctools commands on a command register (0xa5 to 0xac). The chips
+supported by this driver interpret any access to a command register (including
+read commands) as request to execute the command in question. This may result in
+power loss, board resets, and/or Flash corruption. Worst case, your board may
+turn into a brick.
+
  
  Sysfs entries
  -------------
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt

index 845a191004b1fd20373b6b3738f8404b0d85c2c4..54078ed96b3751724acaf0bd3fdab95df962480f 100644 (file)
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -319,4 +319,6 @@ Code  Seq#(hex)     Include File            Comments
                                         <mailto:thomas@winischhofer.net>
  0xF4   00-1F   video/mbxfb.h           mbxfb
                                         <mailto:raph@8d.com>
+0xF6   all     LTTng                   Linux Trace Toolkit Next Generation
+                                       <mailto:mathieu.desnoyers@efficios.com>
  0xFD   all     linux/dm-ioctl.h
diff --git a/Documentation/kernel-docs.txt b/Documentation/kernel-docs.txt

index 9a8674629a07598eb553970e1207b31b42ec0316..0e0734b509d84097f1ed02e653cf0f4bfbadc6c0 100644 (file)
--- a/Documentation/kernel-docs.txt
+++ b/Documentation/kernel-docs.txt
@@ -620,17 +620,6 @@
         (including this document itself) have been moved there, and might
         be more up to date than the web version.
  
-     * Name: "Linux Source Driver"
-       URL: http://lsd.linux.cz
-       Keywords: Browsing source code.
-       Description: "Linux Source Driver (LSD) is an application, which
-       can make browsing source codes of Linux kernel easier than you can
-       imagine. You can select between multiple versions of kernel (e.g.
-       0.01, 1.0.0, 2.0.33, 2.0.34pre13, 2.0.0, 2.1.101 etc.). With LSD
-       you can search Linux kernel (fulltext, macros, types, functions
-       and variables) and LSD can generate patches for you on the fly
-       (files, directories or kernel)".
-
       * Name: "Linux Kernel Source Reference"
         Author: Thomas Graichen.
         URL: http://marc.info/?l=linux-kernel&m=96446640102205&w=4
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt

index e279b724291239677041f3d4a3f002e566db8ae6..854ed5ca7e3f122f5ac808b462eb14b9ee6b1977 100644 (file)
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -40,6 +40,7 @@ parameter is applicable:
         ALSA    ALSA sound support is enabled.
         APIC    APIC support is enabled.
         APM     Advanced Power Management support is enabled.
+       ARM     ARM architecture is enabled.
         AVR32   AVR32 architecture is enabled.
         AX25    Appropriate AX.25 support is enabled.
         BLACKFIN Blackfin architecture is enabled.
@@ -49,6 +50,7 @@ parameter is applicable:
         EFI     EFI Partitioning (GPT) is enabled
         EIDE    EIDE/ATAPI support is enabled.
         FB      The frame buffer device is enabled.
+       FTRACE  Function tracing enabled.
         GCOV    GCOV profiling is enabled.
         HW      Appropriate hardware is enabled.
         IA-64   IA-64 architecture is enabled.
@@ -69,6 +71,7 @@ parameter is applicable:
                         Documentation/m68k/kernel-options.txt.
         MCA     MCA bus support is enabled.
         MDA     MDA console support is enabled.
+       MIPS    MIPS architecture is enabled.
         MOUSE   Appropriate mouse support is enabled.
         MSI     Message Signaled Interrupts (PCI).
         MTD     MTD (Memory Technology Device) support is enabled.
@@ -100,7 +103,6 @@ parameter is applicable:
         SPARC   Sparc architecture is enabled.
         SWSUSP  Software suspend (hibernation) is enabled.
         SUSPEND System suspend states are enabled.
-       FTRACE  Function tracing enabled.
         TPM     TPM drivers are enabled.
         TS      Appropriate touchscreen support is enabled.
         UMS     USB Mass Storage support is enabled.
@@ -115,7 +117,7 @@ parameter is applicable:
         X86-64  X86-64 architecture is enabled.
                         More X86-64 boot options can be found in
                         Documentation/x86/x86_64/boot-options.txt .
-       X86     Either 32bit or 64bit x86 (same as X86-32+X86-64)
+       X86     Either 32-bit or 64-bit x86 (same as X86-32+X86-64)
         XEN     Xen support is enabled
  
  In addition, the following text indicates that the option:
@@ -376,7 +378,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
         atkbd.softrepeat= [HW]
                         Use software keyboard repeat
  
-       autotest        [IA64]
+       autotest        [IA-64]
  
         baycom_epp=     [HW,AX25]
                         Format: <io>,<mode>
@@ -681,8 +683,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                 uart[8250],mmio32,<addr>[,options]
                         Start an early, polled-mode console on the 8250/16550
                         UART at the specified I/O port or MMIO address.
-                       MMIO inter-register address stride is either 8bit (mmio)
-                        or 32bit (mmio32).
+                       MMIO inter-register address stride is either 8-bit
+                       (mmio) or 32-bit (mmio32).
                         The options are the same as for ttyS, above.
  
         earlyprintk=    [X86,SH,BLACKFIN]
@@ -725,7 +727,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                         See Documentation/block/as-iosched.txt and
                         Documentation/block/deadline-iosched.txt for details.
  
-       elfcorehdr=     [IA64,PPC,SH,X86]
+       elfcorehdr=     [IA-64,PPC,SH,X86]
                         Specifies physical address of start of kernel core
                         image elf header. Generally kexec loader will
                         pass this option to capture kernel.
@@ -791,7 +793,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                         tracer at boot up. function-list is a comma separated
                         list of functions. This list can be changed at run
                         time by the set_ftrace_filter file in the debugfs
-                       tracing directory. 
+                       tracing directory.
  
         ftrace_notrace=[function-list]
                         [FTRACE] Do not trace the functions specified in
@@ -829,7 +831,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
  
         hashdist=       [KNL,NUMA] Large hashes allocated during boot
                         are distributed across NUMA nodes.  Defaults on
-                       for 64bit NUMA, off otherwise.
+                       for 64-bit NUMA, off otherwise.
                         Format: 0 | 1 (for off | on)
  
         hcl=            [IA-64] SGI's Hardware Graph compatibility layer
@@ -998,10 +1000,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                         DMA.
                 forcedac [x86_64]
                         With this option iommu will not optimize to look
-                       for io virtual address below 32 bit forcing dual
+                       for io virtual address below 32-bit forcing dual
                         address cycle on pci bus for cards supporting greater
-                       than 32 bit addressing. The default is to look
-                       for translation below 32 bit and if not available
+                       than 32-bit addressing. The default is to look
+                       for translation below 32-bit and if not available
                         then look in the higher range.
                 strict [Default Off]
                         With this option on every unmap_single operation will
@@ -1017,7 +1019,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                         off     disable Interrupt Remapping
                         nosid   disable Source ID checking
  
-       inttest=        [IA64]
+       inttest=        [IA-64]
  
         iomem=          Disable strict checking of access to MMIO memory
                 strict  regions from userspace.
@@ -1034,7 +1036,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                 nomerge
                 forcesac
                 soft
-               pt      [x86, IA64]
+               pt      [x86, IA-64]
  
         io7=            [HW] IO7 for Marvel based alpha systems
                         See comment before marvel_specify_io7 in
@@ -1165,7 +1167,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
  
         kvm-amd.npt=    [KVM,AMD] Disable nested paging (virtualized MMU)
                         for all guests.
-                       Default is 1 (enabled) if in 64bit or 32bit-PAE mode
+                       Default is 1 (enabled) if in 64-bit or 32-bit PAE mode.
  
         kvm-intel.ept=  [KVM,Intel] Disable extended page tables
                         (virtualized MMU) support on capable Intel chips.
@@ -1202,10 +1204,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                         libata.dma=0      Disable all PATA and SATA DMA
                         libata.dma=1      PATA and SATA Disk DMA only
                         libata.dma=2      ATAPI (CDROM) DMA only
-                       libata.dma=4      Compact Flash DMA only 
+                       libata.dma=4      Compact Flash DMA only
                         Combinations also work, so libata.dma=3 enables DMA
                         for disks and CDROMs, but not CFs.
-       
+
         libata.ignore_hpa=      [LIBATA] Ignore HPA limit
                         libata.ignore_hpa=0       keep BIOS limits (default)
                         libata.ignore_hpa=1       ignore limits, using full disk
@@ -1331,7 +1333,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
         ltpc=           [NET]
                         Format: <io>,<irq>,<dma>
  
-       machvec=        [IA64] Force the use of a particular machine-vector
+       machvec=        [IA-64] Force the use of a particular machine-vector
                         (machvec) in a generic kernel.
                         Example: machvec=hpzx1_swiotlb
  
@@ -1348,9 +1350,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                         it is equivalent to "nosmp", which also disables
                         the IO APIC.
  
-       max_loop=       [LOOP] Maximum number of loopback devices that can
-                       be mounted
-                       Format: <1-256>
+       max_loop=       [LOOP] The number of loop block devices that get
+       (loop.max_loop) unconditionally pre-created at init time. The default
+                       number is configured by BLK_DEV_LOOP_MIN_COUNT. Instead
+                       of statically allocating a predefined number, loop
+                       devices can be requested on-demand with the
+                       /dev/loop-control interface.
  
         mcatest=        [IA-64]
  
@@ -1734,7 +1739,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
  
         nointroute      [IA-64]
  
-       nojitter        [IA64] Disables jitter checking for ITC timers.
+       nojitter        [IA-64] Disables jitter checking for ITC timers.
  
         no-kvmclock     [X86,KVM] Disable paravirtualized KVM clock driver
  
@@ -1800,7 +1805,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
  
         nox2apic        [X86-64,APIC] Do not enable x2APIC mode.
  
-       nptcg=          [IA64] Override max number of concurrent global TLB
+       nptcg=          [IA-64] Override max number of concurrent global TLB
                         purges which is reported from either PAL_VM_SUMMARY or
                         SAL PALO.
  
@@ -2077,13 +2082,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                         Format: { parport<nr> | timid | 0 }
                         See also Documentation/parport.txt.
  
-       pmtmr=          [X86] Manual setup of pmtmr I/O Port. 
+       pmtmr=          [X86] Manual setup of pmtmr I/O Port.
                         Override pmtimer IOPort with a hex value.
                         e.g. pmtmr=0x508
  
-       pnp.debug       [PNP]
-                       Enable PNP debug messages.  This depends on the
-                       CONFIG_PNP_DEBUG_MESSAGES option.
+       pnp.debug=1     [PNP]
+                       Enable PNP debug messages (depends on the
+                       CONFIG_PNP_DEBUG_MESSAGES option).  Change at run-time
+                       via /sys/module/pnp/parameters/debug.  We always show
+                       current resource usage; turning this on also shows
+                       possible settings and some assignment information.
  
         pnpacpi=        [ACPI]
                         { off }
@@ -2635,6 +2643,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                                         medium is write-protected).
                         Example: quirks=0419:aaf5:rl,0421:0433:rc
  
+       user_debug=     [KNL,ARM]
+                       Format: <int>
+                       See arch/arm/Kconfig.debug help text.
+                                1 - undefined instruction events
+                                2 - system calls
+                                4 - invalid data aborts
+                                8 - SIGSEGV faults
+                               16 - SIGBUS faults
+                       Example: user_debug=31
+
         userpte=
                         [X86] Flags controlling user PTE allocations.
  
@@ -2680,6 +2698,27 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
         vmpoff=         [KNL,S390] Perform z/VM CP command after power off.
                         Format: <command>
  
+       vsyscall=       [X86-64]
+                       Controls the behavior of vsyscalls (i.e. calls to
+                       fixed addresses of 0xffffffffff600x00 from legacy
+                       code).  Most statically-linked binaries and older
+                       versions of glibc use these calls.  Because these
+                       functions are at fixed addresses, they make nice
+                       targets for exploits that can control RIP.
+
+                       emulate     [default] Vsyscalls turn into traps and are
+                                   emulated reasonably safely.
+
+                       native      Vsyscalls are native syscall instructions.
+                                   This is a little bit faster than trapping
+                                   and makes a few dynamic recompilers work
+                                   better than they would in emulation mode.
+                                   It also makes exploits much easier to write.
+
+                       none        Vsyscalls don't work at all.  This makes
+                                   them quite hard to use for exploits but
+                                   might break your system.
+
         vt.cur_default= [VT] Default cursor shape.
                         Format: 0xCCBBAA, where AA, BB, and CC are the same as
                         the parameters of the <Esc>[?A;B;Cc escape sequence;
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX

index 4edd78dfb3622f195a4b05d5948a8e39ceb43070..bbce1215434a9e70797ce679bb6a15c5c2aa29ab 100644 (file)
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -1,13 +1,21 @@
  00-INDEX
         - this file
+3c359.txt
+       - information on the 3Com TokenLink Velocity XL (3c5359) driver.
  3c505.txt
         - information on the 3Com EtherLink Plus (3c505) driver.
+3c509.txt
+       - information on the 3Com Etherlink III Series Ethernet cards.
  6pack.txt
         - info on the 6pack protocol, an alternative to KISS for AX.25
  DLINK.txt
         - info on the D-Link DE-600/DE-620 parallel port pocket adapters
  PLIP.txt
         - PLIP: The Parallel Line Internet Protocol device driver
+README.ipw2100
+       - README for the Intel PRO/Wireless 2100 driver.
+README.ipw2200
+       - README for the Intel PRO/Wireless 2915ABG and 2200BG driver.
  README.sb1000
         - info on General Instrument/NextLevel SURFboard1000 cable modem.
  alias.txt
@@ -20,8 +28,12 @@ atm.txt
         - info on where to get ATM programs and support for Linux.
  ax25.txt
         - info on using AX.25 and NET/ROM code for Linux
+batman-adv.txt
+       - B.A.T.M.A.N routing protocol on top of layer 2 Ethernet Frames.
  baycom.txt
         - info on the driver for Baycom style amateur radio modems
+bonding.txt
+       - Linux Ethernet Bonding Driver HOWTO: link aggregation in Linux.
  bridge.txt
         - where to get user space programs for ethernet bridging with Linux.
  can.txt
@@ -34,32 +46,60 @@ cxacru.txt
         - Conexant AccessRunner USB ADSL Modem
  cxacru-cf.py
         - Conexant AccessRunner USB ADSL Modem configuration file parser
+cxgb.txt
+       - Release Notes for the Chelsio N210 Linux device driver.
+dccp.txt
+       - the Datagram Congestion Control Protocol (DCCP) (RFC 4340..42).
  de4x5.txt
         - the Digital EtherWORKS DE4?? and DE5?? PCI Ethernet driver
  decnet.txt
         - info on using the DECnet networking layer in Linux.
  depca.txt
         - the Digital DEPCA/EtherWORKS DE1?? and DE2?? LANCE Ethernet driver
+dl2k.txt
+       - README for D-Link DL2000-based Gigabit Ethernet Adapters (dl2k.ko).
+dm9000.txt
+       - README for the Simtec DM9000 Network driver.
  dmfe.txt
         - info on the Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver.
+dns_resolver.txt
+       - The DNS resolver module allows kernel servies to make DNS queries.
+driver.txt
+       - Softnet driver issues.
  e100.txt
         - info on Intel's EtherExpress PRO/100 line of 10/100 boards
  e1000.txt
         - info on Intel's E1000 line of gigabit ethernet boards
+e1000e.txt
+       - README for the Intel Gigabit Ethernet Driver (e1000e).
  eql.txt
         - serial IP load balancing
  ewrk3.txt
         - the Digital EtherWORKS 3 DE203/4/5 Ethernet driver
+fib_trie.txt
+       - Level Compressed Trie (LC-trie) notes: a structure for routing.
  filter.txt
         - Linux Socket Filtering
  fore200e.txt
         - FORE Systems PCA-200E/SBA-200E ATM NIC driver info.
  framerelay.txt
         - info on using Frame Relay/Data Link Connection Identifier (DLCI).
+gen_stats.txt
+       - Generic networking statistics for netlink users.
+generic_hdlc.txt
+       - The generic High Level Data Link Control (HDLC) layer.
  generic_netlink.txt
         - info on Generic Netlink
+gianfar.txt
+       - Gianfar Ethernet Driver.
  ieee802154.txt
         - Linux IEEE 802.15.4 implementation, API and drivers
+ifenslave.c
+       - Configure network interfaces for parallel routing (bonding).
+igb.txt
+       - README for the Intel Gigabit Ethernet Driver (igb).
+igbvf.txt
+       - README for the Intel Gigabit Ethernet Driver (igbvf).
  ip-sysctl.txt
         - /proc/sys/net/ipv4/* variables
  ip_dynaddr.txt
@@ -68,41 +108,117 @@ ipddp.txt
         - AppleTalk-IP Decapsulation and AppleTalk-IP Encapsulation
  iphase.txt
         - Interphase PCI ATM (i)Chip IA Linux driver info.
+ipv6.txt
+       - Options to the ipv6 kernel module.
+ipvs-sysctl.txt
+       - Per-inode explanation of the /proc/sys/net/ipv4/vs interface.
  irda.txt
         - where to get IrDA (infrared) utilities and info for Linux.
+ixgb.txt
+       - README for the Intel 10 Gigabit Ethernet Driver (ixgb).
+ixgbe.txt
+       - README for the Intel 10 Gigabit Ethernet Driver (ixgbe).
+ixgbevf.txt
+       - README for the Intel Virtual Function (VF) Driver (ixgbevf).
+l2tp.txt
+       - User guide to the L2TP tunnel protocol.
  lapb-module.txt
         - programming information of the LAPB module.
  ltpc.txt
         - the Apple or Farallon LocalTalk PC card driver
+mac80211-injection.txt
+       - HOWTO use packet injection with mac80211
  multicast.txt
         - Behaviour of cards under Multicast
+multiqueue.txt
+       - HOWTO for multiqueue network device support.
+netconsole.txt
+       - The network console module netconsole.ko: configuration and notes.
+netdev-features.txt
+       - Network interface features API description.
  netdevices.txt
         - info on network device driver functions exported to the kernel.
+netif-msg.txt
+       - Design of the network interface message level setting (NETIF_MSG_*).
+nfc.txt
+       - The Linux Near Field Communication (NFS) subsystem.
  olympic.txt
         - IBM PCI Pit/Pit-Phy/Olympic Token Ring driver info.
+operstates.txt
+       - Overview of network interface operational states.
+packet_mmap.txt
+       - User guide to memory mapped packet socket rings (PACKET_[RT]X_RING).
+phonet.txt
+       - The Phonet packet protocol used in Nokia cellular modems.
+phy.txt
+       - The PHY abstraction layer.
+pktgen.txt
+       - User guide to the kernel packet generator (pktgen.ko).
  policy-routing.txt
         - IP policy-based routing
+ppp_generic.txt
+       - Information about the generic PPP driver.
+proc_net_tcp.txt
+       - Per inode overview of the /proc/net/tcp and /proc/net/tcp6 interfaces.
+radiotap-headers.txt
+       - Background on radiotap headers.
  ray_cs.txt
         - Raylink Wireless LAN card driver info.
+rds.txt
+       - Background on the reliable, ordered datagram delivery method RDS.
+regulatory.txt
+       - Overview of the Linux wireless regulatory infrastructure.
+rxrpc.txt
+       - Guide to the RxRPC protocol.
+s2io.txt
+       - Release notes for Neterion Xframe I/II 10GbE driver.
+scaling.txt
+       - Explanation of network scaling techniques: RSS, RPS, RFS, aRFS, XPS.
+sctp.txt
+       - Notes on the Linux kernel implementation of the SCTP protocol.
+secid.txt
+       - Explanation of the secid member in flow structures.
  skfp.txt
         - SysKonnect FDDI (SK-5xxx, Compaq Netelligent) driver info.
  smc9.txt
         - the driver for SMC's 9000 series of Ethernet cards
  smctr.txt
         - SMC TokenCard TokenRing Linux driver info.
+spider-net.txt
+       - README for the Spidernet Driver (as found in PS3 / Cell BE).
+stmmac.txt
+       - README for the STMicro Synopsys Ethernet driver.
+tc-actions-env-rules.txt
+       - rules for traffic control (tc) actions.
+timestamping.txt
+       - overview of network packet timestamping variants.
  tcp.txt
         - short blurb on how TCP output takes place.
+tcp-thin.txt
+       - kernel tuning options for low rate 'thin' TCP streams.
  tlan.txt
         - ThunderLAN (Compaq Netelligent 10/100, Olicom OC-2xxx) driver info.
  tms380tr.txt
         - SysKonnect Token Ring ISA/PCI adapter driver info.
+tproxy.txt
+       - Transparent proxy support user guide.
  tuntap.txt
         - TUN/TAP device driver, allowing user space Rx/Tx of packets.
+udplite.txt
+       - UDP-Lite protocol (RFC 3828) introduction.
  vortex.txt
         - info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards.
+vxge.txt
+       - README for the Neterion X3100 PCIe Server Adapter.
  x25.txt
         - general info on X.25 development.
  x25-iface.txt
         - description of the X.25 Packet Layer to LAPB device interface.
+xfrm_proc.txt
+       - description of the statistics package for XFRM.
+xfrm_sync.txt
+       - sync patches for XFRM enable migration of an SA between hosts.
+xfrm_sysctl.txt
+       - description of the XFRM configuration options.
  z8530drv.txt
         - info about Linux driver for Z8530 based HDLC cards for AX.25
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt

index 5dd960d751743ba8a49b7e28c69628cf00807930..91df678fb7f88428f3989da217647d676e7121de 100644 (file)
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -238,6 +238,18 @@ ad_select
  
         This option was added in bonding version 3.4.0.
  
+all_slaves_active
+
+       Specifies that duplicate frames (received on inactive ports) should be
+       dropped (0) or delivered (1).
+
+       Normally, bonding will drop duplicate frames (received on inactive
+       ports), which is desirable for most users. But there are some times
+       it is nice to allow duplicate frames to be delivered.
+
+       The default value is 0 (drop duplicate frames received on inactive
+       ports).
+
  arp_interval
  
         Specifies the ARP link monitoring frequency in milliseconds.
@@ -433,6 +445,23 @@ miimon
         determined.  See the High Availability section for additional
         information.  The default value is 0.
  
+min_links
+
+       Specifies the minimum number of links that must be active before
+       asserting carrier. It is similar to the Cisco EtherChannel min-links
+       feature. This allows setting the minimum number of member ports that
+       must be up (link-up state) before marking the bond device as up
+       (carrier on). This is useful for situations where higher level services
+       such as clustering want to ensure a minimum number of low bandwidth
+       links are active before switchover. This option only affect 802.3ad
+       mode.
+
+       The default value is 0. This will cause carrier to be asserted (for
+       802.3ad mode) whenever there is an active aggregator, regardless of the
+       number of available links in that aggregator. Note that, because an
+       aggregator cannot be active without at least one available link,
+       setting this option to 0 or to 1 has the exact same effect.
+
  mode
  
         Specifies one of the bonding policies. The default is
diff --git a/Documentation/networking/dmfe.txt b/Documentation/networking/dmfe.txt

index 8006c227fda25fbd150e183a622b94afd9defd86..25320bf19c86ba62f44589f3f8ffef50f0e65cb3 100644 (file)
--- a/Documentation/networking/dmfe.txt
+++ b/Documentation/networking/dmfe.txt
@@ -1,3 +1,5 @@
+Note: This driver doesn't have a maintainer.
+
  Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver for Linux.
  
  This program is free software; you can redistribute it and/or
@@ -55,7 +57,6 @@ Test and make sure PCI latency is now correct for all cases.
  Authors:
  
  Sten Wang <sten_wang@davicom.com.tw >   : Original Author
-Tobias Ringstrom <tori@unhappy.mine.nu> : Current Maintainer
  
  Contributors:
  
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt

index db2a4067013c2e25deabc419ee3e9cab14c2b282..81546990f41ca16b2c1c022dac487aea92fbc20b 100644 (file)
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -992,7 +992,7 @@ bindv6only - BOOLEAN
                 TRUE: disable IPv4-mapped address feature
                 FALSE: enable IPv4-mapped address feature
  
-       Default: FALSE (as specified in RFC2553bis)
+       Default: FALSE (as specified in RFC3493)
  
  IPv6 Fragmentation:
  
diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt

new file mode 100644 (file)

index 0000000..58fd741
--- /dev/null
+++ b/Documentation/networking/scaling.txt
@@ -0,0 +1,378 @@
+Scaling in the Linux Networking Stack
+
+
+Introduction
+============
+
+This document describes a set of complementary techniques in the Linux
+networking stack to increase parallelism and improve performance for
+multi-processor systems.
+
+The following technologies are described:
+
+  RSS: Receive Side Scaling
+  RPS: Receive Packet Steering
+  RFS: Receive Flow Steering
+  Accelerated Receive Flow Steering
+  XPS: Transmit Packet Steering
+
+
+RSS: Receive Side Scaling
+=========================
+
+Contemporary NICs support multiple receive and transmit descriptor queues
+(multi-queue). On reception, a NIC can send different packets to different
+queues to distribute processing among CPUs. The NIC distributes packets by
+applying a filter to each packet that assigns it to one of a small number
+of logical flows. Packets for each flow are steered to a separate receive
+queue, which in turn can be processed by separate CPUs. This mechanism is
+generally known as “Receive-side Scaling” (RSS). The goal of RSS and
+the other scaling techniques to increase performance uniformly.
+Multi-queue distribution can also be used for traffic prioritization, but
+that is not the focus of these techniques.
+
+The filter used in RSS is typically a hash function over the network
+and/or transport layer headers-- for example, a 4-tuple hash over
+IP addresses and TCP ports of a packet. The most common hardware
+implementation of RSS uses a 128-entry indirection table where each entry
+stores a queue number. The receive queue for a packet is determined
+by masking out the low order seven bits of the computed hash for the
+packet (usually a Toeplitz hash), taking this number as a key into the
+indirection table and reading the corresponding value.
+
+Some advanced NICs allow steering packets to queues based on
+programmable filters. For example, webserver bound TCP port 80 packets
+can be directed to their own receive queue. Such “n-tuple” filters can
+be configured from ethtool (--config-ntuple).
+
+==== RSS Configuration
+
+The driver for a multi-queue capable NIC typically provides a kernel
+module parameter for specifying the number of hardware queues to
+configure. In the bnx2x driver, for instance, this parameter is called
+num_queues. A typical RSS configuration would be to have one receive queue
+for each CPU if the device supports enough queues, or otherwise at least
+one for each memory domain, where a memory domain is a set of CPUs that
+share a particular memory level (L1, L2, NUMA node, etc.).
+
+The indirection table of an RSS device, which resolves a queue by masked
+hash, is usually programmed by the driver at initialization. The
+default mapping is to distribute the queues evenly in the table, but the
+indirection table can be retrieved and modified at runtime using ethtool
+commands (--show-rxfh-indir and --set-rxfh-indir). Modifying the
+indirection table could be done to give different queues different
+relative weights.
+
+== RSS IRQ Configuration
+
+Each receive queue has a separate IRQ associated with it. The NIC triggers
+this to notify a CPU when new packets arrive on the given queue. The
+signaling path for PCIe devices uses message signaled interrupts (MSI-X),
+that can route each interrupt to a particular CPU. The active mapping
+of queues to IRQs can be determined from /proc/interrupts. By default,
+an IRQ may be handled on any CPU. Because a non-negligible part of packet
+processing takes place in receive interrupt handling, it is advantageous
+to spread receive interrupts between CPUs. To manually adjust the IRQ
+affinity of each interrupt see Documentation/IRQ-affinity. Some systems
+will be running irqbalance, a daemon that dynamically optimizes IRQ
+assignments and as a result may override any manual settings.
+
+== Suggested Configuration
+
+RSS should be enabled when latency is a concern or whenever receive
+interrupt processing forms a bottleneck. Spreading load between CPUs
+decreases queue length. For low latency networking, the optimal setting
+is to allocate as many queues as there are CPUs in the system (or the
+NIC maximum, if lower). The most efficient high-rate configuration
+is likely the one with the smallest number of receive queues where no
+receive queue overflows due to a saturated CPU, because in default
+mode with interrupt coalescing enabled, the aggregate number of
+interrupts (and thus work) grows with each additional queue.
+
+Per-cpu load can be observed using the mpstat utility, but note that on
+processors with hyperthreading (HT), each hyperthread is represented as
+a separate CPU. For interrupt handling, HT has shown no benefit in
+initial tests, so limit the number of queues to the number of CPU cores
+in the system.
+
+
+RPS: Receive Packet Steering
+============================
+
+Receive Packet Steering (RPS) is logically a software implementation of
+RSS. Being in software, it is necessarily called later in the datapath.
+Whereas RSS selects the queue and hence CPU that will run the hardware
+interrupt handler, RPS selects the CPU to perform protocol processing
+above the interrupt handler. This is accomplished by placing the packet
+on the desired CPU’s backlog queue and waking up the CPU for processing.
+RPS has some advantages over RSS: 1) it can be used with any NIC,
+2) software filters can easily be added to hash over new protocols,
+3) it does not increase hardware device interrupt rate (although it does
+introduce inter-processor interrupts (IPIs)).
+
+RPS is called during bottom half of the receive interrupt handler, when
+a driver sends a packet up the network stack with netif_rx() or
+netif_receive_skb(). These call the get_rps_cpu() function, which
+selects the queue that should process a packet.
+
+The first step in determining the target CPU for RPS is to calculate a
+flow hash over the packet’s addresses or ports (2-tuple or 4-tuple hash
+depending on the protocol). This serves as a consistent hash of the
+associated flow of the packet. The hash is either provided by hardware
+or will be computed in the stack. Capable hardware can pass the hash in
+the receive descriptor for the packet; this would usually be the same
+hash used for RSS (e.g. computed Toeplitz hash). The hash is saved in
+skb->rx_hash and can be used elsewhere in the stack as a hash of the
+packet’s flow.
+
+Each receive hardware queue has an associated list of CPUs to which
+RPS may enqueue packets for processing. For each received packet,
+an index into the list is computed from the flow hash modulo the size
+of the list. The indexed CPU is the target for processing the packet,
+and the packet is queued to the tail of that CPU’s backlog queue. At
+the end of the bottom half routine, IPIs are sent to any CPUs for which
+packets have been queued to their backlog queue. The IPI wakes backlog
+processing on the remote CPU, and any queued packets are then processed
+up the networking stack.
+
+==== RPS Configuration
+
+RPS requires a kernel compiled with the CONFIG_RPS kconfig symbol (on
+by default for SMP). Even when compiled in, RPS remains disabled until
+explicitly configured. The list of CPUs to which RPS may forward traffic
+can be configured for each receive queue using a sysfs file entry:
+
+ /sys/class/net/<dev>/queues/rx-<n>/rps_cpus
+
+This file implements a bitmap of CPUs. RPS is disabled when it is zero
+(the default), in which case packets are processed on the interrupting
+CPU. Documentation/IRQ-affinity.txt explains how CPUs are assigned to
+the bitmap.
+
+== Suggested Configuration
+
+For a single queue device, a typical RPS configuration would be to set
+the rps_cpus to the CPUs in the same memory domain of the interrupting
+CPU. If NUMA locality is not an issue, this could also be all CPUs in
+the system. At high interrupt rate, it might be wise to exclude the
+interrupting CPU from the map since that already performs much work.
+
+For a multi-queue system, if RSS is configured so that a hardware
+receive queue is mapped to each CPU, then RPS is probably redundant
+and unnecessary. If there are fewer hardware queues than CPUs, then
+RPS might be beneficial if the rps_cpus for each queue are the ones that
+share the same memory domain as the interrupting CPU for that queue.
+
+
+RFS: Receive Flow Steering
+==========================
+
+While RPS steers packets solely based on hash, and thus generally
+provides good load distribution, it does not take into account
+application locality. This is accomplished by Receive Flow Steering
+(RFS). The goal of RFS is to increase datacache hitrate by steering
+kernel processing of packets to the CPU where the application thread
+consuming the packet is running. RFS relies on the same RPS mechanisms
+to enqueue packets onto the backlog of another CPU and to wake up that
+CPU.
+
+In RFS, packets are not forwarded directly by the value of their hash,
+but the hash is used as index into a flow lookup table. This table maps
+flows to the CPUs where those flows are being processed. The flow hash
+(see RPS section above) is used to calculate the index into this table.
+The CPU recorded in each entry is the one which last processed the flow.
+If an entry does not hold a valid CPU, then packets mapped to that entry
+are steered using plain RPS. Multiple table entries may point to the
+same CPU. Indeed, with many flows and few CPUs, it is very likely that
+a single application thread handles flows with many different flow hashes.
+
+rps_sock_table is a global flow table that contains the *desired* CPU for
+flows: the CPU that is currently processing the flow in userspace. Each
+table value is a CPU index that is updated during calls to recvmsg and
+sendmsg (specifically, inet_recvmsg(), inet_sendmsg(), inet_sendpage()
+and tcp_splice_read()).
+
+When the scheduler moves a thread to a new CPU while it has outstanding
+receive packets on the old CPU, packets may arrive out of order. To
+avoid this, RFS uses a second flow table to track outstanding packets
+for each flow: rps_dev_flow_table is a table specific to each hardware
+receive queue of each device. Each table value stores a CPU index and a
+counter. The CPU index represents the *current* CPU onto which packets
+for this flow are enqueued for further kernel processing. Ideally, kernel
+and userspace processing occur on the same CPU, and hence the CPU index
+in both tables is identical. This is likely false if the scheduler has
+recently migrated a userspace thread while the kernel still has packets
+enqueued for kernel processing on the old CPU.
+
+The counter in rps_dev_flow_table values records the length of the current
+CPU's backlog when a packet in this flow was last enqueued. Each backlog
+queue has a head counter that is incremented on dequeue. A tail counter
+is computed as head counter + queue length. In other words, the counter
+in rps_dev_flow_table[i] records the last element in flow i that has
+been enqueued onto the currently designated CPU for flow i (of course,
+entry i is actually selected by hash and multiple flows may hash to the
+same entry i).
+
+And now the trick for avoiding out of order packets: when selecting the
+CPU for packet processing (from get_rps_cpu()) the rps_sock_flow table
+and the rps_dev_flow table of the queue that the packet was received on
+are compared. If the desired CPU for the flow (found in the
+rps_sock_flow table) matches the current CPU (found in the rps_dev_flow
+table), the packet is enqueued onto that CPU’s backlog. If they differ,
+the current CPU is updated to match the desired CPU if one of the
+following is true:
+
+- The current CPU's queue head counter >= the recorded tail counter
+  value in rps_dev_flow[i]
+- The current CPU is unset (equal to NR_CPUS)
+- The current CPU is offline
+
+After this check, the packet is sent to the (possibly updated) current
+CPU. These rules aim to ensure that a flow only moves to a new CPU when
+there are no packets outstanding on the old CPU, as the outstanding
+packets could arrive later than those about to be processed on the new
+CPU.
+
+==== RFS Configuration
+
+RFS is only available if the kconfig symbol CONFIG_RFS is enabled (on
+by default for SMP). The functionality remains disabled until explicitly
+configured. The number of entries in the global flow table is set through:
+
+ /proc/sys/net/core/rps_sock_flow_entries
+
+The number of entries in the per-queue flow table are set through:
+
+ /sys/class/net/<dev>/queues/tx-<n>/rps_flow_cnt
+
+== Suggested Configuration
+
+Both of these need to be set before RFS is enabled for a receive queue.
+Values for both are rounded up to the nearest power of two. The
+suggested flow count depends on the expected number of active connections
+at any given time, which may be significantly less than the number of open
+connections. We have found that a value of 32768 for rps_sock_flow_entries
+works fairly well on a moderately loaded server.
+
+For a single queue device, the rps_flow_cnt value for the single queue
+would normally be configured to the same value as rps_sock_flow_entries.
+For a multi-queue device, the rps_flow_cnt for each queue might be
+configured as rps_sock_flow_entries / N, where N is the number of
+queues. So for instance, if rps_flow_entries is set to 32768 and there
+are 16 configured receive queues, rps_flow_cnt for each queue might be
+configured as 2048.
+
+
+Accelerated RFS
+===============
+
+Accelerated RFS is to RFS what RSS is to RPS: a hardware-accelerated load
+balancing mechanism that uses soft state to steer flows based on where
+the application thread consuming the packets of each flow is running.
+Accelerated RFS should perform better than RFS since packets are sent
+directly to a CPU local to the thread consuming the data. The target CPU
+will either be the same CPU where the application runs, or at least a CPU
+which is local to the application thread’s CPU in the cache hierarchy.
+
+To enable accelerated RFS, the networking stack calls the
+ndo_rx_flow_steer driver function to communicate the desired hardware
+queue for packets matching a particular flow. The network stack
+automatically calls this function every time a flow entry in
+rps_dev_flow_table is updated. The driver in turn uses a device specific
+method to program the NIC to steer the packets.
+
+The hardware queue for a flow is derived from the CPU recorded in
+rps_dev_flow_table. The stack consults a CPU to hardware queue map which
+is maintained by the NIC driver. This is an auto-generated reverse map of
+the IRQ affinity table shown by /proc/interrupts. Drivers can use
+functions in the cpu_rmap (“CPU affinity reverse map”) kernel library
+to populate the map. For each CPU, the corresponding queue in the map is
+set to be one whose processing CPU is closest in cache locality.
+
+==== Accelerated RFS Configuration
+
+Accelerated RFS is only available if the kernel is compiled with
+CONFIG_RFS_ACCEL and support is provided by the NIC device and driver.
+It also requires that ntuple filtering is enabled via ethtool. The map
+of CPU to queues is automatically deduced from the IRQ affinities
+configured for each receive queue by the driver, so no additional
+configuration should be necessary.
+
+== Suggested Configuration
+
+This technique should be enabled whenever one wants to use RFS and the
+NIC supports hardware acceleration.
+
+XPS: Transmit Packet Steering
+=============================
+
+Transmit Packet Steering is a mechanism for intelligently selecting
+which transmit queue to use when transmitting a packet on a multi-queue
+device. To accomplish this, a mapping from CPU to hardware queue(s) is
+recorded. The goal of this mapping is usually to assign queues
+exclusively to a subset of CPUs, where the transmit completions for
+these queues are processed on a CPU within this set. This choice
+provides two benefits. First, contention on the device queue lock is
+significantly reduced since fewer CPUs contend for the same queue
+(contention can be eliminated completely if each CPU has its own
+transmit queue). Secondly, cache miss rate on transmit completion is
+reduced, in particular for data cache lines that hold the sk_buff
+structures.
+
+XPS is configured per transmit queue by setting a bitmap of CPUs that
+may use that queue to transmit. The reverse mapping, from CPUs to
+transmit queues, is computed and maintained for each network device.
+When transmitting the first packet in a flow, the function
+get_xps_queue() is called to select a queue. This function uses the ID
+of the running CPU as a key into the CPU-to-queue lookup table. If the
+ID matches a single queue, that is used for transmission. If multiple
+queues match, one is selected by using the flow hash to compute an index
+into the set.
+
+The queue chosen for transmitting a particular flow is saved in the
+corresponding socket structure for the flow (e.g. a TCP connection).
+This transmit queue is used for subsequent packets sent on the flow to
+prevent out of order (ooo) packets. The choice also amortizes the cost
+of calling get_xps_queues() over all packets in the flow. To avoid
+ooo packets, the queue for a flow can subsequently only be changed if
+skb->ooo_okay is set for a packet in the flow. This flag indicates that
+there are no outstanding packets in the flow, so the transmit queue can
+change without the risk of generating out of order packets. The
+transport layer is responsible for setting ooo_okay appropriately. TCP,
+for instance, sets the flag when all data for a connection has been
+acknowledged.
+
+==== XPS Configuration
+
+XPS is only available if the kconfig symbol CONFIG_XPS is enabled (on by
+default for SMP). The functionality remains disabled until explicitly
+configured. To enable XPS, the bitmap of CPUs that may use a transmit
+queue is configured using the sysfs file entry:
+
+/sys/class/net/<dev>/queues/tx-<n>/xps_cpus
+
+== Suggested Configuration
+
+For a network device with a single transmission queue, XPS configuration
+has no effect, since there is no choice in this case. In a multi-queue
+system, XPS is preferably configured so that each CPU maps onto one queue.
+If there are as many queues as there are CPUs in the system, then each
+queue can also map onto one CPU, resulting in exclusive pairings that
+experience no contention. If there are fewer queues than CPUs, then the
+best CPUs to share a given queue are probably those that share the cache
+with the CPU that processes transmit completions for that queue
+(transmit interrupts).
+
+
+Further Information
+===================
+RPS and RFS were introduced in kernel 2.6.35. XPS was incorporated into
+2.6.38. Original patches were submitted by Tom Herbert
+(therbert@google.com)
+
+Accelerated RFS was introduced in 2.6.35. Original patches were
+submitted by Ben Hutchings (bhutchings@solarflare.com)
+
+Authors:
+Tom Herbert (therbert@google.com)
+Willem de Bruijn (willemb@google.com)
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt

index 4ce5450ab6e833cef372ef2755993353cae50935..6066e3a6b9a98c0f499059d1342444afb43be781 100644 (file)
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -431,8 +431,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
  
    void pm_runtime_irq_safe(struct device *dev);
      - set the power.irq_safe flag for the device, causing the runtime-PM
-      suspend and resume callbacks (but not the idle callback) to be invoked
-      with interrupts disabled
+      callbacks to be invoked with interrupts off
  
    void pm_runtime_mark_last_busy(struct device *dev);
      - set the power.last_busy field to the current time
diff --git a/Documentation/ramoops.txt b/Documentation/ramoops.txt

new file mode 100644 (file)

index 0000000..8fb1ba7
--- /dev/null
+++ b/Documentation/ramoops.txt
@@ -0,0 +1,76 @@
+Ramoops oops/panic logger
+=========================
+
+Sergiu Iordache <sergiu@chromium.org>
+
+Updated: 8 August 2011
+
+0. Introduction
+
+Ramoops is an oops/panic logger that writes its logs to RAM before the system
+crashes. It works by logging oopses and panics in a circular buffer. Ramoops
+needs a system with persistent RAM so that the content of that area can
+survive after a restart.
+
+1. Ramoops concepts
+
+Ramoops uses a predefined memory area to store the dump. The start and size of
+the memory area are set using two variables:
+  * "mem_address" for the start
+  * "mem_size" for the size. The memory size will be rounded down to a
+  power of two.
+
+The memory area is divided into "record_size" chunks (also rounded down to
+power of two) and each oops/panic writes a "record_size" chunk of
+information.
+
+Dumping both oopses and panics can be done by setting 1 in the "dump_oops"
+variable while setting 0 in that variable dumps only the panics.
+
+The module uses a counter to record multiple dumps but the counter gets reset
+on restart (i.e. new dumps after the restart will overwrite old ones).
+
+2. Setting the parameters
+
+Setting the ramoops parameters can be done in 2 different manners:
+ 1. Use the module parameters (which have the names of the variables described
+ as before).
+ 2. Use a platform device and set the platform data. The parameters can then
+ be set through that platform data. An example of doing that is:
+
+#include <linux/ramoops.h>
+[...]
+
+static struct ramoops_platform_data ramoops_data = {
+        .mem_size               = <...>,
+        .mem_address            = <...>,
+        .record_size            = <...>,
+        .dump_oops              = <...>,
+};
+
+static struct platform_device ramoops_dev = {
+        .name = "ramoops",
+        .dev = {
+                .platform_data = &ramoops_data,
+        },
+};
+
+[... inside a function ...]
+int ret;
+
+ret = platform_device_register(&ramoops_dev);
+if (ret) {
+       printk(KERN_ERR "unable to register platform device\n");
+       return ret;
+}
+
+3. Dump format
+
+The data dump begins with a header, currently defined as "====" followed by a
+timestamp and a new line. The dump then continues with the actual data.
+
+4. Reading the data
+
+The dump data can be read from memory (through /dev/mem or other means).
+Getting the module parameters, which are needed in order to parse the data, can
+be done through /sys/module/ramoops/parameters/* .
diff --git a/Documentation/virtual/00-INDEX b/Documentation/virtual/00-INDEX

index fe0251c4cfb7dcd589163d1283e516153d066cbe..8e601991d91c6b4e0247cd9b7a5735f3df7661b3 100644 (file)
--- a/Documentation/virtual/00-INDEX
+++ b/Documentation/virtual/00-INDEX
@@ -8,3 +8,6 @@ lguest/
         - Extremely simple hypervisor for experimental/educational use.
  uml/
         - User Mode Linux, builds/runs Linux kernel as a userspace program.
+virtio.txt
+       - Text version of draft virtio spec.
+          See http://ozlabs.org/~rusty/virtio-spec
diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c

index 043bd7df31399a6a02c770393eb6d7f083e331e3..d928c134dee66953f0b7f163a8e3633ac95e3747 100644 (file)
--- a/Documentation/virtual/lguest/lguest.c
+++ b/Documentation/virtual/lguest/lguest.c
@@ -1996,6 +1996,9 @@ int main(int argc, char *argv[])
         /* We use a simple helper to copy the arguments separated by spaces. */
         concat((char *)(boot + 1), argv+optind+2);
  
+       /* Set kernel alignment to 16M (CONFIG_PHYSICAL_ALIGN) */
+       boot->hdr.kernel_alignment = 0x1000000;
+
         /* Boot protocol version: 2.07 supports the fields for lguest. */
         boot->hdr.version = 0x207;
  
diff --git a/Documentation/virtual/virtio-spec.txt b/Documentation/virtual/virtio-spec.txt

new file mode 100644 (file)

index 0000000..a350ae1
--- /dev/null
+++ b/Documentation/virtual/virtio-spec.txt
@@ -0,0 +1,2200 @@
+[Generated file: see http://ozlabs.org/~rusty/virtio-spec/]
+Virtio PCI Card Specification
+v0.9.1 DRAFT
+-
+
+Rusty Russell <rusty@rustcorp.com.au>IBM Corporation (Editor)
+
+2011 August 1.
+
+Purpose and Description
+
+This document describes the specifications of the “virtio” family
+of PCI[LaTeX Command: nomenclature] devices. These are devices
+are found in virtual environments[LaTeX Command: nomenclature],
+yet by design they are not all that different from physical PCI
+devices, and this document treats them as such. This allows the
+guest to use standard PCI drivers and discovery mechanisms.
+
+The purpose of virtio and this specification is that virtual
+environments and guests should have a straightforward, efficient,
+standard and extensible mechanism for virtual devices, rather
+than boutique per-environment or per-OS mechanisms.
+
+  Straightforward: Virtio PCI devices use normal PCI mechanisms
+  of interrupts and DMA which should be familiar to any device
+  driver author. There is no exotic page-flipping or COW
+  mechanism: it's just a PCI device.[footnote:
+This lack of page-sharing implies that the implementation of the
+device (e.g. the hypervisor or host) needs full access to the
+guest memory. Communication with untrusted parties (i.e.
+inter-guest communication) requires copying.
+]
+
+  Efficient: Virtio PCI devices consist of rings of descriptors
+  for input and output, which are neatly separated to avoid cache
+  effects from both guest and device writing to the same cache
+  lines.
+
+  Standard: Virtio PCI makes no assumptions about the environment
+  in which it operates, beyond supporting PCI. In fact the virtio
+  devices specified in the appendices do not require PCI at all:
+  they have been implemented on non-PCI buses.[footnote:
+The Linux implementation further separates the PCI virtio code
+from the specific virtio drivers: these drivers are shared with
+the non-PCI implementations (currently lguest and S/390).
+]
+
+  Extensible: Virtio PCI devices contain feature bits which are
+  acknowledged by the guest operating system during device setup.
+  This allows forwards and backwards compatibility: the device
+  offers all the features it knows about, and the driver
+  acknowledges those it understands and wishes to use.
+
+  Virtqueues
+
+The mechanism for bulk data transport on virtio PCI devices is
+pretentiously called a virtqueue. Each device can have zero or
+more virtqueues: for example, the network device has one for
+transmit and one for receive.
+
+Each virtqueue occupies two or more physically-contiguous pages
+(defined, for the purposes of this specification, as 4096 bytes),
+and consists of three parts:
+
+
++-------------------+-----------------------------------+-----------+
+| Descriptor Table  |   Available Ring     (padding)    | Used Ring |
++-------------------+-----------------------------------+-----------+
+
+
+When the driver wants to send buffers to the device, it puts them
+in one or more slots in the descriptor table, and writes the
+descriptor indices into the available ring. It then notifies the
+device. When the device has finished with the buffers, it writes
+the descriptors into the used ring, and sends an interrupt.
+
+Specification
+
+  PCI Discovery
+
+Any PCI device with Vendor ID 0x1AF4, and Device ID 0x1000
+through 0x103F inclusive is a virtio device[footnote:
+The actual value within this range is ignored
+]. The device must also have a Revision ID of 0 to match this
+specification.
+
+The Subsystem Device ID indicates which virtio device is
+supported by the device. The Subsystem Vendor ID should reflect
+the PCI Vendor ID of the environment (it's currently only used
+for informational purposes by the guest).
+
+
++----------------------+--------------------+---------------+
+| Subsystem Device ID  |   Virtio Device    | Specification |
++----------------------+--------------------+---------------+
++----------------------+--------------------+---------------+
+|          1           |   network card     |  Appendix C   |
++----------------------+--------------------+---------------+
+|          2           |   block device     |  Appendix D   |
++----------------------+--------------------+---------------+
+|          3           |      console       |  Appendix E   |
++----------------------+--------------------+---------------+
+|          4           |  entropy source    |  Appendix F   |
++----------------------+--------------------+---------------+
+|          5           | memory ballooning  |  Appendix G   |
++----------------------+--------------------+---------------+
+|          6           |     ioMemory       |       -       |
++----------------------+--------------------+---------------+
+|          9           |   9P transport     |       -       |
++----------------------+--------------------+---------------+
+
+
+  Device Configuration
+
+To configure the device, we use the first I/O region of the PCI
+device. This contains a virtio header followed by a
+device-specific region.
+
+There may be different widths of accesses to the I/O region; the “
+natural” access method for each field in the virtio header must
+be used (i.e. 32-bit accesses for 32-bit fields, etc), but the
+device-specific region can be accessed using any width accesses,
+and should obtain the same results.
+
+Note that this is possible because while the virtio header is PCI
+(i.e. little) endian, the device-specific region is encoded in
+the native endian of the guest (where such distinction is
+applicable).
+
+  Device Initialization Sequence
+
+We start with an overview of device initialization, then expand
+on the details of the device and how each step is preformed.
+
+  Reset the device. This is not required on initial start up.
+
+  The ACKNOWLEDGE status bit is set: we have noticed the device.
+
+  The DRIVER status bit is set: we know how to drive the device.
+
+  Device-specific setup, including reading the Device Feature
+  Bits, discovery of virtqueues for the device, optional MSI-X
+  setup, and reading and possibly writing the virtio
+  configuration space.
+
+  The subset of Device Feature Bits understood by the driver is
+  written to the device.
+
+  The DRIVER_OK status bit is set.
+
+  The device can now be used (ie. buffers added to the
+  virtqueues)[footnote:
+Historically, drivers have used the device before steps 5 and 6.
+This is only allowed if the driver does not use any features
+which would alter this early use of the device.
+]
+
+If any of these steps go irrecoverably wrong, the guest should
+set the FAILED status bit to indicate that it has given up on the
+device (it can reset the device later to restart if desired).
+
+We now cover the fields required for general setup in detail.
+
+  Virtio Header
+
+The virtio header looks as follows:
+
+
++------------++---------------------+---------------------+----------+--------+---------+---------+---------+--------+
+| Bits       || 32                  | 32                  | 32       | 16     | 16      | 16      | 8       | 8      |
++------------++---------------------+---------------------+----------+--------+---------+---------+---------+--------+
+| Read/Write || R                   | R+W                 | R+W      | R      | R+W     | R+W     | R+W     | R      |
++------------++---------------------+---------------------+----------+--------+---------+---------+---------+--------+
+| Purpose    || Device              | Guest               | Queue    | Queue  | Queue   | Queue   | Device  | ISR    |
+|            || Features bits 0:31  | Features bits 0:31  | Address  | Size   | Select  | Notify  | Status  | Status |
++------------++---------------------+---------------------+----------+--------+---------+---------+---------+--------+
+
+
+If MSI-X is enabled for the device, two additional fields
+immediately follow this header:
+
+
++------------++----------------+--------+
+| Bits       || 16             | 16     |
+              +----------------+--------+
++------------++----------------+--------+
+| Read/Write || R+W            | R+W    |
++------------++----------------+--------+
+| Purpose    || Configuration  | Queue  |
+| (MSI-X)    || Vector         | Vector |
++------------++----------------+--------+
+
+
+Finally, if feature bits (VIRTIO_F_FEATURES_HI) this is
+immediately followed by two additional fields:
+
+
++------------++----------------------+----------------------
+| Bits       || 32                   | 32
++------------++----------------------+----------------------
+| Read/Write || R                    | R+W
++------------++----------------------+----------------------
+| Purpose    || Device               | Guest
+|            || Features bits 32:63  | Features bits 32:63
++------------++----------------------+----------------------
+
+
+Immediately following these general headers, there may be
+device-specific headers:
+
+
++------------++--------------------+
+| Bits       || Device Specific    |
+              +--------------------+
++------------++--------------------+
+| Read/Write || Device Specific    |
++------------++--------------------+
+| Purpose    || Device Specific... |
+|            ||                    |
++------------++--------------------+
+
+
+  Device Status
+
+The Device Status field is updated by the guest to indicate its
+progress. This provides a simple low-level diagnostic: it's most
+useful to imagine them hooked up to traffic lights on the console
+indicating the status of each device.
+
+The device can be reset by writing a 0 to this field, otherwise
+at least one bit should be set:
+
+  ACKNOWLEDGE (1) Indicates that the guest OS has found the
+  device and recognized it as a valid virtio device.
+
+  DRIVER (2) Indicates that the guest OS knows how to drive the
+  device. Under Linux, drivers can be loadable modules so there
+  may be a significant (or infinite) delay before setting this
+  bit.
+
+  DRIVER_OK (3) Indicates that the driver is set up and ready to
+  drive the device.
+
+  FAILED (8) Indicates that something went wrong in the guest,
+  and it has given up on the device. This could be an internal
+  error, or the driver didn't like the device for some reason, or
+  even a fatal error during device operation. The device must be
+  reset before attempting to re-initialize.
+
+  Feature Bits
+
+The least significant 31 bits of the first configuration field
+indicates the features that the device supports (the high bit is
+reserved, and will be used to indicate the presence of future
+feature bits elsewhere). If more than 31 feature bits are
+supported, the device indicates so by setting feature bit 31 (see
+[cha:Reserved-Feature-Bits]). The bits are allocated as follows:
+
+  0 to 23 Feature bits for the specific device type
+
+  24 to 40 Feature bits reserved for extensions to the queue and
+  feature negotiation mechanisms
+
+  41 to 63 Feature bits reserved for future extensions
+
+For example, feature bit 0 for a network device (i.e. Subsystem
+Device ID 1) indicates that the device supports checksumming of
+packets.
+
+The feature bits are negotiated: the device lists all the
+features it understands in the Device Features field, and the
+guest writes the subset that it understands into the Guest
+Features field. The only way to renegotiate is to reset the
+device.
+
+In particular, new fields in the device configuration header are
+indicated by offering a feature bit, so the guest can check
+before accessing that part of the configuration space.
+
+This allows for forwards and backwards compatibility: if the
+device is enhanced with a new feature bit, older guests will not
+write that feature bit back to the Guest Features field and it
+can go into backwards compatibility mode. Similarly, if a guest
+is enhanced with a feature that the device doesn't support, it
+will not see that feature bit in the Device Features field and
+can go into backwards compatibility mode (or, for poor
+implementations, set the FAILED Device Status bit).
+
+Access to feature bits 32 to 63 is enabled by Guest by setting
+feature bit 31. If this bit is unset, Device must assume that all
+feature bits > 31 are unset.
+
+  Configuration/Queue Vectors
+
+When MSI-X capability is present and enabled in the device
+(through standard PCI configuration space) 4 bytes at byte offset
+20 are used to map configuration change and queue interrupts to
+MSI-X vectors. In this case, the ISR Status field is unused, and
+device specific configuration starts at byte offset 24 in virtio
+header structure. When MSI-X capability is not enabled, device
+specific configuration starts at byte offset 20 in virtio header.
+
+Writing a valid MSI-X Table entry number, 0 to 0x7FF, to one of
+Configuration/Queue Vector registers, maps interrupts triggered
+by the configuration change/selected queue events respectively to
+the corresponding MSI-X vector. To disable interrupts for a
+specific event type, unmap it by writing a special NO_VECTOR
+value:
+
+/* Vector value used to disable MSI for queue */
+
+#define VIRTIO_MSI_NO_VECTOR            0xffff
+
+Reading these registers returns vector mapped to a given event,
+or NO_VECTOR if unmapped. All queue and configuration change
+events are unmapped by default.
+
+Note that mapping an event to vector might require allocating
+internal device resources, and might fail. Devices report such
+failures by returning the NO_VECTOR value when the relevant
+Vector field is read. After mapping an event to vector, the
+driver must verify success by reading the Vector field value: on
+success, the previously written value is returned, and on
+failure, NO_VECTOR is returned. If a mapping failure is detected,
+the driver can retry mapping with fewervectors, or disable MSI-X.
+
+  Virtqueue Configuration
+
+As a device can have zero or more virtqueues for bulk data
+transport (for example, the network driver has two), the driver
+needs to configure them as part of the device-specific
+configuration.
+
+This is done as follows, for each virtqueue a device has:
+
+  Write the virtqueue index (first queue is 0) to the Queue
+  Select field.
+
+  Read the virtqueue size from the Queue Size field, which is
+  always a power of 2. This controls how big the virtqueue is
+  (see below). If this field is 0, the virtqueue does not exist.
+
+  Allocate and zero virtqueue in contiguous physical memory, on a
+  4096 byte alignment. Write the physical address, divided by
+  4096 to the Queue Address field.[footnote:
+The 4096 is based on the x86 page size, but it's also large
+enough to ensure that the separate parts of the virtqueue are on
+separate cache lines.
+]
+
+  Optionally, if MSI-X capability is present and enabled on the
+  device, select a vector to use to request interrupts triggered
+  by virtqueue events. Write the MSI-X Table entry number
+  corresponding to this vector in Queue Vector field. Read the
+  Queue Vector field: on success, previously written value is
+  returned; on failure, NO_VECTOR value is returned.
+
+The Queue Size field controls the total number of bytes required
+for the virtqueue according to the following formula:
+
+#define ALIGN(x) (((x) + 4095) & ~4095)
+
+static inline unsigned vring_size(unsigned int qsz)
+
+{
+
+     return ALIGN(sizeof(struct vring_desc)*qsz + sizeof(u16)*(2
++ qsz))
+
+          + ALIGN(sizeof(struct vring_used_elem)*qsz);
+
+}
+
+This currently wastes some space with padding, but also allows
+future extensions. The virtqueue layout structure looks like this
+(qsz is the Queue Size field, which is a variable, so this code
+won't compile):
+
+struct vring {
+
+    /* The actual descriptors (16 bytes each) */
+
+    struct vring_desc desc[qsz];
+
+
+
+    /* A ring of available descriptor heads with free-running
+index. */
+
+    struct vring_avail avail;
+
+
+
+    // Padding to the next 4096 boundary.
+
+    char pad[];
+
+
+
+    // A ring of used descriptor heads with free-running index.
+
+    struct vring_used used;
+
+};
+
+  A Note on Virtqueue Endianness
+
+Note that the endian of these fields and everything else in the
+virtqueue is the native endian of the guest, not little-endian as
+PCI normally is. This makes for simpler guest code, and it is
+assumed that the host already has to be deeply aware of the guest
+endian so such an “endian-aware” device is not a significant
+issue.
+
+  Descriptor Table
+
+The descriptor table refers to the buffers the guest is using for
+the device. The addresses are physical addresses, and the buffers
+can be chained via the next field. Each descriptor describes a
+buffer which is read-only or write-only, but a chain of
+descriptors can contain both read-only and write-only buffers.
+
+No descriptor chain may be more than 2^32 bytes long in total.struct vring_desc {
+
+    /* Address (guest-physical). */
+
+    u64 addr;
+
+    /* Length. */
+
+    u32 len;
+
+/* This marks a buffer as continuing via the next field. */
+
+#define VRING_DESC_F_NEXT   1
+
+/* This marks a buffer as write-only (otherwise read-only). */
+
+#define VRING_DESC_F_WRITE     2
+
+/* This means the buffer contains a list of buffer descriptors.
+*/
+
+#define VRING_DESC_F_INDIRECT   4
+
+    /* The flags as indicated above. */
+
+    u16 flags;
+
+    /* Next field if flags & NEXT */
+
+    u16 next;
+
+};
+
+The number of descriptors in the table is specified by the Queue
+Size field for this virtqueue.
+
+  <sub:Indirect-Descriptors>Indirect Descriptors
+
+Some devices benefit by concurrently dispatching a large number
+of large requests. The VIRTIO_RING_F_INDIRECT_DESC feature can be
+used to allow this (see [cha:Reserved-Feature-Bits]). To increase
+ring capacity it is possible to store a table of indirect
+descriptors anywhere in memory, and insert a descriptor in main
+virtqueue (with flags&INDIRECT on) that refers to memory buffer
+containing this indirect descriptor table; fields addr and len
+refer to the indirect table address and length in bytes,
+respectively. The indirect table layout structure looks like this
+(len is the length of the descriptor that refers to this table,
+which is a variable, so this code won't compile):
+
+struct indirect_descriptor_table {
+
+    /* The actual descriptors (16 bytes each) */
+
+    struct vring_desc desc[len / 16];
+
+};
+
+The first indirect descriptor is located at start of the indirect
+descriptor table (index 0), additional indirect descriptors are
+chained by next field. An indirect descriptor without next field
+(with flags&NEXT off) signals the end of the indirect descriptor
+table, and transfers control back to the main virtqueue. An
+indirect descriptor can not refer to another indirect descriptor
+table (flags&INDIRECT must be off). A single indirect descriptor
+table can include both read-only and write-only descriptors;
+write-only flag (flags&WRITE) in the descriptor that refers to it
+is ignored.
+
+  Available Ring
+
+The available ring refers to what descriptors we are offering the
+device: it refers to the head of a descriptor chain. The “flags”
+field is currently 0 or 1: 1 indicating that we do not need an
+interrupt when the device consumes a descriptor from the
+available ring. Alternatively, the guest can ask the device to
+delay interrupts until an entry with an index specified by the “
+used_event” field is written in the used ring (equivalently,
+until the idx field in the used ring will reach the value
+used_event + 1). The method employed by the device is controlled
+by the VIRTIO_RING_F_EVENT_IDX feature bit (see [cha:Reserved-Feature-Bits]
+). This interrupt suppression is merely an optimization; it may
+not suppress interrupts entirely.
+
+The “idx” field indicates where we would put the next descriptor
+entry (modulo the ring size). This starts at 0, and increases.
+
+struct vring_avail {
+
+#define VRING_AVAIL_F_NO_INTERRUPT      1
+
+   u16 flags;
+
+   u16 idx;
+
+   u16 ring[qsz]; /* qsz is the Queue Size field read from device
+*/
+
+   u16 used_event;
+
+};
+
+  Used Ring
+
+The used ring is where the device returns buffers once it is done
+with them. The flags field can be used by the device to hint that
+no notification is necessary when the guest adds to the available
+ring. Alternatively, the “avail_event” field can be used by the
+device to hint that no notification is necessary until an entry
+with an index specified by the “avail_event” is written in the
+available ring (equivalently, until the idx field in the
+available ring will reach the value avail_event + 1). The method
+employed by the device is controlled by the guest through the
+VIRTIO_RING_F_EVENT_IDX feature bit (see [cha:Reserved-Feature-Bits]
+). [footnote:
+These fields are kept here because this is the only part of the
+virtqueue written by the device
+].
+
+Each entry in the ring is a pair: the head entry of the
+descriptor chain describing the buffer (this matches an entry
+placed in the available ring by the guest earlier), and the total
+of bytes written into the buffer. The latter is extremely useful
+for guests using untrusted buffers: if you do not know exactly
+how much has been written by the device, you usually have to zero
+the buffer to ensure no data leakage occurs.
+
+/* u32 is used here for ids for padding reasons. */
+
+struct vring_used_elem {
+
+    /* Index of start of used descriptor chain. */
+
+    u32 id;
+
+    /* Total length of the descriptor chain which was used
+(written to) */
+
+    u32 len;
+
+};
+
+
+
+struct vring_used {
+
+#define VRING_USED_F_NO_NOTIFY  1
+
+    u16 flags;
+
+    u16 idx;
+
+    struct vring_used_elem ring[qsz];
+
+    u16 avail_event;
+
+};
+
+  Helpers for Managing Virtqueues
+
+The Linux Kernel Source code contains the definitions above and
+helper routines in a more usable form, in
+include/linux/virtio_ring.h. This was explicitly licensed by IBM
+and Red Hat under the (3-clause) BSD license so that it can be
+freely used by all other projects, and is reproduced (with slight
+variation to remove Linux assumptions) in Appendix A.
+
+  Device Operation
+
+There are two parts to device operation: supplying new buffers to
+the device, and processing used buffers from the device. As an
+example, the virtio network device has two virtqueues: the
+transmit virtqueue and the receive virtqueue. The driver adds
+outgoing (read-only) packets to the transmit virtqueue, and then
+frees them after they are used. Similarly, incoming (write-only)
+buffers are added to the receive virtqueue, and processed after
+they are used.
+
+  Supplying Buffers to The Device
+
+Actual transfer of buffers from the guest OS to the device
+operates as follows:
+
+  Place the buffer(s) into free descriptor(s).
+
+  If there are no free descriptors, the guest may choose to
+    notify the device even if notifications are suppressed (to
+    reduce latency).[footnote:
+The Linux drivers do this only for read-only buffers: for
+write-only buffers, it is assumed that the driver is merely
+trying to keep the receive buffer ring full, and no notification
+of this expected condition is necessary.
+]
+
+  Place the id of the buffer in the next ring entry of the
+  available ring.
+
+  The steps (1) and (2) may be performed repeatedly if batching
+  is possible.
+
+  A memory barrier should be executed to ensure the device sees
+  the updated descriptor table and available ring before the next
+  step.
+
+  The available “idx” field should be increased by the number of
+  entries added to the available ring.
+
+  A memory barrier should be executed to ensure that we update
+  the idx field before checking for notification suppression.
+
+  If notifications are not suppressed, the device should be
+  notified of the new buffers.
+
+Note that the above code does not take precautions against the
+available ring buffer wrapping around: this is not possible since
+the ring buffer is the same size as the descriptor table, so step
+(1) will prevent such a condition.
+
+In addition, the maximum queue size is 32768 (it must be a power
+of 2 which fits in 16 bits), so the 16-bit “idx” value can always
+distinguish between a full and empty buffer.
+
+Here is a description of each stage in more detail.
+
+  Placing Buffers Into The Descriptor Table
+
+A buffer consists of zero or more read-only physically-contiguous
+elements followed by zero or more physically-contiguous
+write-only elements (it must have at least one element). This
+algorithm maps it into the descriptor table:
+
+  for each buffer element, b:
+
+  Get the next free descriptor table entry, d
+
+  Set d.addr to the physical address of the start of b
+
+  Set d.len to the length of b.
+
+  If b is write-only, set d.flags to VRING_DESC_F_WRITE,
+    otherwise 0.
+
+  If there is a buffer element after this:
+
+    Set d.next to the index of the next free descriptor element.
+
+    Set the VRING_DESC_F_NEXT bit in d.flags.
+
+In practice, the d.next fields are usually used to chain free
+descriptors, and a separate count kept to check there are enough
+free descriptors before beginning the mappings.
+
+  Updating The Available Ring
+
+The head of the buffer we mapped is the first d in the algorithm
+above. A naive implementation would do the following:
+
+avail->ring[avail->idx % qsz] = head;
+
+However, in general we can add many descriptors before we update
+the “idx” field (at which point they become visible to the
+device), so we keep a counter of how many we've added:
+
+avail->ring[(avail->idx + added++) % qsz] = head;
+
+  Updating The Index Field
+
+Once the idx field of the virtqueue is updated, the device will
+be able to access the descriptor entries we've created and the
+memory they refer to. This is why a memory barrier is generally
+used before the idx update, to ensure it sees the most up-to-date
+copy.
+
+The idx field always increments, and we let it wrap naturally at
+65536:
+
+avail->idx += added;
+
+  <sub:Notifying-The-Device>Notifying The Device
+
+Device notification occurs by writing the 16-bit virtqueue index
+of this virtqueue to the Queue Notify field of the virtio header
+in the first I/O region of the PCI device. This can be expensive,
+however, so the device can suppress such notifications if it
+doesn't need them. We have to be careful to expose the new idx
+value before checking the suppression flag: it's OK to notify
+gratuitously, but not to omit a required notification. So again,
+we use a memory barrier here before reading the flags or the
+avail_event field.
+
+If the VIRTIO_F_RING_EVENT_IDX feature is not negotiated, and if
+the VRING_USED_F_NOTIFY flag is not set, we go ahead and write to
+the PCI configuration space.
+
+If the VIRTIO_F_RING_EVENT_IDX feature is negotiated, we read the
+avail_event field in the available ring structure. If the
+available index crossed_the avail_event field value since the
+last notification, we go ahead and write to the PCI configuration
+space. The avail_event field wraps naturally at 65536 as well:
+
+(u16)(new_idx - avail_event - 1) < (u16)(new_idx - old_idx)
+
+  <sub:Receiving-Used-Buffers>Receiving Used Buffers From The
+  Device
+
+Once the device has used a buffer (read from or written to it, or
+parts of both, depending on the nature of the virtqueue and the
+device), it sends an interrupt, following an algorithm very
+similar to the algorithm used for the driver to send the device a
+buffer:
+
+  Write the head descriptor number to the next field in the used
+  ring.
+
+  Update the used ring idx.
+
+  Determine whether an interrupt is necessary:
+
+  If the VIRTIO_F_RING_EVENT_IDX feature is not negotiated: check
+    if f the VRING_AVAIL_F_NO_INTERRUPT flag is not set in avail-
+    >flags
+
+  If the VIRTIO_F_RING_EVENT_IDX feature is negotiated: check
+    whether the used index crossed the used_event field value
+    since the last update. The used_event field wraps naturally
+    at 65536 as well:(u16)(new_idx - used_event - 1) < (u16)(new_idx - old_idx)
+
+  If an interrupt is necessary:
+
+  If MSI-X capability is disabled:
+
+    Set the lower bit of the ISR Status field for the device.
+
+    Send the appropriate PCI interrupt for the device.
+
+  If MSI-X capability is enabled:
+
+    Request the appropriate MSI-X interrupt message for the
+      device, Queue Vector field sets the MSI-X Table entry
+      number.
+
+    If Queue Vector field value is NO_VECTOR, no interrupt
+      message is requested for this event.
+
+The guest interrupt handler should:
+
+  If MSI-X capability is disabled: read the ISR Status field,
+  which will reset it to zero. If the lower bit is zero, the
+  interrupt was not for this device. Otherwise, the guest driver
+  should look through the used rings of each virtqueue for the
+  device, to see if any progress has been made by the device
+  which requires servicing.
+
+  If MSI-X capability is enabled: look through the used rings of
+  each virtqueue mapped to the specific MSI-X vector for the
+  device, to see if any progress has been made by the device
+  which requires servicing.
+
+For each ring, guest should then disable interrupts by writing
+VRING_AVAIL_F_NO_INTERRUPT flag in avail structure, if required.
+It can then process used ring entries finally enabling interrupts
+by clearing the VRING_AVAIL_F_NO_INTERRUPT flag or updating the
+EVENT_IDX field in the available structure, Guest should then
+execute a memory barrier, and then recheck the ring empty
+condition. This is necessary to handle the case where, after the
+last check and before enabling interrupts, an interrupt has been
+suppressed by the device:
+
+vring_disable_interrupts(vq);
+
+for (;;) {
+
+    if (vq->last_seen_used != vring->used.idx) {
+
+               vring_enable_interrupts(vq);
+
+               mb();
+
+               if (vq->last_seen_used != vring->used.idx)
+
+                       break;
+
+    }
+
+    struct vring_used_elem *e =
+vring.used->ring[vq->last_seen_used%vsz];
+
+    process_buffer(e);
+
+    vq->last_seen_used++;
+
+}
+
+  Dealing With Configuration Changes
+
+Some virtio PCI devices can change the device configuration
+state, as reflected in the virtio header in the PCI configuration
+space. In this case:
+
+  If MSI-X capability is disabled: an interrupt is delivered and
+  the second highest bit is set in the ISR Status field to
+  indicate that the driver should re-examine the configuration
+  space.Note that a single interrupt can indicate both that one
+  or more virtqueue has been used and that the configuration
+  space has changed: even if the config bit is set, virtqueues
+  must be scanned.
+
+  If MSI-X capability is enabled: an interrupt message is
+  requested. The Configuration Vector field sets the MSI-X Table
+  entry number to use. If Configuration Vector field value is
+  NO_VECTOR, no interrupt message is requested for this event.
+
+Creating New Device Types
+
+Various considerations are necessary when creating a new device
+type:
+
+  How Many Virtqueues?
+
+It is possible that a very simple device will operate entirely
+through its configuration space, but most will need at least one
+virtqueue in which it will place requests. A device with both
+input and output (eg. console and network devices described here)
+need two queues: one which the driver fills with buffers to
+receive input, and one which the driver places buffers to
+transmit output.
+
+  What Configuration Space Layout?
+
+Configuration space is generally used for rarely-changing or
+initialization-time parameters. But it is a limited resource, so
+it might be better to use a virtqueue to update configuration
+information (the network device does this for filtering,
+otherwise the table in the config space could potentially be very
+large).
+
+Note that this space is generally the guest's native endian,
+rather than PCI's little-endian.
+
+  What Device Number?
+
+Currently device numbers are assigned quite freely: a simple
+request mail to the author of this document or the Linux
+virtualization mailing list[footnote:
+
+https://lists.linux-foundation.org/mailman/listinfo/virtualization
+] will be sufficient to secure a unique one.
+
+Meanwhile for experimental drivers, use 65535 and work backwards.
+
+  How many MSI-X vectors?
+
+Using the optional MSI-X capability devices can speed up
+interrupt processing by removing the need to read ISR Status
+register by guest driver (which might be an expensive operation),
+reducing interrupt sharing between devices and queues within the
+device, and handling interrupts from multiple CPUs. However, some
+systems impose a limit (which might be as low as 256) on the
+total number of MSI-X vectors that can be allocated to all
+devices. Devices and/or device drivers should take this into
+account, limiting the number of vectors used unless the device is
+expected to cause a high volume of interrupts. Devices can
+control the number of vectors used by limiting the MSI-X Table
+Size or not presenting MSI-X capability in PCI configuration
+space. Drivers can control this by mapping events to as small
+number of vectors as possible, or disabling MSI-X capability
+altogether.
+
+  Message Framing
+
+The descriptors used for a buffer should not effect the semantics
+of the message, except for the total length of the buffer. For
+example, a network buffer consists of a 10 byte header followed
+by the network packet. Whether this is presented in the ring
+descriptor chain as (say) a 10 byte buffer and a 1514 byte
+buffer, or a single 1524 byte buffer, or even three buffers,
+should have no effect.
+
+In particular, no implementation should use the descriptor
+boundaries to determine the size of any header in a request.[footnote:
+The current qemu device implementations mistakenly insist that
+the first descriptor cover the header in these cases exactly, so
+a cautious driver should arrange it so.
+]
+
+  Device Improvements
+
+Any change to configuration space, or new virtqueues, or
+behavioural changes, should be indicated by negotiation of a new
+feature bit. This establishes clarity[footnote:
+Even if it does mean documenting design or implementation
+mistakes!
+] and avoids future expansion problems.
+
+Clusters of functionality which are always implemented together
+can use a single bit, but if one feature makes sense without the
+others they should not be gratuitously grouped together to
+conserve feature bits. We can always extend the spec when the
+first person needs more than 24 feature bits for their device.
+
+[LaTeX Command: printnomenclature]
+
+Appendix A: virtio_ring.h
+
+#ifndef VIRTIO_RING_H
+
+#define VIRTIO_RING_H
+
+/* An interface for efficient virtio implementation.
+
+ *
+
+ * This header is BSD licensed so anyone can use the definitions
+
+ * to implement compatible drivers/servers.
+
+ *
+
+ * Copyright 2007, 2009, IBM Corporation
+
+ * Copyright 2011, Red Hat, Inc
+
+ * All rights reserved.
+
+ *
+
+ * Redistribution and use in source and binary forms, with or
+without
+
+ * modification, are permitted provided that the following
+conditions
+
+ * are met:
+
+ * 1. Redistributions of source code must retain the above
+copyright
+
+ *    notice, this list of conditions and the following
+disclaimer.
+
+ * 2. Redistributions in binary form must reproduce the above
+copyright
+
+ *    notice, this list of conditions and the following
+disclaimer in the
+
+ *    documentation and/or other materials provided with the
+distribution.
+
+ * 3. Neither the name of IBM nor the names of its contributors
+
+ *    may be used to endorse or promote products derived from
+this software
+
+ *    without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+CONTRIBUTORS ``AS IS'' AND
+
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE
+
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE
+
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE
+LIABLE
+
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL
+
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS
+
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION)
+
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT
+
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+IN ANY WAY
+
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF
+
+ * SUCH DAMAGE.
+
+ */
+
+
+
+/* This marks a buffer as continuing via the next field. */
+
+#define VRING_DESC_F_NEXT       1
+
+/* This marks a buffer as write-only (otherwise read-only). */
+
+#define VRING_DESC_F_WRITE      2
+
+
+
+/* The Host uses this in used->flags to advise the Guest: don't
+kick me
+
+ * when you add a buffer.  It's unreliable, so it's simply an
+
+ * optimization.  Guest will still kick if it's out of buffers.
+*/
+
+#define VRING_USED_F_NO_NOTIFY  1
+
+/* The Guest uses this in avail->flags to advise the Host: don't
+
+ * interrupt me when you consume a buffer.  It's unreliable, so
+it's
+
+ * simply an optimization.  */
+
+#define VRING_AVAIL_F_NO_INTERRUPT      1
+
+
+
+/* Virtio ring descriptors: 16 bytes.
+
+ * These can chain together via "next". */
+
+struct vring_desc {
+
+        /* Address (guest-physical). */
+
+        uint64_t addr;
+
+        /* Length. */
+
+        uint32_t len;
+
+        /* The flags as indicated above. */
+
+        uint16_t flags;
+
+        /* We chain unused descriptors via this, too */
+
+        uint16_t next;
+
+};
+
+
+
+struct vring_avail {
+
+        uint16_t flags;
+
+        uint16_t idx;
+
+        uint16_t ring[];
+
+        uint16_t used_event;
+
+};
+
+
+
+/* u32 is used here for ids for padding reasons. */
+
+struct vring_used_elem {
+
+        /* Index of start of used descriptor chain. */
+
+        uint32_t id;
+
+        /* Total length of the descriptor chain which was written
+to. */
+
+        uint32_t len;
+
+};
+
+
+
+struct vring_used {
+
+        uint16_t flags;
+
+        uint16_t idx;
+
+        struct vring_used_elem ring[];
+
+        uint16_t avail_event;
+
+};
+
+
+
+struct vring {
+
+        unsigned int num;
+
+
+
+        struct vring_desc *desc;
+
+        struct vring_avail *avail;
+
+        struct vring_used *used;
+
+};
+
+
+
+/* The standard layout for the ring is a continuous chunk of
+memory which
+
+ * looks like this.  We assume num is a power of 2.
+
+ *
+
+ * struct vring {
+
+ *      // The actual descriptors (16 bytes each)
+
+ *      struct vring_desc desc[num];
+
+ *
+
+ *      // A ring of available descriptor heads with free-running
+index.
+
+ *      __u16 avail_flags;
+
+ *      __u16 avail_idx;
+
+ *      __u16 available[num];
+
+ *
+
+ *      // Padding to the next align boundary.
+
+ *      char pad[];
+
+ *
+
+ *      // A ring of used descriptor heads with free-running
+index.
+
+ *      __u16 used_flags;
+
+ *      __u16 EVENT_IDX;
+
+ *      struct vring_used_elem used[num];
+
+ * };
+
+ * Note: for virtio PCI, align is 4096.
+
+ */
+
+static inline void vring_init(struct vring *vr, unsigned int num,
+void *p,
+
+                              unsigned long align)
+
+{
+
+        vr->num = num;
+
+        vr->desc = p;
+
+        vr->avail = p + num*sizeof(struct vring_desc);
+
+        vr->used = (void *)(((unsigned long)&vr->avail->ring[num]
+
+                              + align-1)
+
+                            & ~(align - 1));
+
+}
+
+
+
+static inline unsigned vring_size(unsigned int num, unsigned long
+align)
+
+{
+
+        return ((sizeof(struct vring_desc)*num +
+sizeof(uint16_t)*(2+num)
+
+                 + align - 1) & ~(align - 1))
+
+                + sizeof(uint16_t)*3 + sizeof(struct
+vring_used_elem)*num;
+
+}
+
+
+
+static inline int vring_need_event(uint16_t event_idx, uint16_t
+new_idx, uint16_t old_idx)
+
+{
+
+         return (uint16_t)(new_idx - event_idx - 1) <
+(uint16_t)(new_idx - old_idx);
+
+}
+
+#endif /* VIRTIO_RING_H */
+
+<cha:Reserved-Feature-Bits>Appendix B: Reserved Feature Bits
+
+Currently there are five device-independent feature bits defined:
+
+  VIRTIO_F_NOTIFY_ON_EMPTY (24) Negotiating this feature
+  indicates that the driver wants an interrupt if the device runs
+  out of available descriptors on a virtqueue, even though
+  interrupts are suppressed using the VRING_AVAIL_F_NO_INTERRUPT
+  flag or the used_event field. An example of this is the
+  networking driver: it doesn't need to know every time a packet
+  is transmitted, but it does need to free the transmitted
+  packets a finite time after they are transmitted. It can avoid
+  using a timer if the device interrupts it when all the packets
+  are transmitted.
+
+  VIRTIO_F_RING_INDIRECT_DESC (28) Negotiating this feature
+  indicates that the driver can use descriptors with the
+  VRING_DESC_F_INDIRECT flag set, as described in [sub:Indirect-Descriptors]
+  .
+
+  VIRTIO_F_RING_EVENT_IDX(29) This feature enables the used_event
+  and the avail_event fields. If set, it indicates that the
+  device should ignore the flags field in the available ring
+  structure. Instead, the used_event field in this structure is
+  used by guest to suppress device interrupts. Further, the
+  driver should ignore the flags field in the used ring
+  structure. Instead, the avail_event field in this structure is
+  used by the device to suppress notifications. If unset, the
+  driver should ignore the used_event field; the device should
+  ignore the avail_event field; the flags field is used
+
+  VIRTIO_F_BAD_FEATURE(30) This feature should never be
+  negotiated by the guest; doing so is an indication that the
+  guest is faulty[footnote:
+An experimental virtio PCI driver contained in Linux version
+2.6.25 had this problem, and this feature bit can be used to
+detect it.
+]
+
+  VIRTIO_F_FEATURES_HIGH(31) This feature indicates that the
+  device supports feature bits 32:63. If unset, feature bits
+  32:63 are unset.
+
+Appendix C: Network Device
+
+The virtio network device is a virtual ethernet card, and is the
+most complex of the devices supported so far by virtio. It has
+enhanced rapidly and demonstrates clearly how support for new
+features should be added to an existing device. Empty buffers are
+placed in one virtqueue for receiving packets, and outgoing
+packets are enqueued into another for transmission in that order.
+A third command queue is used to control advanced filtering
+features.
+
+  Configuration
+
+  Subsystem Device ID 1
+
+  Virtqueues 0:receiveq. 1:transmitq. 2:controlq[footnote:
+Only if VIRTIO_NET_F_CTRL_VQ set
+]
+
+  Feature bits
+
+  VIRTIO_NET_F_CSUM (0) Device handles packets with partial
+    checksum
+
+  VIRTIO_NET_F_GUEST_CSUM (1) Guest handles packets with partial
+    checksum
+
+  VIRTIO_NET_F_MAC (5) Device has given MAC address.
+
+  VIRTIO_NET_F_GSO (6) (Deprecated) device handles packets with
+    any GSO type.[footnote:
+It was supposed to indicate segmentation offload support, but
+upon further investigation it became clear that multiple bits
+were required.
+]
+
+  VIRTIO_NET_F_GUEST_TSO4 (7) Guest can receive TSOv4.
+
+  VIRTIO_NET_F_GUEST_TSO6 (8) Guest can receive TSOv6.
+
+  VIRTIO_NET_F_GUEST_ECN (9) Guest can receive TSO with ECN.
+
+  VIRTIO_NET_F_GUEST_UFO (10) Guest can receive UFO.
+
+  VIRTIO_NET_F_HOST_TSO4 (11) Device can receive TSOv4.
+
+  VIRTIO_NET_F_HOST_TSO6 (12) Device can receive TSOv6.
+
+  VIRTIO_NET_F_HOST_ECN (13) Device can receive TSO with ECN.
+
+  VIRTIO_NET_F_HOST_UFO (14) Device can receive UFO.
+
+  VIRTIO_NET_F_MRG_RXBUF (15) Guest can merge receive buffers.
+
+  VIRTIO_NET_F_STATUS (16) Configuration status field is
+    available.
+
+  VIRTIO_NET_F_CTRL_VQ (17) Control channel is available.
+
+  VIRTIO_NET_F_CTRL_RX (18) Control channel RX mode support.
+
+  VIRTIO_NET_F_CTRL_VLAN (19) Control channel VLAN filtering.
+
+  Device configuration layout Two configuration fields are
+  currently defined. The mac address field always exists (though
+  is only valid if VIRTIO_NET_F_MAC is set), and the status field
+  only exists if VIRTIO_NET_F_STATUS is set. Only one bit is
+  currently defined for the status field: VIRTIO_NET_S_LINK_UP. #define VIRTIO_NET_S_LINK_UP   1
+
+
+
+struct virtio_net_config {
+
+    u8 mac[6];
+
+    u16 status;
+
+};
+
+  Device Initialization
+
+  The initialization routine should identify the receive and
+  transmission virtqueues.
+
+  If the VIRTIO_NET_F_MAC feature bit is set, the configuration
+  space “mac” entry indicates the “physical” address of the the
+  network card, otherwise a private MAC address should be
+  assigned. All guests are expected to negotiate this feature if
+  it is set.
+
+  If the VIRTIO_NET_F_CTRL_VQ feature bit is negotiated, identify
+  the control virtqueue.
+
+  If the VIRTIO_NET_F_STATUS feature bit is negotiated, the link
+  status can be read from the bottom bit of the “status” config
+  field. Otherwise, the link should be assumed active.
+
+  The receive virtqueue should be filled with receive buffers.
+  This is described in detail below in “Setting Up Receive
+  Buffers”.
+
+  A driver can indicate that it will generate checksumless
+  packets by negotating the VIRTIO_NET_F_CSUM feature. This “
+  checksum offload” is a common feature on modern network cards.
+
+  If that feature is negotiated, a driver can use TCP or UDP
+  segmentation offload by negotiating the VIRTIO_NET_F_HOST_TSO4
+  (IPv4 TCP), VIRTIO_NET_F_HOST_TSO6 (IPv6 TCP) and
+  VIRTIO_NET_F_HOST_UFO (UDP fragmentation) features. It should
+  not send TCP packets requiring segmentation offload which have
+  the Explicit Congestion Notification bit set, unless the
+  VIRTIO_NET_F_HOST_ECN feature is negotiated.[footnote:
+This is a common restriction in real, older network cards.
+]
+
+  The converse features are also available: a driver can save the
+  virtual device some work by negotiating these features.[footnote:
+For example, a network packet transported between two guests on
+the same system may not require checksumming at all, nor
+segmentation, if both guests are amenable.
+] The VIRTIO_NET_F_GUEST_CSUM feature indicates that partially
+  checksummed packets can be received, and if it can do that then
+  the VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
+  VIRTIO_NET_F_GUEST_UFO and VIRTIO_NET_F_GUEST_ECN are the input
+  equivalents of the features described above. See “Receiving
+  Packets” below.
+
+  Device Operation
+
+Packets are transmitted by placing them in the transmitq, and
+buffers for incoming packets are placed in the receiveq. In each
+case, the packet itself is preceeded by a header:
+
+struct virtio_net_hdr {
+
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM    1
+
+       u8 flags;
+
+#define VIRTIO_NET_HDR_GSO_NONE        0
+
+#define VIRTIO_NET_HDR_GSO_TCPV4       1
+
+#define VIRTIO_NET_HDR_GSO_UDP          3
+
+#define VIRTIO_NET_HDR_GSO_TCPV6       4
+
+#define VIRTIO_NET_HDR_GSO_ECN      0x80
+
+       u8 gso_type;
+
+       u16 hdr_len;
+
+       u16 gso_size;
+
+       u16 csum_start;
+
+       u16 csum_offset;
+
+/* Only if VIRTIO_NET_F_MRG_RXBUF: */
+
+       u16 num_buffers
+
+};
+
+The controlq is used to control device features such as
+filtering.
+
+  Packet Transmission
+
+Transmitting a single packet is simple, but varies depending on
+the different features the driver negotiated.
+
+  If the driver negotiated VIRTIO_NET_F_CSUM, and the packet has
+  not been fully checksummed, then the virtio_net_hdr's fields
+  are set as follows. Otherwise, the packet must be fully
+  checksummed, and flags is zero.
+
+  flags has the VIRTIO_NET_HDR_F_NEEDS_CSUM set,
+
+  <ite:csum_start-is-set>csum_start is set to the offset within
+    the packet to begin checksumming, and
+
+  csum_offset indicates how many bytes after the csum_start the
+    new (16 bit ones' complement) checksum should be placed.[footnote:
+For example, consider a partially checksummed TCP (IPv4) packet.
+It will have a 14 byte ethernet header and 20 byte IP header
+followed by the TCP header (with the TCP checksum field 16 bytes
+into that header). csum_start will be 14+20 = 34 (the TCP
+checksum includes the header), and csum_offset will be 16. The
+value in the TCP checksum field will be the sum of the TCP pseudo
+header, so that replacing it by the ones' complement checksum of
+the TCP header and body will give the correct result.
+]
+
+  <enu:If-the-driver>If the driver negotiated
+  VIRTIO_NET_F_HOST_TSO4, TSO6 or UFO, and the packet requires
+  TCP segmentation or UDP fragmentation, then the “gso_type”
+  field is set to VIRTIO_NET_HDR_GSO_TCPV4, TCPV6 or UDP.
+  (Otherwise, it is set to VIRTIO_NET_HDR_GSO_NONE). In this
+  case, packets larger than 1514 bytes can be transmitted: the
+  metadata indicates how to replicate the packet header to cut it
+  into smaller packets. The other gso fields are set:
+
+  hdr_len is a hint to the device as to how much of the header
+    needs to be kept to copy into each packet, usually set to the
+    length of the headers, including the transport header.[footnote:
+Due to various bugs in implementations, this field is not useful
+as a guarantee of the transport header size.
+]
+
+  gso_size is the size of the packet beyond that header (ie.
+    MSS).
+
+  If the driver negotiated the VIRTIO_NET_F_HOST_ECN feature, the
+    VIRTIO_NET_HDR_GSO_ECN bit may be set in “gso_type” as well,
+    indicating that the TCP packet has the ECN bit set.[footnote:
+This case is not handled by some older hardware, so is called out
+specifically in the protocol.
+]
+
+  If the driver negotiated the VIRTIO_NET_F_MRG_RXBUF feature,
+  the num_buffers field is set to zero.
+
+  The header and packet are added as one output buffer to the
+  transmitq, and the device is notified of the new entry (see [sub:Notifying-The-Device]
+  ).[footnote:
+Note that the header will be two bytes longer for the
+VIRTIO_NET_F_MRG_RXBUF case.
+]
+
+  Packet Transmission Interrupt
+
+Often a driver will suppress transmission interrupts using the
+VRING_AVAIL_F_NO_INTERRUPT flag (see [sub:Receiving-Used-Buffers]
+) and check for used packets in the transmit path of following
+packets. However, it will still receive interrupts if the
+VIRTIO_F_NOTIFY_ON_EMPTY feature is negotiated, indicating that
+the transmission queue is completely emptied.
+
+The normal behavior in this interrupt handler is to retrieve and
+new descriptors from the used ring and free the corresponding
+headers and packets.
+
+  Setting Up Receive Buffers
+
+It is generally a good idea to keep the receive virtqueue as
+fully populated as possible: if it runs out, network performance
+will suffer.
+
+If the VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6 or
+VIRTIO_NET_F_GUEST_UFO features are used, the Guest will need to
+accept packets of up to 65550 bytes long (the maximum size of a
+TCP or UDP packet, plus the 14 byte ethernet header), otherwise
+1514 bytes. So unless VIRTIO_NET_F_MRG_RXBUF is negotiated, every
+buffer in the receive queue needs to be at least this length [footnote:
+Obviously each one can be split across multiple descriptor
+elements.
+].
+
+If VIRTIO_NET_F_MRG_RXBUF is negotiated, each buffer must be at
+least the size of the struct virtio_net_hdr.
+
+  Packet Receive Interrupt
+
+When a packet is copied into a buffer in the receiveq, the
+optimal path is to disable further interrupts for the receiveq
+(see [sub:Receiving-Used-Buffers]) and process packets until no
+more are found, then re-enable them.
+
+Processing packet involves:
+
+  If the driver negotiated the VIRTIO_NET_F_MRG_RXBUF feature,
+  then the “num_buffers” field indicates how many descriptors
+  this packet is spread over (including this one). This allows
+  receipt of large packets without having to allocate large
+  buffers. In this case, there will be at least “num_buffers” in
+  the used ring, and they should be chained together to form a
+  single packet. The other buffers will not begin with a struct
+  virtio_net_hdr.
+
+  If the VIRTIO_NET_F_MRG_RXBUF feature was not negotiated, or
+  the “num_buffers” field is one, then the entire packet will be
+  contained within this buffer, immediately following the struct
+  virtio_net_hdr.
+
+  If the VIRTIO_NET_F_GUEST_CSUM feature was negotiated, the
+  VIRTIO_NET_HDR_F_NEEDS_CSUM bit in the “flags” field may be
+  set: if so, the checksum on the packet is incomplete and the “
+  csum_start” and “csum_offset” fields indicate how to calculate
+  it (see [ite:csum_start-is-set]).
+
+  If the VIRTIO_NET_F_GUEST_TSO4, TSO6 or UFO options were
+  negotiated, then the “gso_type” may be something other than
+  VIRTIO_NET_HDR_GSO_NONE, and the “gso_size” field indicates the
+  desired MSS (see [enu:If-the-driver]).Control Virtqueue
+
+The driver uses the control virtqueue (if VIRTIO_NET_F_VTRL_VQ is
+negotiated) to send commands to manipulate various features of
+the device which would not easily map into the configuration
+space.
+
+All commands are of the following form:
+
+struct virtio_net_ctrl {
+
+       u8 class;
+
+       u8 command;
+
+       u8 command-specific-data[];
+
+       u8 ack;
+
+};
+
+
+
+/* ack values */
+
+#define VIRTIO_NET_OK     0
+
+#define VIRTIO_NET_ERR    1
+
+The class, command and command-specific-data are set by the
+driver, and the device sets the ack byte. There is little it can
+do except issue a diagnostic if the ack byte is not
+VIRTIO_NET_OK.
+
+  Packet Receive Filtering
+
+If the VIRTIO_NET_F_CTRL_RX feature is negotiated, the driver can
+send control commands for promiscuous mode, multicast receiving,
+and filtering of MAC addresses.
+
+Note that in general, these commands are best-effort: unwanted
+packets may still arrive.
+
+  Setting Promiscuous Mode
+
+#define VIRTIO_NET_CTRL_RX    0
+
+ #define VIRTIO_NET_CTRL_RX_PROMISC      0
+
+ #define VIRTIO_NET_CTRL_RX_ALLMULTI     1
+
+The class VIRTIO_NET_CTRL_RX has two commands:
+VIRTIO_NET_CTRL_RX_PROMISC turns promiscuous mode on and off, and
+VIRTIO_NET_CTRL_RX_ALLMULTI turns all-multicast receive on and
+off. The command-specific-data is one byte containing 0 (off) or
+1 (on).
+
+  Setting MAC Address Filtering
+
+struct virtio_net_ctrl_mac {
+
+       u32 entries;
+
+       u8 macs[entries][ETH_ALEN];
+
+};
+
+
+
+#define VIRTIO_NET_CTRL_MAC    1
+
+ #define VIRTIO_NET_CTRL_MAC_TABLE_SET        0
+
+The device can filter incoming packets by any number of
+destination MAC addresses.[footnote:
+Since there are no guarentees, it can use a hash filter
+orsilently switch to allmulti or promiscuous mode if it is given
+too many addresses.
+] This table is set using the class VIRTIO_NET_CTRL_MAC and the
+command VIRTIO_NET_CTRL_MAC_TABLE_SET. The command-specific-data
+is two variable length tables of 6-byte MAC addresses. The first
+table contains unicast addresses, and the second contains
+multicast addresses.
+
+  VLAN Filtering
+
+If the driver negotiates the VIRTION_NET_F_CTRL_VLAN feature, it
+can control a VLAN filter table in the device.
+
+#define VIRTIO_NET_CTRL_VLAN       2
+
+ #define VIRTIO_NET_CTRL_VLAN_ADD             0
+
+ #define VIRTIO_NET_CTRL_VLAN_DEL             1
+
+Both the VIRTIO_NET_CTRL_VLAN_ADD and VIRTIO_NET_CTRL_VLAN_DEL
+command take a 16-bit VLAN id as the command-specific-data.
+
+Appendix D: Block Device
+
+The virtio block device is a simple virtual block device (ie.
+disk). Read and write requests (and other exotic requests) are
+placed in the queue, and serviced (probably out of order) by the
+device except where noted.
+
+  Configuration
+
+  Subsystem Device ID 2
+
+  Virtqueues 0:requestq.
+
+  Feature bits
+
+  VIRTIO_BLK_F_BARRIER (0) Host supports request barriers.
+
+  VIRTIO_BLK_F_SIZE_MAX (1) Maximum size of any single segment is
+    in “size_max”.
+
+  VIRTIO_BLK_F_SEG_MAX (2) Maximum number of segments in a
+    request is in “seg_max”.
+
+  VIRTIO_BLK_F_GEOMETRY (4) Disk-style geometry specified in “
+    geometry”.
+
+  VIRTIO_BLK_F_RO (5) Device is read-only.
+
+  VIRTIO_BLK_F_BLK_SIZE (6) Block size of disk is in “blk_size”.
+
+  VIRTIO_BLK_F_SCSI (7) Device supports scsi packet commands.
+
+  VIRTIO_BLK_F_FLUSH (9) Cache flush command support.
+
+
+
+  Device configuration layout The capacity of the device
+  (expressed in 512-byte sectors) is always present. The
+  availability of the others all depend on various feature bits
+  as indicated above. struct virtio_blk_config {
+
+       u64 capacity;
+
+       u32 size_max;
+
+       u32 seg_max;
+
+       struct virtio_blk_geometry {
+
+               u16 cylinders;
+
+               u8 heads;
+
+               u8 sectors;
+
+       } geometry;
+
+       u32 blk_size;
+
+
+
+};
+
+  Device Initialization
+
+  The device size should be read from the “capacity”
+  configuration field. No requests should be submitted which goes
+  beyond this limit.
+
+  If the VIRTIO_BLK_F_BLK_SIZE feature is negotiated, the
+  blk_size field can be read to determine the optimal sector size
+  for the driver to use. This does not effect the units used in
+  the protocol (always 512 bytes), but awareness of the correct
+  value can effect performance.
+
+  If the VIRTIO_BLK_F_RO feature is set by the device, any write
+  requests will fail.
+
+
+
+  Device Operation
+
+The driver queues requests to the virtqueue, and they are used by
+the device (not necessarily in order). Each request is of form:
+
+struct virtio_blk_req {
+
+
+
+       u32 type;
+
+       u32 ioprio;
+
+       u64 sector;
+
+       char data[][512];
+
+       u8 status;
+
+};
+
+If the device has VIRTIO_BLK_F_SCSI feature, it can also support
+scsi packet command requests, each of these requests is of form:struct virtio_scsi_pc_req {
+
+       u32 type;
+
+       u32 ioprio;
+
+       u64 sector;
+
+    char cmd[];
+
+       char data[][512];
+
+#define SCSI_SENSE_BUFFERSIZE   96
+
+    u8 sense[SCSI_SENSE_BUFFERSIZE];
+
+    u32 errors;
+
+    u32 data_len;
+
+    u32 sense_len;
+
+    u32 residual;
+
+       u8 status;
+
+};
+
+The type of the request is either a read (VIRTIO_BLK_T_IN), a
+write (VIRTIO_BLK_T_OUT), a scsi packet command
+(VIRTIO_BLK_T_SCSI_CMD or VIRTIO_BLK_T_SCSI_CMD_OUT[footnote:
+the SCSI_CMD and SCSI_CMD_OUT types are equivalent, the device
+does not distinguish between them
+]) or a flush (VIRTIO_BLK_T_FLUSH or VIRTIO_BLK_T_FLUSH_OUT[footnote:
+the FLUSH and FLUSH_OUT types are equivalent, the device does not
+distinguish between them
+]). If the device has VIRTIO_BLK_F_BARRIER feature the high bit
+(VIRTIO_BLK_T_BARRIER) indicates that this request acts as a
+barrier and that all preceeding requests must be complete before
+this one, and all following requests must not be started until
+this is complete. Note that a barrier does not flush caches in
+the underlying backend device in host, and thus does not serve as
+data consistency guarantee. Driver must use FLUSH request to
+flush the host cache.
+
+#define VIRTIO_BLK_T_IN           0
+
+#define VIRTIO_BLK_T_OUT          1
+
+#define VIRTIO_BLK_T_SCSI_CMD     2
+
+#define VIRTIO_BLK_T_SCSI_CMD_OUT 3
+
+#define VIRTIO_BLK_T_FLUSH        4
+
+#define VIRTIO_BLK_T_FLUSH_OUT    5
+
+#define VIRTIO_BLK_T_BARRIER    0x80000000
+
+The ioprio field is a hint about the relative priorities of
+requests to the device: higher numbers indicate more important
+requests.
+
+The sector number indicates the offset (multiplied by 512) where
+the read or write is to occur. This field is unused and set to 0
+for scsi packet commands and for flush commands.
+
+The cmd field is only present for scsi packet command requests,
+and indicates the command to perform. This field must reside in a
+single, separate read-only buffer; command length can be derived
+from the length of this buffer.
+
+Note that these first three (four for scsi packet commands)
+fields are always read-only: the data field is either read-only
+or write-only, depending on the request. The size of the read or
+write can be derived from the total size of the request buffers.
+
+The sense field is only present for scsi packet command requests,
+and indicates the buffer for scsi sense data.
+
+The data_len field is only present for scsi packet command
+requests, this field is deprecated, and should be ignored by the
+driver. Historically, devices copied data length there.
+
+The sense_len field is only present for scsi packet command
+requests and indicates the number of bytes actually written to
+the sense buffer.
+
+The residual field is only present for scsi packet command
+requests and indicates the residual size, calculated as data
+length - number of bytes actually transferred.
+
+The final status byte is written by the device: either
+VIRTIO_BLK_S_OK for success, VIRTIO_BLK_S_IOERR for host or guest
+error or VIRTIO_BLK_S_UNSUPP for a request unsupported by host:#define VIRTIO_BLK_S_OK        0
+
+#define VIRTIO_BLK_S_IOERR     1
+
+#define VIRTIO_BLK_S_UNSUPP    2
+
+Historically, devices assumed that the fields type, ioprio and
+sector reside in a single, separate read-only buffer; the fields
+errors, data_len, sense_len and residual reside in a single,
+separate write-only buffer; the sense field in a separate
+write-only buffer of size 96 bytes, by itself; the fields errors,
+data_len, sense_len and residual in a single write-only buffer;
+and the status field is a separate read-only buffer of size 1
+byte, by itself.
+
+Appendix E: Console Device
+
+The virtio console device is a simple device for data input and
+output. A device may have one or more ports. Each port has a pair
+of input and output virtqueues. Moreover, a device has a pair of
+control IO virtqueues. The control virtqueues are used to
+communicate information between the device and the driver about
+ports being opened and closed on either side of the connection,
+indication from the host about whether a particular port is a
+console port, adding new ports, port hot-plug/unplug, etc., and
+indication from the guest about whether a port or a device was
+successfully added, port open/close, etc.. For data IO, one or
+more empty buffers are placed in the receive queue for incoming
+data and outgoing characters are placed in the transmit queue.
+
+  Configuration
+
+  Subsystem Device ID 3
+
+  Virtqueues 0:receiveq(port0). 1:transmitq(port0), 2:control
+  receiveq[footnote:
+Ports 2 onwards only if VIRTIO_CONSOLE_F_MULTIPORT is set
+], 3:control transmitq, 4:receiveq(port1), 5:transmitq(port1),
+  ...
+
+  Feature bits
+
+  VIRTIO_CONSOLE_F_SIZE (0) Configuration cols and rows fields
+    are valid.
+
+  VIRTIO_CONSOLE_F_MULTIPORT(1) Device has support for multiple
+    ports; configuration fields nr_ports and max_nr_ports are
+    valid and control virtqueues will be used.
+
+  Device configuration layout The size of the console is supplied
+  in the configuration space if the VIRTIO_CONSOLE_F_SIZE feature
+  is set. Furthermore, if the VIRTIO_CONSOLE_F_MULTIPORT feature
+  is set, the maximum number of ports supported by the device can
+  be fetched.struct virtio_console_config {
+
+       u16 cols;
+
+       u16 rows;
+
+
+
+       u32 max_nr_ports;
+
+};
+
+  Device Initialization
+
+  If the VIRTIO_CONSOLE_F_SIZE feature is negotiated, the driver
+  can read the console dimensions from the configuration fields.
+
+  If the VIRTIO_CONSOLE_F_MULTIPORT feature is negotiated, the
+  driver can spawn multiple ports, not all of which may be
+  attached to a console. Some could be generic ports. In this
+  case, the control virtqueues are enabled and according to the
+  max_nr_ports configuration-space value, the appropriate number
+  of virtqueues are created. A control message indicating the
+  driver is ready is sent to the host. The host can then send
+  control messages for adding new ports to the device. After
+  creating and initializing each port, a
+  VIRTIO_CONSOLE_PORT_READY control message is sent to the host
+  for that port so the host can let us know of any additional
+  configuration options set for that port.
+
+  The receiveq for each port is populated with one or more
+  receive buffers.
+
+  Device Operation
+
+  For output, a buffer containing the characters is placed in the
+  port's transmitq.[footnote:
+Because this is high importance and low bandwidth, the current
+Linux implementation polls for the buffer to be used, rather than
+waiting for an interrupt, simplifying the implementation
+significantly. However, for generic serial ports with the
+O_NONBLOCK flag set, the polling limitation is relaxed and the
+consumed buffers are freed upon the next write or poll call or
+when a port is closed or hot-unplugged.
+]
+
+  When a buffer is used in the receiveq (signalled by an
+  interrupt), the contents is the input to the port associated
+  with the virtqueue for which the notification was received.
+
+  If the driver negotiated the VIRTIO_CONSOLE_F_SIZE feature, a
+  configuration change interrupt may occur. The updated size can
+  be read from the configuration fields.
+
+  If the driver negotiated the VIRTIO_CONSOLE_F_MULTIPORT
+  feature, active ports are announced by the host using the
+  VIRTIO_CONSOLE_PORT_ADD control message. The same message is
+  used for port hot-plug as well.
+
+  If the host specified a port `name', a sysfs attribute is
+  created with the name filled in, so that udev rules can be
+  written that can create a symlink from the port's name to the
+  char device for port discovery by applications in the guest.
+
+  Changes to ports' state are effected by control messages.
+  Appropriate action is taken on the port indicated in the
+  control message. The layout of the structure of the control
+  buffer and the events associated are:struct virtio_console_control {
+
+       uint32_t id;    /* Port number */
+
+       uint16_t event; /* The kind of control event */
+
+       uint16_t value; /* Extra information for the event */
+
+};
+
+
+
+/* Some events for the internal messages (control packets) */
+
+
+
+#define VIRTIO_CONSOLE_DEVICE_READY     0
+
+#define VIRTIO_CONSOLE_PORT_ADD         1
+
+#define VIRTIO_CONSOLE_PORT_REMOVE      2
+
+#define VIRTIO_CONSOLE_PORT_READY       3
+
+#define VIRTIO_CONSOLE_CONSOLE_PORT     4
+
+#define VIRTIO_CONSOLE_RESIZE           5
+
+#define VIRTIO_CONSOLE_PORT_OPEN        6
+
+#define VIRTIO_CONSOLE_PORT_NAME        7
+
+Appendix F: Entropy Device
+
+The virtio entropy device supplies high-quality randomness for
+guest use.
+
+  Configuration
+
+  Subsystem Device ID 4
+
+  Virtqueues 0:requestq.
+
+  Feature bits None currently defined
+
+  Device configuration layout None currently defined.
+
+  Device Initialization
+
+  The virtqueue is initialized
+
+  Device Operation
+
+When the driver requires random bytes, it places the descriptor
+of one or more buffers in the queue. It will be completely filled
+by random data by the device.
+
+Appendix G: Memory Balloon Device
+
+The virtio memory balloon device is a primitive device for
+managing guest memory: the device asks for a certain amount of
+memory, and the guest supplies it (or withdraws it, if the device
+has more than it asks for). This allows the guest to adapt to
+changes in allowance of underlying physical memory. If the
+feature is negotiated, the device can also be used to communicate
+guest memory statistics to the host.
+
+  Configuration
+
+  Subsystem Device ID 5
+
+  Virtqueues 0:inflateq. 1:deflateq. 2:statsq.[footnote:
+Only if VIRTIO_BALLON_F_STATS_VQ set
+]
+
+  Feature bits
+
+  VIRTIO_BALLOON_F_MUST_TELL_HOST (0) Host must be told before
+    pages from the balloon are used.
+
+  VIRTIO_BALLOON_F_STATS_VQ (1) A virtqueue for reporting guest
+    memory statistics is present.
+
+  Device configuration layout Both fields of this configuration
+  are always available. Note that they are little endian, despite
+  convention that device fields are guest endian:struct virtio_balloon_config {
+
+       u32 num_pages;
+
+       u32 actual;
+
+};
+
+  Device Initialization
+
+  The inflate and deflate virtqueues are identified.
+
+  If the VIRTIO_BALLOON_F_STATS_VQ feature bit is negotiated:
+
+  Identify the stats virtqueue.
+
+  Add one empty buffer to the stats virtqueue and notify the
+    host.
+
+Device operation begins immediately.
+
+  Device Operation
+
+  Memory Ballooning The device is driven by the receipt of a
+  configuration change interrupt.
+
+  The “num_pages” configuration field is examined. If this is
+  greater than the “actual” number of pages, memory must be given
+  to the balloon. If it is less than the “actual” number of
+  pages, memory may be taken back from the balloon for general
+  use.
+
+  To supply memory to the balloon (aka. inflate):
+
+  The driver constructs an array of addresses of unused memory
+    pages. These addresses are divided by 4096[footnote:
+This is historical, and independent of the guest page size
+] and the descriptor describing the resulting 32-bit array is
+    added to the inflateq.
+
+  To remove memory from the balloon (aka. deflate):
+
+  The driver constructs an array of addresses of memory pages it
+    has previously given to the balloon, as described above. This
+    descriptor is added to the deflateq.
+
+  If the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is set, the
+    guest may not use these requested pages until that descriptor
+    in the deflateq has been used by the device.
+
+  Otherwise, the guest may begin to re-use pages previously given
+    to the balloon before the device has acknowledged their
+    withdrawl. [footnote:
+In this case, deflation advice is merely a courtesy
+]
+
+  In either case, once the device has completed the inflation or
+  deflation, the “actual” field of the configuration should be
+  updated to reflect the new number of pages in the balloon.[footnote:
+As updates to configuration space are not atomic, this field
+isn't particularly reliable, but can be used to diagnose buggy
+guests.
+]
+
+  Memory Statistics
+
+The stats virtqueue is atypical because communication is driven
+by the device (not the driver). The channel becomes active at
+driver initialization time when the driver adds an empty buffer
+and notifies the device. A request for memory statistics proceeds
+as follows:
+
+  The device pushes the buffer onto the used ring and sends an
+  interrupt.
+
+  The driver pops the used buffer and discards it.
+
+  The driver collects memory statistics and writes them into a
+  new buffer.
+
+  The driver adds the buffer to the virtqueue and notifies the
+  device.
+
+  The device pops the buffer (retaining it to initiate a
+  subsequent request) and consumes the statistics.
+
+  Memory Statistics Format Each statistic consists of a 16 bit
+  tag and a 64 bit value. Both quantities are represented in the
+  native endian of the guest. All statistics are optional and the
+  driver may choose which ones to supply. To guarantee backwards
+  compatibility, unsupported statistics should be omitted.
+
+  struct virtio_balloon_stat {
+
+#define VIRTIO_BALLOON_S_SWAP_IN  0
+
+#define VIRTIO_BALLOON_S_SWAP_OUT 1
+
+#define VIRTIO_BALLOON_S_MAJFLT   2
+
+#define VIRTIO_BALLOON_S_MINFLT   3
+
+#define VIRTIO_BALLOON_S_MEMFREE  4
+
+#define VIRTIO_BALLOON_S_MEMTOT   5
+
+       u16 tag;
+
+       u64 val;
+
+} __attribute__((packed));
+
+  Tags
+
+  VIRTIO_BALLOON_S_SWAP_IN The amount of memory that has been
+  swapped in (in bytes).
+
+  VIRTIO_BALLOON_S_SWAP_OUT The amount of memory that has been
+  swapped out to disk (in bytes).
+
+  VIRTIO_BALLOON_S_MAJFLT The number of major page faults that
+  have occurred.
+
+  VIRTIO_BALLOON_S_MINFLT The number of minor page faults that
+  have occurred.
+
+  VIRTIO_BALLOON_S_MEMFREE The amount of memory not being used
+  for any purpose (in bytes).
+
+  VIRTIO_BALLOON_S_MEMTOT The total amount of memory available
+  (in bytes).
+
diff --git a/MAINTAINERS b/MAINTAINERS

index 0b4ccdd35bbb18793986bf667054cdb6c7e5e0d9..ae8820e173a217ddb9afec0258c23d7274da9276 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1278,7 +1278,6 @@ F:        drivers/input/misc/ati_remote2.c
  ATLX ETHERNET DRIVERS
  M:     Jay Cliburn <jcliburn@gmail.com>
  M:     Chris Snook <chris.snook@gmail.com>
-M:     Jie Yang <jie.yang@atheros.com>
  L:     netdev@vger.kernel.org
  W:     http://sourceforge.net/projects/atl1
  W:     http://atl1.sourceforge.net
@@ -1574,7 +1573,6 @@ F:        drivers/scsi/bfa/
  
  BROCADE BNA 10 GIGABIT ETHERNET DRIVER
  M:     Rasesh Mody <rmody@brocade.com>
-M:     Debashis Dutt <ddutt@brocade.com>
  L:     netdev@vger.kernel.org
  S:     Supported
  F:     drivers/net/bna/
@@ -1758,7 +1756,6 @@ F:        Documentation/zh_CN/
  
  CISCO VIC ETHERNET NIC DRIVER
  M:     Christian Benvenuti <benve@cisco.com>
-M:     Vasanthy Kolluri <vkolluri@cisco.com>
  M:     Roopa Prabhu <roprabhu@cisco.com>
  M:     David Wang <dwang2@cisco.com>
  S:     Supported
@@ -1883,7 +1880,7 @@ S:        Maintained
  F:     drivers/connector/
  
  CONTROL GROUPS (CGROUPS)
-M:     Paul Menage <menage@google.com>
+M:     Paul Menage <paul@paulmenage.org>
  M:     Li Zefan <lizf@cn.fujitsu.com>
  L:     containers@lists.linux-foundation.org
  S:     Maintained
@@ -1932,7 +1929,7 @@ S:        Maintained
  F:     tools/power/cpupower
  
  CPUSETS
-M:     Paul Menage <menage@google.com>
+M:     Paul Menage <paul@paulmenage.org>
  W:     http://www.bullopensource.org/cpuset/
  W:     http://oss.sgi.com/projects/cpusets/
  S:     Supported
@@ -2649,11 +2646,11 @@ F:      drivers/net/wan/dlci.c
  F:     drivers/net/wan/sdla.c
  
  FRAMEBUFFER LAYER
-M:     Paul Mundt <lethal@linux-sh.org>
+M:     Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
  L:     linux-fbdev@vger.kernel.org
  W:     http://linux-fbdev.sourceforge.net/
  Q:     http://patchwork.kernel.org/project/linux-fbdev/list/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/fbdev-2.6.git
+T:     git git://github.com/schandinat/linux-2.6.git fbdev-next
  S:     Maintained
  F:     Documentation/fb/
  F:     Documentation/devicetree/bindings/fb/
@@ -3262,6 +3259,17 @@ F:       Documentation/input/multi-touch-protocol.txt
  F:     drivers/input/input-mt.c
  K:     \b(ABS|SYN)_MT_
  
+INTEL C600 SERIES SAS CONTROLLER DRIVER
+M:     Intel SCU Linux support <intel-linux-scu@intel.com>
+M:     Dan Williams <dan.j.williams@intel.com>
+M:     Dave Jiang <dave.jiang@intel.com>
+M:     Ed Nadolski <edmund.nadolski@intel.com>
+L:     linux-scsi@vger.kernel.org
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git
+S:     Maintained
+F:     drivers/scsi/isci/
+F:     firmware/isci/
+
  INTEL IDLE DRIVER
  M:     Len Brown <lenb@kernel.org>
  L:     linux-pm@lists.linux-foundation.org
@@ -3905,9 +3913,9 @@ F:        arch/powerpc/platforms/powermac/
  F:     drivers/macintosh/
  
  LINUX FOR POWERPC EMBEDDED MPC5XXX
-M:     Grant Likely <grant.likely@secretlab.ca>
+M:     Anatolij Gustschin <agust@denx.de>
  L:     linuxppc-dev@lists.ozlabs.org
-T:     git git://git.secretlab.ca/git/linux-2.6.git
+T:     git git://git.denx.de/linux-2.6-agust.git
  S:     Maintained
  F:     arch/powerpc/platforms/512x/
  F:     arch/powerpc/platforms/52xx/
@@ -4404,7 +4412,8 @@ L:        netfilter@vger.kernel.org
  L:     coreteam@netfilter.org
  W:     http://www.netfilter.org/
  W:     http://www.iptables.org/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-2.6.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-2.6.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next-2.6.git
  S:     Supported
  F:     include/linux/netfilter*
  F:     include/linux/netfilter/
@@ -4450,8 +4459,8 @@ M:        "David S. Miller" <davem@davemloft.net>
  L:     netdev@vger.kernel.org
  W:     http://www.linuxfoundation.org/en/Net
  W:     http://patchwork.ozlabs.org/project/netdev/list/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.git
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
  S:     Maintained
  F:     net/
  F:     include/net/
@@ -4774,7 +4783,7 @@ F:        drivers/net/wireless/orinoco/
  
  OSD LIBRARY and FILESYSTEM
  M:     Boaz Harrosh <bharrosh@panasas.com>
-M:     Benny Halevy <bhalevy@panasas.com>
+M:     Benny Halevy <bhalevy@tonian.com>
  L:     osd-dev@open-osd.org
  W:     http://open-osd.org
  T:     git git://git.open-osd.org/open-osd.git
@@ -4971,7 +4980,7 @@ M:        Paul Mackerras <paulus@samba.org>
  M:     Ingo Molnar <mingo@elte.hu>
  M:     Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
  S:     Supported
-F:     kernel/perf_event*.c
+F:     kernel/events/*
  F:     include/linux/perf_event.h
  F:     arch/*/kernel/perf_event*.c
  F:     arch/*/kernel/*/perf_event*.c
@@ -7088,7 +7097,7 @@ S:        Supported
  F:     drivers/mmc/host/vub300.c
  
  W1 DALLAS'S 1-WIRE BUS
-M:     Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+M:     Evgeniy Polyakov <zbr@ioremap.net>
  S:     Maintained
  F:     Documentation/w1/
  F:     drivers/w1/
@@ -7362,7 +7371,7 @@ THE REST
  M:     Linus Torvalds <torvalds@linux-foundation.org>
  L:     linux-kernel@vger.kernel.org
  Q:     http://patchwork.kernel.org/project/LKML/list/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
  S:     Buried alive in reporters
  F:     *
  F:     */
diff --git a/Makefile b/Makefile

index b4ca4e111c9a4bc3e4c1c03e35eb532b5c31da93..522fa4784e69a65353fa83494fa3c4f105989c03 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
  VERSION = 3
  PATCHLEVEL = 1
  SUBLEVEL = 0
-EXTRAVERSION = -rc1
-NAME = Sneaky Weasel
+EXTRAVERSION = -rc6
+NAME = "Divemaster Edition"
  
  # *DOCUMENTATION*
  # To see a list of typical targets execute "make help"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig

index 60cde53d266c38da1848b01538698bc85a309453..8bb936226dee6bcc673cdefd623c8d08ca8eeac8 100644 (file)
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -51,7 +51,7 @@ config GENERIC_CMOS_UPDATE
          def_bool y
  
  config GENERIC_GPIO
-       def_bool y
+       bool
  
  config ZONE_DMA
         bool
diff --git a/arch/alpha/include/asm/sysinfo.h b/arch/alpha/include/asm/sysinfo.h

index 086aba284df25ba314e246eb2e975291b4191dfa..e77d77cd07b8b8bfc69277d32ae5782344c9320a 100644 (file)
--- a/arch/alpha/include/asm/sysinfo.h
+++ b/arch/alpha/include/asm/sysinfo.h
@@ -27,13 +27,4 @@
  #define UAC_NOFIX                      2
  #define UAC_SIGBUS                     4
  
-
-#ifdef __KERNEL__
-
-/* This is the shift that is applied to the UAC bits as stored in the
-   per-thread flags.  See thread_info.h.  */
-#define UAC_SHIFT                      6
-
-#endif
-
  #endif /* __ASM_ALPHA_SYSINFO_H */
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h

index 6f32f9c84a2ddde12a85826a4339f70c857ee2f0..ff73db022342f789124b484618251e8e6235c1c6 100644 (file)
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -74,9 +74,9 @@ register struct thread_info *__current_thread_info __asm__("$8");
  #define TIF_NEED_RESCHED       3       /* rescheduling necessary */
  #define TIF_POLLING_NRFLAG     8       /* poll_idle is polling NEED_RESCHED */
  #define TIF_DIE_IF_KERNEL      9       /* dik recursion lock */
-#define TIF_UAC_NOPRINT                10      /* see sysinfo.h */
-#define TIF_UAC_NOFIX          11
-#define TIF_UAC_SIGBUS         12
+#define TIF_UAC_NOPRINT                10      /* ! Preserve sequence of following */
+#define TIF_UAC_NOFIX          11      /* ! flags as they match            */
+#define TIF_UAC_SIGBUS         12      /* ! userspace part of 'osf_sysinfo' */
  #define TIF_MEMDIE             13      /* is terminating due to OOM killer */
  #define TIF_RESTORE_SIGMASK    14      /* restore signal mask in do_signal */
  #define TIF_FREEZE             16      /* is freezing for suspend */
@@ -97,7 +97,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
  #define _TIF_ALLWORK_MASK      (_TIF_WORK_MASK         \
                                  | _TIF_SYSCALL_TRACE)
  
-#define ALPHA_UAC_SHIFT                10
+#define ALPHA_UAC_SHIFT                TIF_UAC_NOPRINT
  #define ALPHA_UAC_MASK         (1 << TIF_UAC_NOPRINT | 1 << TIF_UAC_NOFIX | \
                                  1 << TIF_UAC_SIGBUS)
  
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c

index 326f0a2d56e52922920aad724dfec0e78487986a..01e8715e26d9306148a8706d046cf863015c1721 100644 (file)
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -42,6 +42,7 @@
  #include <asm/uaccess.h>
  #include <asm/system.h>
  #include <asm/sysinfo.h>
+#include <asm/thread_info.h>
  #include <asm/hwrpb.h>
  #include <asm/processor.h>
  
@@ -633,9 +634,10 @@ SYSCALL_DEFINE5(osf_getsysinfo, unsigned long, op, void __user *, buffer,
         case GSI_UACPROC:
                 if (nbytes < sizeof(unsigned int))
                         return -EINVAL;
-               w = (current_thread_info()->flags >> UAC_SHIFT) & UAC_BITMASK;
-               if (put_user(w, (unsigned int __user *)buffer))
-                       return -EFAULT;
+               w = (current_thread_info()->flags >> ALPHA_UAC_SHIFT) &
+                       UAC_BITMASK;
+               if (put_user(w, (unsigned int __user *)buffer))
+                       return -EFAULT;
                 return 1;
  
         case GSI_PROC_TYPE:
@@ -756,8 +758,8 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer,
                         case SSIN_UACPROC:
                         again:
                                 old = current_thread_info()->flags;
-                               new = old & ~(UAC_BITMASK << UAC_SHIFT);
-                               new = new | (w & UAC_BITMASK) << UAC_SHIFT;
+                               new = old & ~(UAC_BITMASK << ALPHA_UAC_SHIFT);
+                               new = new | (w & UAC_BITMASK) << ALPHA_UAC_SHIFT;
                                 if (cmpxchg(&current_thread_info()->flags,
                                             old, new) != old)
                                         goto again;
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S

index b9c28f3f19560340fdbc343d5ca90875ffc82661..6acea1f96de394d05ef0e13dbea2a29d85a7e0ee 100644 (file)
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -360,7 +360,7 @@ sys_call_table:
         .quad sys_newuname
         .quad sys_nanosleep                     /* 340 */
         .quad sys_mremap
-       .quad sys_nfsservctl
+       .quad sys_ni_syscall                    /* old nfsservctl */
         .quad sys_setresuid
         .quad sys_getresuid
         .quad sys_pciconfig_read                /* 345 */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig

index 2c71a8f3535a1400c2d187e7fa612e00995e6927..3269576dbfa8dab12d6531996e2e86de2ca309c1 100644 (file)
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -195,8 +195,7 @@ config VECTORS_BASE
           The base address of exception vectors.
  
  config ARM_PATCH_PHYS_VIRT
-       bool "Patch physical to virtual translations at runtime (EXPERIMENTAL)"
-       depends on EXPERIMENTAL
+       bool "Patch physical to virtual translations at runtime"
         depends on !XIP_KERNEL && MMU
         depends on !ARCH_REALVIEW || !SPARSEMEM
         help
@@ -1272,6 +1271,18 @@ config ARM_ERRATA_754327
           This workaround defines cpu_relax() as smp_mb(), preventing correctly
           written polling loops from denying visibility of updates to memory.
  
+config ARM_ERRATA_364296
+       bool "ARM errata: Possible cache data corruption with hit-under-miss enabled"
+       depends on CPU_V6 && !SMP
+       help
+         This options enables the workaround for the 364296 ARM1136
+         r0p2 erratum (possible cache data corruption with
+         hit-under-miss enabled). It sets the undocumented bit 31 in
+         the auxiliary control register and the FI bit in the control
+         register, thus disabling hit-under-miss without putting the
+         processor into full low interrupt latency mode. ARM11MPCore
+         is not affected.
+
  endmenu
  
  source "arch/arm/common/Kconfig"
diff --git a/arch/arm/boot/compressed/mmcif-sh7372.c b/arch/arm/boot/compressed/mmcif-sh7372.c

index b6f61d9a5a1b5279bf8576267b5a788f9f1cfe40..672ae95db5c3177aedc1dfdb3825a490b31e9fc2 100644 (file)
--- a/arch/arm/boot/compressed/mmcif-sh7372.c
+++ b/arch/arm/boot/compressed/mmcif-sh7372.c
@@ -82,7 +82,7 @@ asmlinkage void mmc_loader(unsigned char *buf, unsigned long len)
  
  
         /* Disable clock to MMC hardware block */
-       __raw_writel(__raw_readl(SMSTPCR3) & (1 << 12), SMSTPCR3);
+       __raw_writel(__raw_readl(SMSTPCR3) | (1 << 12), SMSTPCR3);
  
         mmc_update_progress(MMC_PROGRESS_DONE);
  }
diff --git a/arch/arm/boot/compressed/sdhi-sh7372.c b/arch/arm/boot/compressed/sdhi-sh7372.c

index d403a8b24d7f322ad8b53c58ce047a06dc2bc0f8..d279294f238116fa4fbd3c71931149528516b483 100644 (file)
--- a/arch/arm/boot/compressed/sdhi-sh7372.c
+++ b/arch/arm/boot/compressed/sdhi-sh7372.c
@@ -85,7 +85,7 @@ asmlinkage void mmc_loader(unsigned short *buf, unsigned long len)
                 goto err;
  
          /* Disable clock to SDHI1 hardware block */
-        __raw_writel(__raw_readl(SMSTPCR3) & (1 << 13), SMSTPCR3);
+        __raw_writel(__raw_readl(SMSTPCR3) | (1 << 13), SMSTPCR3);
  
         mmc_update_progress(MMC_PROGRESS_DONE);
  
diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h

index 16bd48031583da44b7c0f25c68027fd787dc3fe1..99a6ed7e1bfd5ce6395ca7a900079ada255da281 100644 (file)
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -45,8 +45,13 @@
  #define L2X0_CLEAN_INV_LINE_PA         0x7F0
  #define L2X0_CLEAN_INV_LINE_IDX                0x7F8
  #define L2X0_CLEAN_INV_WAY             0x7FC
-#define L2X0_LOCKDOWN_WAY_D            0x900
-#define L2X0_LOCKDOWN_WAY_I            0x904
+/*
+ * The lockdown registers repeat 8 times for L310, the L210 has only one
+ * D and one I lockdown register at 0x0900 and 0x0904.
+ */
+#define L2X0_LOCKDOWN_WAY_D_BASE       0x900
+#define L2X0_LOCKDOWN_WAY_I_BASE       0x904
+#define L2X0_LOCKDOWN_STRIDE           0x08
  #define L2X0_TEST_OPERATION            0xF00
  #define L2X0_LINE_DATA                 0xF10
  #define L2X0_LINE_TAG                  0xF30
@@ -64,7 +69,7 @@
  #define L2X0_AUX_CTRL_MASK                     0xc0000fff
  #define L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT      16
  #define L2X0_AUX_CTRL_WAY_SIZE_SHIFT           17
-#define L2X0_AUX_CTRL_WAY_SIZE_MASK            (0x3 << 17)
+#define L2X0_AUX_CTRL_WAY_SIZE_MASK            (0x7 << 17)
  #define L2X0_AUX_CTRL_SHARE_OVERRIDE_SHIFT     22
  #define L2X0_AUX_CTRL_NS_LOCKDOWN_SHIFT                26
  #define L2X0_AUX_CTRL_NS_INT_CTRL_SHIFT                27
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h

index 67c70a31a1be33c34ecdd271c343767342a83dfe..b7e82c4aced63ed7222df9a92c46810bc3094622 100644 (file)
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -41,7 +41,7 @@ struct arm_pmu_platdata {
   * encoded error on failure.
   */
  extern struct platform_device *
-reserve_pmu(enum arm_pmu_type device);
+reserve_pmu(enum arm_pmu_type type);
  
  /**
   * release_pmu() - Relinquish control of the performance counters
@@ -62,26 +62,26 @@ release_pmu(enum arm_pmu_type type);
   * the actual hardware initialisation.
   */
  extern int
-init_pmu(enum arm_pmu_type device);
+init_pmu(enum arm_pmu_type type);
  
  #else /* CONFIG_CPU_HAS_PMU */
  
  #include <linux/err.h>
  
  static inline struct platform_device *
-reserve_pmu(enum arm_pmu_type device)
+reserve_pmu(enum arm_pmu_type type)
  {
         return ERR_PTR(-ENODEV);
  }
  
  static inline int
-release_pmu(struct platform_device *pdev)
+release_pmu(enum arm_pmu_type type)
  {
         return -ENODEV;
  }
  
  static inline int
-init_pmu(enum arm_pmu_type device)
+init_pmu(enum arm_pmu_type type)
  {
         return -ENODEV;
  }
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S

index 80f7896cc0164ad6f74469f1e777b90c1f306443..9943e9e74a1bda0b17bc6e1ee93ca3ab2b80f970 100644 (file)
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -178,7 +178,7 @@
                 CALL(sys_ni_syscall)            /* vm86 */
                 CALL(sys_ni_syscall)            /* was sys_query_module */
                 CALL(sys_poll)
-               CALL(sys_nfsservctl)
+               CALL(sys_ni_syscall)            /* was nfsservctl */
  /* 170 */      CALL(sys_setresgid16)
                 CALL(sys_getresgid16)
                 CALL(sys_prctl)
diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S

index 7fa3bb0d2397a482989a6f82c6ab93ad674b59da..a08783823b32fdde6dd73d7022b042b64c321bca 100644 (file)
--- a/arch/arm/kernel/iwmmxt.S
+++ b/arch/arm/kernel/iwmmxt.S
@@ -195,10 +195,10 @@ ENTRY(iwmmxt_task_disable)
  
         @ enable access to CP0 and CP1
         XSC(mrc p15, 0, r4, c15, c1, 0)
-       XSC(orr r4, r4, #0xf)
+       XSC(orr r4, r4, #0x3)
         XSC(mcr p15, 0, r4, c15, c1, 0)
         PJ4(mrc p15, 0, r4, c1, c0, 2)
-       PJ4(orr r4, r4, #0x3)
+       PJ4(orr r4, r4, #0xf)
         PJ4(mcr p15, 0, r4, c1, c0, 2)
  
         mov     r0, #0                          @ nothing to load
@@ -313,7 +313,7 @@ ENTRY(iwmmxt_task_switch)
         teq     r2, r3                          @ next task owns it?
         movne   pc, lr                          @ no: leave Concan disabled
  
-1:     @ flip Conan access
+1:     @ flip Concan access
         XSC(eor r1, r1, #0x3)
         XSC(mcr p15, 0, r1, c15, c1, 0)
         PJ4(eor r1, r1, #0xf)
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c

index 05b377616fd54825c3d7c1316548a11c9c6f60d4..cc2020c2c70977a4e95eaadad130c688514682e9 100644 (file)
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -323,7 +323,11 @@ int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs,
  #endif
         s = find_mod_section(hdr, sechdrs, ".alt.smp.init");
         if (s && !is_smp())
+#ifdef CONFIG_SMP_ON_UP
                 fixup_smp((void *)s->sh_addr, s->sh_size);
+#else
+               return -EINVAL;
+#endif
         return 0;
  }
  
diff --git a/arch/arm/kernel/pmu.c b/arch/arm/kernel/pmu.c

index 2b70709376c3271e1007b3e6bd7d829db997375e..c53474fe84dfe1851df72fd7428fccf02ea33c4f 100644 (file)
--- a/arch/arm/kernel/pmu.c
+++ b/arch/arm/kernel/pmu.c
@@ -31,7 +31,7 @@ static int __devinit pmu_register(struct platform_device *pdev,
  {
         if (type < 0 || type >= ARM_NUM_PMU_DEVICES) {
                 pr_warning("received registration request for unknown "
-                               "device %d\n", type);
+                               "PMU device type %d\n", type);
                 return -EINVAL;
         }
  
@@ -112,17 +112,17 @@ static int __init register_pmu_driver(void)
  device_initcall(register_pmu_driver);
  
  struct platform_device *
-reserve_pmu(enum arm_pmu_type device)
+reserve_pmu(enum arm_pmu_type type)
  {
         struct platform_device *pdev;
  
-       if (test_and_set_bit_lock(device, &pmu_lock)) {
+       if (test_and_set_bit_lock(type, &pmu_lock)) {
                 pdev = ERR_PTR(-EBUSY);
-       } else if (pmu_devices[device] == NULL) {
-               clear_bit_unlock(device, &pmu_lock);
+       } else if (pmu_devices[type] == NULL) {
+               clear_bit_unlock(type, &pmu_lock);
                 pdev = ERR_PTR(-ENODEV);
         } else {
-               pdev = pmu_devices[device];
+               pdev = pmu_devices[type];
         }
  
         return pdev;
@@ -130,11 +130,11 @@ reserve_pmu(enum arm_pmu_type device)
  EXPORT_SYMBOL_GPL(reserve_pmu);
  
  int
-release_pmu(enum arm_pmu_type device)
+release_pmu(enum arm_pmu_type type)
  {
-       if (WARN_ON(!pmu_devices[device]))
+       if (WARN_ON(!pmu_devices[type]))
                 return -EINVAL;
-       clear_bit_unlock(device, &pmu_lock);
+       clear_bit_unlock(type, &pmu_lock);
         return 0;
  }
  EXPORT_SYMBOL_GPL(release_pmu);
@@ -182,17 +182,17 @@ init_cpu_pmu(void)
  }
  
  int
-init_pmu(enum arm_pmu_type device)
+init_pmu(enum arm_pmu_type type)
  {
         int err = 0;
  
-       switch (device) {
+       switch (type) {
         case ARM_PMU_DEVICE_CPU:
                 err = init_cpu_pmu();
                 break;
         default:
-               pr_warning("attempt to initialise unknown device %d\n",
-                               device);
+               pr_warning("attempt to initialise PMU of unknown "
+                          "type %d\n", type);
                 err = -EINVAL;
         }
  
diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S

index 9cf4cbf8f95b8ca9eef1baf1d8f49054999304f8..d0cdedf4864dc52092355e105f3ba04bba5c5704 100644 (file)
--- a/arch/arm/kernel/relocate_kernel.S
+++ b/arch/arm/kernel/relocate_kernel.S
@@ -57,7 +57,8 @@ relocate_new_kernel:
         mov r0,#0
         ldr r1,kexec_mach_type
         ldr r2,kexec_boot_atags
-       mov pc,lr
+ ARM(  mov pc, lr      )
+ THUMB(        bx lr           )
  
         .align
  
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c

index 70bca649e9250d8a517348c7536e1fd7e782f942..e514c76043b4d2f5af4638f44b889bb2737dc5ad 100644 (file)
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -280,18 +280,19 @@ static void __init cacheid_init(void)
         if (arch >= CPU_ARCH_ARMv6) {
                 if ((cachetype & (7 << 29)) == 4 << 29) {
                         /* ARMv7 register format */
+                       arch = CPU_ARCH_ARMv7;
                         cacheid = CACHEID_VIPT_NONALIASING;
                         if ((cachetype & (3 << 14)) == 1 << 14)
                                 cacheid |= CACHEID_ASID_TAGGED;
-                       else if (cpu_has_aliasing_icache(CPU_ARCH_ARMv7))
-                               cacheid |= CACHEID_VIPT_I_ALIASING;
-               } else if (cachetype & (1 << 23)) {
-                       cacheid = CACHEID_VIPT_ALIASING;
                 } else {
-                       cacheid = CACHEID_VIPT_NONALIASING;
-                       if (cpu_has_aliasing_icache(CPU_ARCH_ARMv6))
-                               cacheid |= CACHEID_VIPT_I_ALIASING;
+                       arch = CPU_ARCH_ARMv6;
+                       if (cachetype & (1 << 23))
+                               cacheid = CACHEID_VIPT_ALIASING;
+                       else
+                               cacheid = CACHEID_VIPT_NONALIASING;
                 }
+               if (cpu_has_aliasing_icache(arch))
+                       cacheid |= CACHEID_VIPT_I_ALIASING;
         } else {
                 cacheid = CACHEID_VIVT;
         }
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c

index 2c277d40cee681ccc496239b3ab6724259a35756..01c186222f3b9d132c670bdba6721a7b3b8f50ce 100644 (file)
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -137,8 +137,8 @@ void __cpuinit twd_timer_setup(struct clock_event_device *clk)
         clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk);
         clk->min_delta_ns = clockevent_delta2ns(0xf, clk);
  
+       clockevents_register_device(clk);
+
         /* Make sure our local interrupt controller has this enabled */
         gic_enable_ppi(clk->irq);
-
-       clockevents_register_device(clk);
  }
diff --git a/arch/arm/mach-at91/at91sam9261.c b/arch/arm/mach-at91/at91sam9261.c

index d522b47e30b5a82322da83f6fed63d7d110dab5a..6c8e3b5f669ff7f7a3a885c43b8c1c7710ad2954 100644 (file)
--- a/arch/arm/mach-at91/at91sam9261.c
+++ b/arch/arm/mach-at91/at91sam9261.c
@@ -157,7 +157,7 @@ static struct clk_lookup periph_clocks_lookups[] = {
         CLKDEV_CON_DEV_ID("spi_clk", "atmel_spi.1", &spi1_clk),
         CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.0", &tc0_clk),
         CLKDEV_CON_DEV_ID("t1_clk", "atmel_tcb.0", &tc1_clk),
-       CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.0", &tc1_clk),
+       CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.0", &tc2_clk),
         CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc0_clk),
         CLKDEV_CON_DEV_ID("pclk", "ssc.1", &ssc1_clk),
         CLKDEV_CON_DEV_ID("pclk", "ssc.2", &ssc2_clk),
diff --git a/arch/arm/mach-cns3xxx/include/mach/entry-macro.S b/arch/arm/mach-cns3xxx/include/mach/entry-macro.S

index 6bd83ed90afe33523c047b7bb22f8cf63c4b94a0..d87bfc397d39859d01817c016e8c8b4aceab72d3 100644 (file)
--- a/arch/arm/mach-cns3xxx/include/mach/entry-macro.S
+++ b/arch/arm/mach-cns3xxx/include/mach/entry-macro.S
@@ -8,7 +8,6 @@
   * published by the Free Software Foundation.
   */
  
-#include <mach/hardware.h>
  #include <asm/hardware/entry-macro-gic.S>
  
                 .macro  disable_fiq
diff --git a/arch/arm/mach-cns3xxx/include/mach/system.h b/arch/arm/mach-cns3xxx/include/mach/system.h

index 58bb03ae3cf43fcbf2c7191ec12099eaddcc19f9..4f16c9b79f784e970e1e467b8dafbeb7f7feef61 100644 (file)
--- a/arch/arm/mach-cns3xxx/include/mach/system.h
+++ b/arch/arm/mach-cns3xxx/include/mach/system.h
@@ -13,7 +13,6 @@
  
  #include <linux/io.h>
  #include <asm/proc-fns.h>
-#include <mach/hardware.h>
  
  static inline void arch_idle(void)
  {
diff --git a/arch/arm/mach-cns3xxx/include/mach/uncompress.h b/arch/arm/mach-cns3xxx/include/mach/uncompress.h

index de8ead9b91f77086b100f9705dbca0a2f53645a1..a91b6058ab4fc044395f04d93fb1433db9b4cb2a 100644 (file)
--- a/arch/arm/mach-cns3xxx/include/mach/uncompress.h
+++ b/arch/arm/mach-cns3xxx/include/mach/uncompress.h
@@ -8,7 +8,6 @@
   */
  
  #include <asm/mach-types.h>
-#include <mach/hardware.h>
  #include <mach/cns3xxx.h>
  
  #define AMBA_UART_DR(base)     (*(volatile unsigned char *)((base) + 0x00))
diff --git a/arch/arm/mach-cns3xxx/pcie.c b/arch/arm/mach-cns3xxx/pcie.c

index 06fd25d70aec8064912eb1b70d82678b4b891f6d..0f8fca48a5eddd3a3c03421861b0d7b8fe6d1c1f 100644 (file)
--- a/arch/arm/mach-cns3xxx/pcie.c
+++ b/arch/arm/mach-cns3xxx/pcie.c
@@ -49,7 +49,7 @@ static struct cns3xxx_pcie *sysdata_to_cnspci(void *sysdata)
         return &cns3xxx_pcie[root->domain];
  }
  
-static struct cns3xxx_pcie *pdev_to_cnspci(struct pci_dev *dev)
+static struct cns3xxx_pcie *pdev_to_cnspci(const struct pci_dev *dev)
  {
         return sysdata_to_cnspci(dev->sysdata);
  }
diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c

index bd5394537c88d8f305c9bda5a00cf57214263c12..008d51407cd71dbe5d3ad5c8cce4d3f27a85aa76 100644 (file)
--- a/arch/arm/mach-davinci/board-da850-evm.c
+++ b/arch/arm/mach-davinci/board-da850-evm.c
@@ -115,6 +115,32 @@ static struct spi_board_info da850evm_spi_info[] = {
         },
  };
  
+#ifdef CONFIG_MTD
+static void da850_evm_m25p80_notify_add(struct mtd_info *mtd)
+{
+       char *mac_addr = davinci_soc_info.emac_pdata->mac_addr;
+       size_t retlen;
+
+       if (!strcmp(mtd->name, "MAC-Address")) {
+               mtd->read(mtd, 0, ETH_ALEN, &retlen, mac_addr);
+               if (retlen == ETH_ALEN)
+                       pr_info("Read MAC addr from SPI Flash: %pM\n",
+                               mac_addr);
+       }
+}
+
+static struct mtd_notifier da850evm_spi_notifier = {
+       .add    = da850_evm_m25p80_notify_add,
+};
+
+static void da850_evm_setup_mac_addr(void)
+{
+       register_mtd_user(&da850evm_spi_notifier);
+}
+#else
+static void da850_evm_setup_mac_addr(void) { }
+#endif
+
  static struct mtd_partition da850_evm_norflash_partition[] = {
         {
                 .name           = "bootloaders + env",
@@ -1244,6 +1270,8 @@ static __init void da850_evm_init(void)
         if (ret)
                 pr_warning("da850_evm_init: sata registration failed: %d\n",
                                 ret);
+
+       da850_evm_setup_mac_addr();
  }
  
  #ifdef CONFIG_SERIAL_8250_CONSOLE
diff --git a/arch/arm/mach-davinci/include/mach/psc.h b/arch/arm/mach-davinci/include/mach/psc.h

index 47fd0bc3d3e7bd03fafd8102ec93d3db343ee220..fa59c097223dc85ec965224e9ebfc0ac8dccf2c2 100644 (file)
--- a/arch/arm/mach-davinci/include/mach/psc.h
+++ b/arch/arm/mach-davinci/include/mach/psc.h
@@ -243,7 +243,7 @@
  #define PSC_STATE_DISABLE      2
  #define PSC_STATE_ENABLE       3
  
-#define MDSTAT_STATE_MASK      0x1f
+#define MDSTAT_STATE_MASK      0x3f
  #define MDCTL_FORCE            BIT(31)
  
  #ifndef __ASSEMBLER__
diff --git a/arch/arm/mach-davinci/sleep.S b/arch/arm/mach-davinci/sleep.S

index fb5e72b532b00bba9ecfe0976c440d9ea825b6c8..5f1e045a3ad1a6e6ffc05d23fb95ff835e97f1b6 100644 (file)
--- a/arch/arm/mach-davinci/sleep.S
+++ b/arch/arm/mach-davinci/sleep.S
@@ -217,7 +217,11 @@ ddr2clk_stop_done:
  ENDPROC(davinci_ddr_psc_config)
  
  CACHE_FLUSH:
-       .word   arm926_flush_kern_cache_all
+#ifdef CONFIG_CPU_V6
+       .word   v6_flush_kern_cache_all
+#else
+       .word   arm926_flush_kern_cache_all
+#endif
  
  ENTRY(davinci_cpu_suspend_sz)
         .word   . - davinci_cpu_suspend
diff --git a/arch/arm/mach-ep93xx/include/mach/ts72xx.h b/arch/arm/mach-ep93xx/include/mach/ts72xx.h

index 0eabec62cd9df5d97a747978003dde74990c8ed0..f1397a13e76b87b563b4343c848a023f57e71f09 100644 (file)
--- a/arch/arm/mach-ep93xx/include/mach/ts72xx.h
+++ b/arch/arm/mach-ep93xx/include/mach/ts72xx.h
@@ -6,7 +6,7 @@
   * TS72xx memory map:
   *
   * virt                phys            size
- * febff000    22000000        4K      model number register
+ * febff000    22000000        4K      model number register (bits 0-2)
   * febfe000    22400000        4K      options register
   * febfd000    22800000        4K      options register #2
   * febf9000    10800000        4K      TS-5620 RTC index register
@@ -20,6 +20,9 @@
  #define TS72XX_MODEL_TS7200            0x00
  #define TS72XX_MODEL_TS7250            0x01
  #define TS72XX_MODEL_TS7260            0x02
+#define TS72XX_MODEL_TS7300            0x03
+#define TS72XX_MODEL_TS7400            0x04
+#define TS72XX_MODEL_MASK              0x07
  
  
  #define TS72XX_OPTIONS_PHYS_BASE       0x22400000
@@ -51,19 +54,34 @@
  
  #ifndef __ASSEMBLY__
  
+static inline int ts72xx_model(void)
+{
+       return __raw_readb(TS72XX_MODEL_VIRT_BASE) & TS72XX_MODEL_MASK;
+}
+
  static inline int board_is_ts7200(void)
  {
-       return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7200;
+       return ts72xx_model() == TS72XX_MODEL_TS7200;
  }
  
  static inline int board_is_ts7250(void)
  {
-       return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7250;
+       return ts72xx_model() == TS72XX_MODEL_TS7250;
  }
  
  static inline int board_is_ts7260(void)
  {
-       return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7260;
+       return ts72xx_model() == TS72XX_MODEL_TS7260;
+}
+
+static inline int board_is_ts7300(void)
+{
+       return ts72xx_model()  == TS72XX_MODEL_TS7300;
+}
+
+static inline int board_is_ts7400(void)
+{
+       return ts72xx_model() == TS72XX_MODEL_TS7400;
  }
  
  static inline int is_max197_installed(void)
diff --git a/arch/arm/mach-exynos4/clock.c b/arch/arm/mach-exynos4/clock.c

index 851dea018578db02d58ee39b0d39b0f09f652fb0..1561b036a9bf2fd3ac9c4221df04e5117df6d3f6 100644 (file)
--- a/arch/arm/mach-exynos4/clock.c
+++ b/arch/arm/mach-exynos4/clock.c
@@ -520,7 +520,7 @@ static struct clk init_clocks_off[] = {
                 .ctrlbit        = (1 << 21),
         }, {
                 .name           = "ac97",
-               .id             = -1,
+               .devname        = "samsung-ac97",
                 .enable         = exynos4_clk_ip_peril_ctrl,
                 .ctrlbit        = (1 << 27),
         }, {
diff --git a/arch/arm/mach-exynos4/cpu.c b/arch/arm/mach-exynos4/cpu.c

index 2d8a40c9e6e5a69c49cf48f642bd38aa7bff30f5..746d6fc6d397dc23dffd93ea3764c80fd0db3bb0 100644 (file)
--- a/arch/arm/mach-exynos4/cpu.c
+++ b/arch/arm/mach-exynos4/cpu.c
@@ -24,12 +24,13 @@
  #include <plat/exynos4.h>
  #include <plat/adc-core.h>
  #include <plat/sdhci.h>
-#include <plat/devs.h>
  #include <plat/fb-core.h>
  #include <plat/fimc-core.h>
  #include <plat/iic-core.h>
+#include <plat/reset.h>
  
  #include <mach/regs-irq.h>
+#include <mach/regs-pmu.h>
  
  extern int combiner_init(unsigned int combiner_nr, void __iomem *base,
                          unsigned int irq_start);
@@ -128,6 +129,11 @@ static void exynos4_idle(void)
         local_irq_enable();
  }
  
+static void exynos4_sw_reset(void)
+{
+       __raw_writel(0x1, S5P_SWRESET);
+}
+
  /*
   * exynos4_map_io
   *
@@ -241,5 +247,8 @@ int __init exynos4_init(void)
         /* set idle function */
         pm_idle = exynos4_idle;
  
+       /* set sw_reset function */
+       s5p_reset_hook = exynos4_sw_reset;
+
         return sysdev_register(&exynos4_sysdev);
  }
diff --git a/arch/arm/mach-exynos4/include/mach/irqs.h b/arch/arm/mach-exynos4/include/mach/irqs.h

index 934d2a493982ca7a3951540b2ffbaf1bca25219f..f8952f8f3757760029e7d95d077ed99cd6e119f4 100644 (file)
--- a/arch/arm/mach-exynos4/include/mach/irqs.h
+++ b/arch/arm/mach-exynos4/include/mach/irqs.h
@@ -80,9 +80,8 @@
  #define IRQ_HSMMC3             IRQ_SPI(76)
  #define IRQ_DWMCI              IRQ_SPI(77)
  
-#define IRQ_MIPICSI0           IRQ_SPI(78)
-
-#define IRQ_MIPICSI1           IRQ_SPI(80)
+#define IRQ_MIPI_CSIS0         IRQ_SPI(78)
+#define IRQ_MIPI_CSIS1         IRQ_SPI(80)
  
  #define IRQ_ONENAND_AUDI       IRQ_SPI(82)
  #define IRQ_ROTATOR            IRQ_SPI(83)
diff --git a/arch/arm/mach-exynos4/include/mach/regs-pmu.h b/arch/arm/mach-exynos4/include/mach/regs-pmu.h

index fa49bbb8e7b01c2e1cf49497af543c205b432d75..cdf9b47c303cea185e668f7c52e1c28cb5dac4fd 100644 (file)
--- a/arch/arm/mach-exynos4/include/mach/regs-pmu.h
+++ b/arch/arm/mach-exynos4/include/mach/regs-pmu.h
@@ -29,6 +29,8 @@
  #define S5P_USE_STANDBY_WFE1                   (1 << 25)
  #define S5P_USE_MASK                           ((0x3 << 16) | (0x3 << 24))
  
+#define S5P_SWRESET                            S5P_PMUREG(0x0400)
+
  #define S5P_WAKEUP_STAT                                S5P_PMUREG(0x0600)
  #define S5P_EINT_WAKEUP_MASK                   S5P_PMUREG(0x0604)
  #define S5P_WAKEUP_MASK                                S5P_PMUREG(0x0608)
diff --git a/arch/arm/mach-exynos4/irq-eint.c b/arch/arm/mach-exynos4/irq-eint.c

index 9d87d2ac7f687303547a57aece1fd19f10d91384..badb8c66fc9bc907c6950806f2e9f63357dc06f8 100644 (file)
--- a/arch/arm/mach-exynos4/irq-eint.c
+++ b/arch/arm/mach-exynos4/irq-eint.c
@@ -23,6 +23,8 @@
  
  #include <mach/regs-gpio.h>
  
+#include <asm/mach/irq.h>
+
  static DEFINE_SPINLOCK(eint_lock);
  
  static unsigned int eint0_15_data[16];
@@ -184,8 +186,11 @@ static inline void exynos4_irq_demux_eint(unsigned int start)
  
  static void exynos4_irq_demux_eint16_31(unsigned int irq, struct irq_desc *desc)
  {
+       struct irq_chip *chip = irq_get_chip(irq);
+       chained_irq_enter(chip, desc);
         exynos4_irq_demux_eint(IRQ_EINT(16));
         exynos4_irq_demux_eint(IRQ_EINT(24));
+       chained_irq_exit(chip, desc);
  }
  
  static void exynos4_irq_eint0_15(unsigned int irq, struct irq_desc *desc)
@@ -193,6 +198,7 @@ static void exynos4_irq_eint0_15(unsigned int irq, struct irq_desc *desc)
         u32 *irq_data = irq_get_handler_data(irq);
         struct irq_chip *chip = irq_get_chip(irq);
  
+       chained_irq_enter(chip, desc);
         chip->irq_mask(&desc->irq_data);
  
         if (chip->irq_ack)
@@ -201,6 +207,7 @@ static void exynos4_irq_eint0_15(unsigned int irq, struct irq_desc *desc)
         generic_handle_irq(*irq_data);
  
         chip->irq_unmask(&desc->irq_data);
+       chained_irq_exit(chip, desc);
  }
  
  int __init exynos4_init_irq_eint(void)
diff --git a/arch/arm/mach-exynos4/mach-universal_c210.c b/arch/arm/mach-exynos4/mach-universal_c210.c

index 0e280d12301ebe542bcb170661c7bc2097b67fca..b3b5d8911004422fdb6cc9001ecb8ab1ae4df475 100644 (file)
--- a/arch/arm/mach-exynos4/mach-universal_c210.c
+++ b/arch/arm/mach-exynos4/mach-universal_c210.c
@@ -79,7 +79,7 @@ static struct s3c2410_uartcfg universal_uartcfgs[] __initdata = {
  };
  
  static struct regulator_consumer_supply max8952_consumer =
-       REGULATOR_SUPPLY("vddarm", NULL);
+       REGULATOR_SUPPLY("vdd_arm", NULL);
  
  static struct max8952_platform_data universal_max8952_pdata __initdata = {
         .gpio_vid0      = EXYNOS4_GPX0(3),
@@ -105,7 +105,7 @@ static struct max8952_platform_data universal_max8952_pdata __initdata = {
  };
  
  static struct regulator_consumer_supply lp3974_buck1_consumer =
-       REGULATOR_SUPPLY("vddint", NULL);
+       REGULATOR_SUPPLY("vdd_int", NULL);
  
  static struct regulator_consumer_supply lp3974_buck2_consumer =
         REGULATOR_SUPPLY("vddg3d", NULL);
diff --git a/arch/arm/mach-exynos4/setup-usb-phy.c b/arch/arm/mach-exynos4/setup-usb-phy.c

index 0883c1b824b94df5cdca0f7cabf76812fb734df0..39aca045f6604ae1b7c4125ee85646ef34df95f0 100644 (file)
--- a/arch/arm/mach-exynos4/setup-usb-phy.c
+++ b/arch/arm/mach-exynos4/setup-usb-phy.c
@@ -82,7 +82,7 @@ static int exynos4_usb_phy1_init(struct platform_device *pdev)
  
         rstcon &= ~(HOST_LINK_PORT_SWRST_MASK | PHY1_SWRST_MASK);
         writel(rstcon, EXYNOS4_RSTCON);
-       udelay(50);
+       udelay(80);
  
         clk_disable(otg_clk);
         clk_put(otg_clk);
diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig

index dc26fff22cf062df3753eb4d0b46360334116dc1..c8e7afcf14ec142b38d7a1558f98661936006477 100644 (file)
--- a/arch/arm/mach-footbridge/Kconfig
+++ b/arch/arm/mach-footbridge/Kconfig
@@ -62,6 +62,7 @@ config ARCH_EBSA285_HOST
  config ARCH_NETWINDER
         bool "NetWinder"
         select CLKSRC_I8253
+       select CLKEVT_I8253
         select FOOTBRIDGE_HOST
         select ISA
         select ISA_DMA
diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c

index 1331fff51ae25352803b094a1a678b5a1ed9400e..18c32a5541d9e47c69b5ae38cee8d56ef8eaa333 100644 (file)
--- a/arch/arm/mach-footbridge/dc21285.c
+++ b/arch/arm/mach-footbridge/dc21285.c
@@ -18,6 +18,7 @@
  #include <linux/irq.h>
  #include <linux/io.h>
  #include <linux/spinlock.h>
+#include <video/vga.h>
  
  #include <asm/irq.h>
  #include <asm/system.h>
diff --git a/arch/arm/mach-imx/clock-imx25.c b/arch/arm/mach-imx/clock-imx25.c

index 0fc7ba56d6165862ae44817adbd0df9e701a2e32..e63e23504fe52d88a65f88061ee4a7aa1ac711d3 100644 (file)
--- a/arch/arm/mach-imx/clock-imx25.c
+++ b/arch/arm/mach-imx/clock-imx25.c
@@ -331,6 +331,9 @@ int __init mx25_clocks_init(void)
         __raw_writel(__raw_readl(CRM_BASE+0x64) | (1 << 7) | (1 << 0),
                         CRM_BASE + 0x64);
  
+       /* Clock source for gpt is ahb_div */
+       __raw_writel(__raw_readl(CRM_BASE+0x64) & ~(1 << 5), CRM_BASE + 0x64);
+
         mxc_timer_init(&gpt_clk, MX25_IO_ADDRESS(MX25_GPT1_BASE_ADDR), 54);
  
         return 0;
diff --git a/arch/arm/mach-imx/mach-cpuimx27.c b/arch/arm/mach-imx/mach-cpuimx27.c

index 87887ac5806b43612c8bba200e3f57df83be0c24..f851fe903687dc09c45c51228dcb06d3606679a0 100644 (file)
--- a/arch/arm/mach-imx/mach-cpuimx27.c
+++ b/arch/arm/mach-imx/mach-cpuimx27.c
@@ -310,7 +310,7 @@ static struct sys_timer eukrea_cpuimx27_timer = {
         .init = eukrea_cpuimx27_timer_init,
  };
  
-MACHINE_START(CPUIMX27, "EUKREA CPUIMX27")
+MACHINE_START(EUKREA_CPUIMX27, "EUKREA CPUIMX27")
         .boot_params = MX27_PHYS_OFFSET + 0x100,
         .map_io = mx27_map_io,
         .init_early = imx27_init_early,
diff --git a/arch/arm/mach-imx/mach-cpuimx35.c b/arch/arm/mach-imx/mach-cpuimx35.c

index f39a478ba1a63540a26783a4713033b37121d4f3..4bd083ba9af218077aae1381b5fbef716aa625e6 100644 (file)
--- a/arch/arm/mach-imx/mach-cpuimx35.c
+++ b/arch/arm/mach-imx/mach-cpuimx35.c
@@ -192,7 +192,7 @@ struct sys_timer eukrea_cpuimx35_timer = {
         .init   = eukrea_cpuimx35_timer_init,
  };
  
-MACHINE_START(EUKREA_CPUIMX35, "Eukrea CPUIMX35")
+MACHINE_START(EUKREA_CPUIMX35SD, "Eukrea CPUIMX35")
         /* Maintainer: Eukrea Electromatique */
         .boot_params = MX3x_PHYS_OFFSET + 0x100,
         .map_io = mx35_map_io,
diff --git a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c

index da36da52969db2bc82e5da006e7085df21a3482a..2442d5da883d075259d8add3e927c9ad270b9314 100644 (file)
--- a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c
+++ b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c
@@ -161,7 +161,7 @@ static struct sys_timer eukrea_cpuimx25_timer = {
         .init   = eukrea_cpuimx25_timer_init,
  };
  
-MACHINE_START(EUKREA_CPUIMX25, "Eukrea CPUIMX25")
+MACHINE_START(EUKREA_CPUIMX25SD, "Eukrea CPUIMX25")
         /* Maintainer: Eukrea Electromatique */
         .boot_params = MX25_PHYS_OFFSET + 0x100,
         .map_io = mx25_map_io,
diff --git a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c

index 6707de0ab71671ca07098f279aec78e8522305dc..6778f8193bc66cfbf4deffde745e6209654580c2 100644 (file)
--- a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c
+++ b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c
@@ -30,6 +30,7 @@
  #include <linux/input.h>
  #include <linux/gpio.h>
  #include <linux/delay.h>
+#include <sound/tlv320aic32x4.h>
  #include <asm/mach-types.h>
  #include <asm/mach/arch.h>
  #include <asm/mach/time.h>
@@ -196,6 +197,17 @@ static struct pca953x_platform_data visstrim_m10_pca9555_pdata = {
         .invert = 0,
  };
  
+static struct aic32x4_pdata visstrim_m10_aic32x4_pdata = {
+       .power_cfg = AIC32X4_PWR_MICBIAS_2075_LDOIN |
+                    AIC32X4_PWR_AVDD_DVDD_WEAK_DISABLE |
+                    AIC32X4_PWR_AIC32X4_LDO_ENABLE |
+                    AIC32X4_PWR_CMMODE_LDOIN_RANGE_18_36 |
+                    AIC32X4_PWR_CMMODE_HP_LDOIN_POWERED,
+       .micpga_routing = AIC32X4_MICPGA_ROUTE_LMIC_IN2R_10K |
+                        AIC32X4_MICPGA_ROUTE_RMIC_IN1L_10K,
+       .swapdacs = false,
+};
+
  static struct i2c_board_info visstrim_m10_i2c_devices[] = {
         {
                 I2C_BOARD_INFO("pca9555", 0x20),
@@ -203,6 +215,7 @@ static struct i2c_board_info visstrim_m10_i2c_devices[] = {
         },
         {
                 I2C_BOARD_INFO("tlv320aic32x4", 0x18),
+               .platform_data = &visstrim_m10_aic32x4_pdata,
         }
  };
  
diff --git a/arch/arm/mach-imx/mach-mx31ads.c b/arch/arm/mach-imx/mach-mx31ads.c

index 0ce49478a479783947ee6547b3eab34e86fd04be..29ca8907a78034f32cd7961a6040138e7356e2c2 100644 (file)
--- a/arch/arm/mach-imx/mach-mx31ads.c
+++ b/arch/arm/mach-imx/mach-mx31ads.c
@@ -468,7 +468,7 @@ static struct i2c_board_info __initdata mx31ads_i2c1_devices[] = {
  #endif
  };
  
-static void mxc_init_i2c(void)
+static void __init mxc_init_i2c(void)
  {
         i2c_register_board_info(1, mx31ads_i2c1_devices,
                                 ARRAY_SIZE(mx31ads_i2c1_devices));
@@ -486,7 +486,7 @@ static unsigned int ssi_pins[] = {
         MX31_PIN_STXD5__STXD5,
  };
  
-static void mxc_init_audio(void)
+static void __init mxc_init_audio(void)
  {
         imx31_add_imx_ssi(0, NULL);
         mxc_iomux_setup_multiple_pins(ssi_pins, ARRAY_SIZE(ssi_pins), "ssi");
diff --git a/arch/arm/mach-imx/mach-mx31lilly.c b/arch/arm/mach-imx/mach-mx31lilly.c

index 750368ddf0f977479ee301933d31832ff5434e65..126913ad106ab092c1455b58f4c006aca8325493 100644 (file)
--- a/arch/arm/mach-imx/mach-mx31lilly.c
+++ b/arch/arm/mach-imx/mach-mx31lilly.c
@@ -192,7 +192,7 @@ static struct mxc_usbh_platform_data usbh2_pdata __initdata = {
         .portsc = MXC_EHCI_MODE_ULPI | MXC_EHCI_UTMI_8BIT,
  };
  
-static void lilly1131_usb_init(void)
+static void __init lilly1131_usb_init(void)
  {
         imx31_add_mxc_ehci_hs(1, &usbh1_pdata);
  
diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c

index 2fbbdd5eac352bb4df44611894e4220e029f7903..fcf0ae95651fd3c0fa3ce9f17f3c4b7d3cc55d15 100644 (file)
--- a/arch/arm/mach-integrator/integrator_ap.c
+++ b/arch/arm/mach-integrator/integrator_ap.c
@@ -337,15 +337,15 @@ static unsigned long timer_reload;
  static void integrator_clocksource_init(u32 khz)
  {
         void __iomem *base = (void __iomem *)TIMER2_VA_BASE;
-       u32 ctrl = TIMER_CTRL_ENABLE;
+       u32 ctrl = TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC;
  
         if (khz >= 1500) {
                 khz /= 16;
-               ctrl = TIMER_CTRL_DIV16;
+               ctrl |= TIMER_CTRL_DIV16;
         }
  
-       writel(ctrl, base + TIMER_CTRL);
         writel(0xffff, base + TIMER_LOAD);
+       writel(ctrl, base + TIMER_CTRL);
  
         clocksource_mmio_init(base + TIMER_VALUE, "timer2",
                 khz * 1000, 200, 16, clocksource_mmio_readl_down);
diff --git a/arch/arm/mach-mmp/gplugd.c b/arch/arm/mach-mmp/gplugd.c

index c070c24255f43562c1d434c1ee35d33951e43f0c..98e25d9aaab61ea9c4865d6130fafa0e35d8134b 100644 (file)
--- a/arch/arm/mach-mmp/gplugd.c
+++ b/arch/arm/mach-mmp/gplugd.c
@@ -16,16 +16,18 @@
  #include <mach/gpio.h>
  #include <mach/pxa168.h>
  #include <mach/mfp-pxa168.h>
-#include <mach/mfp-gplugd.h>
  
  #include "common.h"
  
  static unsigned long gplugd_pin_config[] __initdata = {
         /* UART3 */
-       GPIO8_UART3_SOUT,
-       GPIO9_UART3_SIN,
-       GPI1O_UART3_CTS,
-       GPI11_UART3_RTS,
+       GPIO8_UART3_TXD,
+       GPIO9_UART3_RXD,
+       GPIO1O_UART3_CTS,
+       GPIO11_UART3_RTS,
+
+       /* USB OTG PEN */
+       GPIO18_GPIO,
  
         /* MMC2 */
         GPIO28_MMC2_CMD,
@@ -109,6 +111,12 @@ static unsigned long gplugd_pin_config[] __initdata = {
         GPIO105_CI2C_SDA,
         GPIO106_CI2C_SCL,
  
+       /* SPI NOR Flash on SSP2 */
+       GPIO107_SSP2_RXD,
+       GPIO108_SSP2_TXD,
+       GPIO110_GPIO,     /* SPI_CSn */
+       GPIO111_SSP2_CLK,
+
         /* Select JTAG */
         GPIO109_GPIO,
  
@@ -154,7 +162,7 @@ static void __init select_disp_freq(void)
                                 "frequency\n");
         } else {
                 gpio_direction_output(35, 1);
-               gpio_free(104);
+               gpio_free(35);
         }
  
         if (unlikely(gpio_request(85, "DISP_FREQ_SEL_2"))) {
@@ -162,7 +170,7 @@ static void __init select_disp_freq(void)
                                 "frequency\n");
         } else {
                 gpio_direction_output(85, 0);
-               gpio_free(104);
+               gpio_free(85);
         }
  }
  
diff --git a/arch/arm/mach-mmp/include/mach/mfp-gplugd.h b/arch/arm/mach-mmp/include/mach/mfp-gplugd.h

deleted file mode 100644 (file)

index b8cf38d..0000000
--- a/arch/arm/mach-mmp/include/mach/mfp-gplugd.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * linux/arch/arm/mach-mmp/include/mach/mfp-gplugd.h
- *
- *   MFP definitions used in gplugD
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __MACH_MFP_GPLUGD_H
-#define __MACH_MFP_GPLUGD_H
-
-#include <plat/mfp.h>
-#include <mach/mfp.h>
-
-/* UART3 */
-#define GPIO8_UART3_SOUT       MFP_CFG(GPIO8, AF2)
-#define GPIO9_UART3_SIN        MFP_CFG(GPIO9, AF2)
-#define GPI1O_UART3_CTS        MFP_CFG(GPIO10, AF2)
-#define GPI11_UART3_RTS        MFP_CFG(GPIO11, AF2)
-
-/* MMC2 */
-#define        GPIO28_MMC2_CMD         MFP_CFG_DRV(GPIO28, AF6, FAST)
-#define        GPIO29_MMC2_CLK         MFP_CFG_DRV(GPIO29, AF6, FAST)
-#define        GPIO30_MMC2_DAT0        MFP_CFG_DRV(GPIO30, AF6, FAST)
-#define        GPIO31_MMC2_DAT1        MFP_CFG_DRV(GPIO31, AF6, FAST)
-#define        GPIO32_MMC2_DAT2        MFP_CFG_DRV(GPIO32, AF6, FAST)
-#define        GPIO33_MMC2_DAT3        MFP_CFG_DRV(GPIO33, AF6, FAST)
-
-/* I2S */
-#undef GPIO114_I2S_FRM
-#undef GPIO115_I2S_BCLK
-
-#define GPIO114_I2S_FRM                MFP_CFG_DRV(GPIO114, AF1, FAST)
-#define GPIO115_I2S_BCLK        MFP_CFG_DRV(GPIO115, AF1, FAST)
-#define GPIO116_I2S_TXD         MFP_CFG_DRV(GPIO116, AF1, FAST)
-
-/* MMC4 */
-#define GPIO125_MMC4_DAT3       MFP_CFG_DRV(GPIO125, AF7, FAST)
-#define GPIO126_MMC4_DAT2       MFP_CFG_DRV(GPIO126, AF7, FAST)
-#define GPIO127_MMC4_DAT1       MFP_CFG_DRV(GPIO127, AF7, FAST)
-#define GPIO0_2_MMC4_DAT0       MFP_CFG_DRV(GPIO0_2, AF7, FAST)
-#define GPIO1_2_MMC4_CMD        MFP_CFG_DRV(GPIO1_2, AF7, FAST)
-#define GPIO2_2_MMC4_CLK        MFP_CFG_DRV(GPIO2_2, AF7, FAST)
-
-/* OTG GPIO */
-#define GPIO_USB_OTG_PEN        18
-#define GPIO_USB_OIDIR          20
-
-/* Other GPIOs are 35, 84, 85 */
-#endif /* __MACH_MFP_GPLUGD_H */
diff --git a/arch/arm/mach-mmp/include/mach/mfp-pxa168.h b/arch/arm/mach-mmp/include/mach/mfp-pxa168.h

index 8c782328b21cf4e6a92460b436a5241a5e573fae..92aaa3c19d611fd1ab66f564b5148acf688fb49d 100644 (file)
--- a/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
+++ b/arch/arm/mach-mmp/include/mach/mfp-pxa168.h
@@ -203,6 +203,10 @@
  #define GPIO33_CF_nCD2         MFP_CFG(GPIO33, AF3)
  
  /* UART */
+#define GPIO8_UART3_TXD                MFP_CFG(GPIO8, AF2)
+#define GPIO9_UART3_RXD                MFP_CFG(GPIO9, AF2)
+#define GPIO1O_UART3_CTS       MFP_CFG(GPIO10, AF2)
+#define GPIO11_UART3_RTS       MFP_CFG(GPIO11, AF2)
  #define GPIO88_UART2_TXD       MFP_CFG(GPIO88, AF2)
  #define GPIO89_UART2_RXD       MFP_CFG(GPIO89, AF2)
  #define GPIO107_UART1_TXD      MFP_CFG_DRV(GPIO107, AF1, FAST)
@@ -232,6 +236,22 @@
  #define GPIO53_MMC1_CD         MFP_CFG(GPIO53, AF1)
  #define GPIO46_MMC1_WP         MFP_CFG(GPIO46, AF1)
  
+/* MMC2 */
+#define        GPIO28_MMC2_CMD         MFP_CFG_DRV(GPIO28, AF6, FAST)
+#define        GPIO29_MMC2_CLK         MFP_CFG_DRV(GPIO29, AF6, FAST)
+#define        GPIO30_MMC2_DAT0        MFP_CFG_DRV(GPIO30, AF6, FAST)
+#define        GPIO31_MMC2_DAT1        MFP_CFG_DRV(GPIO31, AF6, FAST)
+#define        GPIO32_MMC2_DAT2        MFP_CFG_DRV(GPIO32, AF6, FAST)
+#define        GPIO33_MMC2_DAT3        MFP_CFG_DRV(GPIO33, AF6, FAST)
+
+/* MMC4 */
+#define GPIO125_MMC4_DAT3       MFP_CFG_DRV(GPIO125, AF7, FAST)
+#define GPIO126_MMC4_DAT2       MFP_CFG_DRV(GPIO126, AF7, FAST)
+#define GPIO127_MMC4_DAT1       MFP_CFG_DRV(GPIO127, AF7, FAST)
+#define GPIO0_2_MMC4_DAT0       MFP_CFG_DRV(GPIO0_2, AF7, FAST)
+#define GPIO1_2_MMC4_CMD        MFP_CFG_DRV(GPIO1_2, AF7, FAST)
+#define GPIO2_2_MMC4_CLK        MFP_CFG_DRV(GPIO2_2, AF7, FAST)
+
  /* LCD */
  #define GPIO84_LCD_CS          MFP_CFG(GPIO84, AF1)
  #define GPIO60_LCD_DD0         MFP_CFG(GPIO60, AF1)
@@ -269,11 +289,12 @@
  #define GPIO106_CI2C_SCL       MFP_CFG(GPIO106, AF1)
  
  /* I2S */
-#define GPIO113_I2S_MCLK       MFP_CFG(GPIO113,AF6)
-#define GPIO114_I2S_FRM                MFP_CFG(GPIO114,AF1)
-#define GPIO115_I2S_BCLK       MFP_CFG(GPIO115,AF1)
-#define GPIO116_I2S_RXD                MFP_CFG(GPIO116,AF2)
-#define GPIO117_I2S_TXD                MFP_CFG(GPIO117,AF2)
+#define GPIO113_I2S_MCLK       MFP_CFG(GPIO113, AF6)
+#define GPIO114_I2S_FRM                MFP_CFG(GPIO114, AF1)
+#define GPIO115_I2S_BCLK       MFP_CFG(GPIO115, AF1)
+#define GPIO116_I2S_RXD                MFP_CFG(GPIO116, AF2)
+#define GPIO116_I2S_TXD         MFP_CFG(GPIO116, AF1)
+#define GPIO117_I2S_TXD                MFP_CFG(GPIO117, AF2)
  
  /* PWM */
  #define GPIO96_PWM3_OUT                MFP_CFG(GPIO96, AF1)
@@ -324,4 +345,10 @@
  #define GPIO101_MII_MDIO       MFP_CFG(GPIO101, AF5)
  #define GPIO103_RX_DV          MFP_CFG(GPIO103, AF5)
  
+/* SSP2 */
+#define GPIO107_SSP2_RXD       MFP_CFG(GPIO107, AF4)
+#define GPIO108_SSP2_TXD       MFP_CFG(GPIO108, AF4)
+#define GPIO111_SSP2_CLK       MFP_CFG(GPIO111, AF4)
+#define GPIO112_SSP2_FRM       MFP_CFG(GPIO112, AF4)
+
  #endif /* __ASM_MACH_MFP_PXA168_H */
diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c

index 99833b9485cf15fc464f5ed1ab4f5d2bd4288f6e..4e91ee6e27c8e6e59810a91fe4c8ef0bc59e5e3c 100644 (file)
--- a/arch/arm/mach-mmp/time.c
+++ b/arch/arm/mach-mmp/time.c
@@ -51,12 +51,12 @@ static inline uint32_t timer_read(void)
  {
         int delay = 100;
  
-       __raw_writel(1, TIMERS_VIRT_BASE + TMR_CVWR(0));
+       __raw_writel(1, TIMERS_VIRT_BASE + TMR_CVWR(1));
  
         while (delay--)
                 cpu_relax();
  
-       return __raw_readl(TIMERS_VIRT_BASE + TMR_CVWR(0));
+       return __raw_readl(TIMERS_VIRT_BASE + TMR_CVWR(1));
  }
  
  unsigned long long notrace sched_clock(void)
@@ -75,28 +75,51 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
  {
         struct clock_event_device *c = dev_id;
  
-       /* disable and clear pending interrupt status */
-       __raw_writel(0x0, TIMERS_VIRT_BASE + TMR_IER(0));
-       __raw_writel(0x1, TIMERS_VIRT_BASE + TMR_ICR(0));
+       /*
+        * Clear pending interrupt status.
+        */
+       __raw_writel(0x01, TIMERS_VIRT_BASE + TMR_ICR(0));
+
+       /*
+        * Disable timer 0.
+        */
+       __raw_writel(0x02, TIMERS_VIRT_BASE + TMR_CER);
+
         c->event_handler(c);
+
         return IRQ_HANDLED;
  }
  
  static int timer_set_next_event(unsigned long delta,
                                 struct clock_event_device *dev)
  {
-       unsigned long flags, next;
+       unsigned long flags;
  
         local_irq_save(flags);
  
-       /* clear pending interrupt status and enable */
+       /*
+        * Disable timer 0.
+        */
+       __raw_writel(0x02, TIMERS_VIRT_BASE + TMR_CER);
+
+       /*
+        * Clear and enable timer match 0 interrupt.
+        */
         __raw_writel(0x01, TIMERS_VIRT_BASE + TMR_ICR(0));
         __raw_writel(0x01, TIMERS_VIRT_BASE + TMR_IER(0));
  
-       next = timer_read() + delta;
-       __raw_writel(next, TIMERS_VIRT_BASE + TMR_TN_MM(0, 0));
+       /*
+        * Setup new clockevent timer value.
+        */
+       __raw_writel(delta - 1, TIMERS_VIRT_BASE + TMR_TN_MM(0, 0));
+
+       /*
+        * Enable timer 0.
+        */
+       __raw_writel(0x03, TIMERS_VIRT_BASE + TMR_CER);
  
         local_irq_restore(flags);
+
         return 0;
  }
  
@@ -145,23 +168,26 @@ static struct clocksource cksrc = {
  static void __init timer_config(void)
  {
         uint32_t ccr = __raw_readl(TIMERS_VIRT_BASE + TMR_CCR);
-       uint32_t cer = __raw_readl(TIMERS_VIRT_BASE + TMR_CER);
-       uint32_t cmr = __raw_readl(TIMERS_VIRT_BASE + TMR_CMR);
  
-       __raw_writel(cer & ~0x1, TIMERS_VIRT_BASE + TMR_CER); /* disable */
+       __raw_writel(0x0, TIMERS_VIRT_BASE + TMR_CER); /* disable */
  
-       ccr &= (cpu_is_mmp2()) ? TMR_CCR_CS_0(0) : TMR_CCR_CS_0(3);
+       ccr &= (cpu_is_mmp2()) ? (TMR_CCR_CS_0(0) | TMR_CCR_CS_1(0)) :
+               (TMR_CCR_CS_0(3) | TMR_CCR_CS_1(3));
         __raw_writel(ccr, TIMERS_VIRT_BASE + TMR_CCR);
  
-       /* free-running mode */
-       __raw_writel(cmr | 0x01, TIMERS_VIRT_BASE + TMR_CMR);
+       /* set timer 0 to periodic mode, and timer 1 to free-running mode */
+       __raw_writel(0x2, TIMERS_VIRT_BASE + TMR_CMR);
  
-       __raw_writel(0x0, TIMERS_VIRT_BASE + TMR_PLCR(0)); /* free-running */
+       __raw_writel(0x1, TIMERS_VIRT_BASE + TMR_PLCR(0)); /* periodic */
         __raw_writel(0x7, TIMERS_VIRT_BASE + TMR_ICR(0));  /* clear status */
         __raw_writel(0x0, TIMERS_VIRT_BASE + TMR_IER(0));
  
-       /* enable timer counter */
-       __raw_writel(cer | 0x01, TIMERS_VIRT_BASE + TMR_CER);
+       __raw_writel(0x0, TIMERS_VIRT_BASE + TMR_PLCR(1)); /* free-running */
+       __raw_writel(0x7, TIMERS_VIRT_BASE + TMR_ICR(1));  /* clear status */
+       __raw_writel(0x0, TIMERS_VIRT_BASE + TMR_IER(1));
+
+       /* enable timer 1 counter */
+       __raw_writel(0x2, TIMERS_VIRT_BASE + TMR_CER);
  }
  
  static struct irqaction timer_irq = {
diff --git a/arch/arm/mach-mx5/board-cpuimx51.c b/arch/arm/mach-mx5/board-cpuimx51.c

index 7c893fa70266928418aebff1f3140221d4355b30..68934ea8725a8a18399d7da508752c81c09dc0b3 100644 (file)
--- a/arch/arm/mach-mx5/board-cpuimx51.c
+++ b/arch/arm/mach-mx5/board-cpuimx51.c
@@ -81,7 +81,7 @@ static struct plat_serial8250_port serial_platform_data[] = {
                 .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_IOREMAP,
         }, {
                 .mapbase = (unsigned long)(MX51_CS1_BASE_ADDR + 0x2000000),
-               .irq = irq_to_gpio(CPUIMX51_QUARTD_GPIO),
+               .irq = gpio_to_irq(CPUIMX51_QUARTD_GPIO),
                 .irqflags = IRQF_TRIGGER_HIGH,
                 .uartclk = CPUIMX51_QUART_XTAL,
                 .regshift = CPUIMX51_QUART_REGSHIFT,
diff --git a/arch/arm/mach-mx5/board-mx51_babbage.c b/arch/arm/mach-mx5/board-mx51_babbage.c

index e400b09109ceb423c6e5ea041fd67102df6470b2..11b0ff67f89d5f2eaca8e04b46307ab955decba1 100644 (file)
--- a/arch/arm/mach-mx5/board-mx51_babbage.c
+++ b/arch/arm/mach-mx5/board-mx51_babbage.c
@@ -369,7 +369,7 @@ static void __init mx51_babbage_init(void)
                                         ARRAY_SIZE(mx51babbage_pads));
  
         imx51_add_imx_uart(0, &uart_pdata);
-       imx51_add_imx_uart(1, &uart_pdata);
+       imx51_add_imx_uart(1, NULL);
         imx51_add_imx_uart(2, &uart_pdata);
  
         babbage_fec_reset();
diff --git a/arch/arm/mach-mx5/board-mx51_efikamx.c b/arch/arm/mach-mx5/board-mx51_efikamx.c

index f70700dc0ec1d7bf3b4120c5645c96dbbb05db26..551daf85ff8cec5fb18b905cc7a1876797413970 100644 (file)
--- a/arch/arm/mach-mx5/board-mx51_efikamx.c
+++ b/arch/arm/mach-mx5/board-mx51_efikamx.c
@@ -108,9 +108,9 @@ static void __init mx51_efikamx_board_id(void)
         gpio_request(EFIKAMX_PCBID2, "pcbid2");
         gpio_direction_input(EFIKAMX_PCBID2);
  
-       id = gpio_get_value(EFIKAMX_PCBID0);
-       id |= gpio_get_value(EFIKAMX_PCBID1) << 1;
-       id |= gpio_get_value(EFIKAMX_PCBID2) << 2;
+       id = gpio_get_value(EFIKAMX_PCBID0) ? 1 : 0;
+       id |= (gpio_get_value(EFIKAMX_PCBID1) ? 1 : 0) << 1;
+       id |= (gpio_get_value(EFIKAMX_PCBID2) ? 1 : 0) << 2;
  
         switch (id) {
         case 7:
diff --git a/arch/arm/mach-mx5/board-mx51_efikasb.c b/arch/arm/mach-mx5/board-mx51_efikasb.c

index 2e4d9d32a87c688061181939c318790ea3f52d37..8a9bca22beb530cb67eb637060e6ed5d60a9a223 100644 (file)
--- a/arch/arm/mach-mx5/board-mx51_efikasb.c
+++ b/arch/arm/mach-mx5/board-mx51_efikasb.c
@@ -156,23 +156,24 @@ static struct gpio_keys_button mx51_efikasb_keys[] = {
         {
                 .code = KEY_POWER,
                 .gpio = EFIKASB_PWRKEY,
-               .type = EV_PWR,
+               .type = EV_KEY,
                 .desc = "Power Button",
                 .wakeup = 1,
-               .debounce_interval = 10, /* ms */
+               .active_low = 1,
         },
         {
                 .code = SW_LID,
                 .gpio = EFIKASB_LID,
                 .type = EV_SW,
                 .desc = "Lid Switch",
+               .active_low = 1,
         },
         {
-               /* SW_RFKILLALL vs KEY_RFKILL ? */
-               .code = SW_RFKILL_ALL,
+               .code = KEY_RFKILL,
                 .gpio = EFIKASB_RFKILL,
-               .type = EV_SW,
+               .type = EV_KEY,
                 .desc = "rfkill",
+               .active_low = 1,
         },
  };
  
@@ -224,8 +225,8 @@ static void __init mx51_efikasb_board_id(void)
         gpio_request(EFIKASB_PCBID1, "pcb id1");
         gpio_direction_input(EFIKASB_PCBID1);
  
-       id = gpio_get_value(EFIKASB_PCBID0);
-       id |= gpio_get_value(EFIKASB_PCBID1) << 1;
+       id = gpio_get_value(EFIKASB_PCBID0) ? 1 : 0;
+       id |= (gpio_get_value(EFIKASB_PCBID1) ? 1 : 0) << 1;
  
         switch (id) {
         default:
diff --git a/arch/arm/mach-mx5/clock-mx51-mx53.c b/arch/arm/mach-mx5/clock-mx51-mx53.c

index 7f20308c4dbd3f2a949a77ab0d96e8555a6d9ad2..f7bf996f463b60c3cafd94d89f52882deaf49517 100644 (file)
--- a/arch/arm/mach-mx5/clock-mx51-mx53.c
+++ b/arch/arm/mach-mx5/clock-mx51-mx53.c
@@ -271,7 +271,11 @@ static int _clk_pll_enable(struct clk *clk)
         int i = 0;
  
         pllbase = _get_pll_base(clk);
-       reg = __raw_readl(pllbase + MXC_PLL_DP_CTL) | MXC_PLL_DP_CTL_UPEN;
+       reg = __raw_readl(pllbase + MXC_PLL_DP_CTL);
+       if (reg & MXC_PLL_DP_CTL_UPEN)
+               return 0;
+
+       reg |= MXC_PLL_DP_CTL_UPEN;
         __raw_writel(reg, pllbase + MXC_PLL_DP_CTL);
  
         /* Wait for lock */
diff --git a/arch/arm/mach-mx5/mx51_efika.c b/arch/arm/mach-mx5/mx51_efika.c

index 4435e03cea5d844b77263829b03919c3d6ddd2d1..c9209454807ad24887e70867a2f48974e7480aaa 100644 (file)
--- a/arch/arm/mach-mx5/mx51_efika.c
+++ b/arch/arm/mach-mx5/mx51_efika.c
@@ -186,7 +186,7 @@ static int initialize_usbh1_port(struct platform_device *pdev)
  
         mdelay(10);
  
-       return mx51_initialize_usb_hw(0, MXC_EHCI_ITC_NO_THRESHOLD);
+       return mx51_initialize_usb_hw(pdev->id, MXC_EHCI_ITC_NO_THRESHOLD);
  }
  
  static struct mxc_usbh_platform_data usbh1_config = {
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig

index 4ae6257b39a40e40d99005d7f61216c062efe890..57b66d590c522994f923bb01329453d9d14a7330 100644 (file)
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -7,7 +7,6 @@ config ARCH_OMAP2PLUS_TYPICAL
         default y
         select AEABI
         select REGULATOR
-       select PM
         select PM_RUNTIME
         select VFP
         select NEON if ARCH_OMAP3 || ARCH_OMAP4
diff --git a/arch/arm/mach-omap2/board-am3517crane.c b/arch/arm/mach-omap2/board-am3517crane.c

index 5f2b55ff04ff5975dc56bdf991e0a560782b63a7..933e9353cb379cbab378bb43e2fea965b60c81f4 100644 (file)
--- a/arch/arm/mach-omap2/board-am3517crane.c
+++ b/arch/arm/mach-omap2/board-am3517crane.c
@@ -45,8 +45,6 @@ static struct omap_board_config_kernel am3517_crane_config[] __initdata = {
  static struct omap_board_mux board_mux[] __initdata = {
         { .reg_offset = OMAP_MUX_TERMINATOR },
  };
-#else
-#define board_mux      NULL
  #endif
  
  static void __init am3517_crane_init_early(void)
diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c

index 32f5f895568a0c01e0b4566a3dfe600130ff87a9..3ae16b4e3f52b4068399978a7bea43536b2fa9c2 100644 (file)
--- a/arch/arm/mach-omap2/board-omap3beagle.c
+++ b/arch/arm/mach-omap2/board-omap3beagle.c
@@ -491,23 +491,22 @@ static void __init beagle_opp_init(void)
  
         /* Custom OPP enabled for all xM versions */
         if (cpu_is_omap3630()) {
-               struct omap_hwmod *mh = omap_hwmod_lookup("mpu");
-               struct omap_hwmod *dh = omap_hwmod_lookup("iva");
-               struct device *dev;
+               struct device *mpu_dev, *iva_dev;
  
-               if (!mh || !dh) {
+               mpu_dev = omap2_get_mpuss_device();
+               iva_dev = omap2_get_iva_device();
+
+               if (!mpu_dev || !iva_dev) {
                         pr_err("%s: Aiee.. no mpu/dsp devices? %p %p\n",
-                               __func__, mh, dh);
+                               __func__, mpu_dev, iva_dev);
                         return;
                 }
                 /* Enable MPU 1GHz and lower opps */
-               dev = &mh->od->pdev.dev;
-               r = opp_enable(dev, 800000000);
+               r = opp_enable(mpu_dev, 800000000);
                 /* TODO: MPU 1GHz needs SR and ABB */
  
                 /* Enable IVA 800MHz and lower opps */
-               dev = &dh->od->pdev.dev;
-               r |= opp_enable(dev, 660000000);
+               r |= opp_enable(iva_dev, 660000000);
                 /* TODO: DSP 800MHz needs SR and ABB */
                 if (r) {
                         pr_err("%s: failed to enable higher opp %d\n",
@@ -516,10 +515,8 @@ static void __init beagle_opp_init(void)
                          * Cleanup - disable the higher freqs - we dont care
                          * about the results
                          */
-                       dev = &mh->od->pdev.dev;
-                       opp_disable(dev, 800000000);
-                       dev = &dh->od->pdev.dev;
-                       opp_disable(dev, 660000000);
+                       opp_disable(mpu_dev, 800000000);
+                       opp_disable(iva_dev, 660000000);
                 }
         }
         return;
diff --git a/arch/arm/mach-omap2/clock3xxx_data.c b/arch/arm/mach-omap2/clock3xxx_data.c

index ffd55b1c439622002be65a320957c943a7b19a18..b9b8446831477facb05a7192d94701245c2b211f 100644 (file)
--- a/arch/arm/mach-omap2/clock3xxx_data.c
+++ b/arch/arm/mach-omap2/clock3xxx_data.c
@@ -3078,6 +3078,7 @@ static struct clk gpt12_fck = {
         .name           = "gpt12_fck",
         .ops            = &clkops_null,
         .parent         = &secure_32k_fck,
+       .clkdm_name     = "wkup_clkdm",
         .recalc         = &followparent_recalc,
  };
  
@@ -3085,6 +3086,7 @@ static struct clk wdt1_fck = {
         .name           = "wdt1_fck",
         .ops            = &clkops_null,
         .parent         = &secure_32k_fck,
+       .clkdm_name     = "wkup_clkdm",
         .recalc         = &followparent_recalc,
  };
  
diff --git a/arch/arm/mach-omap2/clock44xx_data.c b/arch/arm/mach-omap2/clock44xx_data.c

index 2af0e3f00ce1b509fe09fbd396b912026ca0d161..c0b6fbda340898257247193f60bb8a7f7bfaf5ff 100644 (file)
--- a/arch/arm/mach-omap2/clock44xx_data.c
+++ b/arch/arm/mach-omap2/clock44xx_data.c
@@ -3376,10 +3376,18 @@ int __init omap4xxx_clk_init(void)
         } else if (cpu_is_omap446x()) {
                 cpu_mask = RATE_IN_4460;
                 cpu_clkflg = CK_446X;
+       } else {
+               return 0;
         }
  
         clk_init(&omap2_clk_functions);
-       omap2_clk_disable_clkdm_control();
+
+       /*
+        * Must stay commented until all OMAP SoC drivers are
+        * converted to runtime PM, or drivers may start crashing
+        *
+        * omap2_clk_disable_clkdm_control();
+        */
  
         for (c = omap44xx_clks; c < omap44xx_clks + ARRAY_SIZE(omap44xx_clks);
                                                                           c++)
diff --git a/arch/arm/mach-omap2/clockdomain.c b/arch/arm/mach-omap2/clockdomain.c

index ab7db083f97fda3ffa9b6b5b419ae321878bc6ed..8f0890685d7b10f7f594e9cd44c8a972c33d15e9 100644 (file)
--- a/arch/arm/mach-omap2/clockdomain.c
+++ b/arch/arm/mach-omap2/clockdomain.c
@@ -747,6 +747,7 @@ int clkdm_wakeup(struct clockdomain *clkdm)
         spin_lock_irqsave(&clkdm->lock, flags);
         clkdm->_flags &= ~_CLKDM_FLAG_HWSUP_ENABLED;
         ret = arch_clkdm->clkdm_wakeup(clkdm);
+       ret |= pwrdm_state_switch(clkdm->pwrdm.ptr);
         spin_unlock_irqrestore(&clkdm->lock, flags);
         return ret;
  }
@@ -818,6 +819,7 @@ void clkdm_deny_idle(struct clockdomain *clkdm)
         spin_lock_irqsave(&clkdm->lock, flags);
         clkdm->_flags &= ~_CLKDM_FLAG_HWSUP_ENABLED;
         arch_clkdm->clkdm_deny_idle(clkdm);
+       pwrdm_state_switch(clkdm->pwrdm.ptr);
         spin_unlock_irqrestore(&clkdm->lock, flags);
  }
  
diff --git a/arch/arm/mach-omap2/cminst44xx.h b/arch/arm/mach-omap2/cminst44xx.h

index f2ea6453ade0b7ff1ee4119daf3d274648208fa4..a018a7327879daba08da25f7376db36d8b5a4985 100644 (file)
--- a/arch/arm/mach-omap2/cminst44xx.h
+++ b/arch/arm/mach-omap2/cminst44xx.h
@@ -18,13 +18,36 @@ extern void omap4_cminst_clkdm_force_sleep(u8 part, s16 inst, u16 cdoffs);
  extern void omap4_cminst_clkdm_force_wakeup(u8 part, s16 inst, u16 cdoffs);
  
  extern int omap4_cminst_wait_module_ready(u8 part, u16 inst, s16 cdoffs, u16 clkctrl_offs);
-extern int omap4_cminst_wait_module_idle(u8 part, u16 inst, s16 cdoffs, u16 clkctrl_offs);
+
+# ifdef CONFIG_ARCH_OMAP4
+extern int omap4_cminst_wait_module_idle(u8 part, u16 inst, s16 cdoffs,
+                                        u16 clkctrl_offs);
  
  extern void omap4_cminst_module_enable(u8 mode, u8 part, u16 inst, s16 cdoffs,
                                        u16 clkctrl_offs);
  extern void omap4_cminst_module_disable(u8 part, u16 inst, s16 cdoffs,
                                         u16 clkctrl_offs);
  
+# else
+
+static inline int omap4_cminst_wait_module_idle(u8 part, u16 inst, s16 cdoffs,
+                                       u16 clkctrl_offs)
+{
+       return 0;
+}
+
+static inline void omap4_cminst_module_enable(u8 mode, u8 part, u16 inst,
+                               s16 cdoffs, u16 clkctrl_offs)
+{
+}
+
+static inline void omap4_cminst_module_disable(u8 part, u16 inst, s16 cdoffs,
+                                u16 clkctrl_offs)
+{
+}
+
+# endif
+
  /*
   * In an ideal world, we would not export these low-level functions,
   * but this will probably take some time to fix properly
diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c

index c7fb22abc219db3756758275e0aa30c90fcdd5e6..655e9480eb98c7ac5692590e833b5484f6f412b6 100644 (file)
--- a/arch/arm/mach-omap2/mux.c
+++ b/arch/arm/mach-omap2/mux.c
@@ -821,11 +821,10 @@ static void __init omap_mux_set_cmdline_signals(void)
         if (!omap_mux_options)
                 return;
  
-       options = kmalloc(strlen(omap_mux_options) + 1, GFP_KERNEL);
+       options = kstrdup(omap_mux_options, GFP_KERNEL);
         if (!options)
                 return;
  
-       strcpy(options, omap_mux_options);
         next_opt = options;
  
         while ((token = strsep(&next_opt, ",")) != NULL) {
@@ -855,24 +854,19 @@ static int __init omap_mux_copy_names(struct omap_mux *src,
  
         for (i = 0; i < OMAP_MUX_NR_MODES; i++) {
                 if (src->muxnames[i]) {
-                       dst->muxnames[i] =
-                               kmalloc(strlen(src->muxnames[i]) + 1,
-                                       GFP_KERNEL);
+                       dst->muxnames[i] = kstrdup(src->muxnames[i],
+                                                  GFP_KERNEL);
                         if (!dst->muxnames[i])
                                 goto free;
-                       strcpy(dst->muxnames[i], src->muxnames[i]);
                 }
         }
  
  #ifdef CONFIG_DEBUG_FS
         for (i = 0; i < OMAP_MUX_NR_SIDES; i++) {
                 if (src->balls[i]) {
-                       dst->balls[i] =
-                               kmalloc(strlen(src->balls[i]) + 1,
-                                       GFP_KERNEL);
+                       dst->balls[i] = kstrdup(src->balls[i], GFP_KERNEL);
                         if (!dst->balls[i])
                                 goto free;
-                       strcpy(dst->balls[i], src->balls[i]);
                 }
         }
  #endif
diff --git a/arch/arm/mach-omap2/omap_hwmod_2430_data.c b/arch/arm/mach-omap2/omap_hwmod_2430_data.c

index 16743c7d6e8effe6c1b0089008865543f5798c43..408193d8e044b6288a2c8e696f5f50f94ca46fe3 100644 (file)
--- a/arch/arm/mach-omap2/omap_hwmod_2430_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2430_data.c
@@ -192,6 +192,7 @@ static struct omap_hwmod_addr_space omap2430_usbhsotg_addrs[] = {
                 .pa_end         = OMAP243X_HS_BASE + SZ_4K - 1,
                 .flags          = ADDR_TYPE_RT
         },
+       { }
  };
  
  /*  l4_core ->usbhsotg  interface */
diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c

index 3feb35911a325576bf7a6f57892adbcccbeda853..472bf22d5e84875416854c3e76c206fa02c85302 100644 (file)
--- a/arch/arm/mach-omap2/pm.c
+++ b/arch/arm/mach-omap2/pm.c
@@ -130,7 +130,6 @@ int omap_set_pwrdm_state(struct powerdomain *pwrdm, u32 state)
                 } else {
                         hwsup = clkdm_in_hwsup(pwrdm->pwrdm_clkdms[0]);
                         clkdm_wakeup(pwrdm->pwrdm_clkdms[0]);
-                       pwrdm_wait_transition(pwrdm);
                         sleep_switch = FORCEWAKEUP_SWITCH;
                 }
         }
@@ -156,7 +155,6 @@ int omap_set_pwrdm_state(struct powerdomain *pwrdm, u32 state)
                 return ret;
         }
  
-       pwrdm_wait_transition(pwrdm);
         pwrdm_state_switch(pwrdm);
  err:
         return ret;
diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c

index 9af08473bf1033c6019d82e36dd503184c3c6b93..ef71fdd40fc47b05072821ede75365606d20ab61 100644 (file)
--- a/arch/arm/mach-omap2/powerdomain.c
+++ b/arch/arm/mach-omap2/powerdomain.c
@@ -195,28 +195,35 @@ static int _pwrdm_post_transition_cb(struct powerdomain *pwrdm, void *unused)
  
  /**
   * pwrdm_init - set up the powerdomain layer
- * @pwrdm_list: array of struct powerdomain pointers to register
+ * @pwrdms: array of struct powerdomain pointers to register
   * @custom_funcs: func pointers for arch specific implementations
   *
- * Loop through the array of powerdomains @pwrdm_list, registering all
- * that are available on the current CPU. If pwrdm_list is supplied
- * and not null, all of the referenced powerdomains will be
- * registered.  No return value.  XXX pwrdm_list is not really a
- * "list"; it is an array.  Rename appropriately.
+ * Loop through the array of powerdomains @pwrdms, registering all
+ * that are available on the current CPU.  Also, program all
+ * powerdomain target state as ON; this is to prevent domains from
+ * hitting low power states (if bootloader has target states set to
+ * something other than ON) and potentially even losing context while
+ * PM is not fully initialized.  The PM late init code can then program
+ * the desired target state for all the power domains.  No return
+ * value.
   */
-void pwrdm_init(struct powerdomain **pwrdm_list, struct pwrdm_ops *custom_funcs)
+void pwrdm_init(struct powerdomain **pwrdms, struct pwrdm_ops *custom_funcs)
  {
         struct powerdomain **p = NULL;
+       struct powerdomain *temp_p;
  
         if (!custom_funcs)
                 WARN(1, "powerdomain: No custom pwrdm functions registered\n");
         else
                 arch_pwrdm = custom_funcs;
  
-       if (pwrdm_list) {
-               for (p = pwrdm_list; *p; p++)
+       if (pwrdms) {
+               for (p = pwrdms; *p; p++)
                         _pwrdm_register(*p);
         }
+
+       list_for_each_entry(temp_p, &pwrdm_list, node)
+               pwrdm_set_next_pwrst(temp_p, PWRDM_POWER_ON);
  }
  
  /**
diff --git a/arch/arm/mach-omap2/smartreflex.c b/arch/arm/mach-omap2/smartreflex.c

index 2ce2fb7664bc60992f7f5eebbbed3fc57345ac9e..34c01a7de81088b6e1657a575e8679125c13f75c 100644 (file)
--- a/arch/arm/mach-omap2/smartreflex.c
+++ b/arch/arm/mach-omap2/smartreflex.c
@@ -621,7 +621,7 @@ void sr_disable(struct voltagedomain *voltdm)
                         sr_v2_disable(sr);
         }
  
-       pm_runtime_put_sync(&sr->pdev->dev);
+       pm_runtime_put_sync_suspend(&sr->pdev->dev);
  }
  
  /**
@@ -860,6 +860,7 @@ static int __init omap_sr_probe(struct platform_device *pdev)
         irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
  
         pm_runtime_enable(&pdev->dev);
+       pm_runtime_irq_safe(&pdev->dev);
  
         sr_info->pdev = pdev;
         sr_info->srid = pdev->id;
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c

index e9640728239bee400efd40264319ac133f5a7887..cf1de7d2630d69ebfcefee911670bdd94110f305 100644 (file)
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -293,7 +293,8 @@ static void __init omap2_gp_clocksource_init(int gptimer_id,
         pr_info("OMAP clocksource: GPTIMER%d at %lu Hz\n",
                 gptimer_id, clksrc.rate);
  
-       __omap_dm_timer_load_start(clksrc.io_base, OMAP_TIMER_CTRL_ST, 0, 1);
+       __omap_dm_timer_load_start(clksrc.io_base,
+                       OMAP_TIMER_CTRL_ST | OMAP_TIMER_CTRL_AR, 0, 1);
         init_sched_clock(&cd, dmtimer_update_sched_clock, 32, clksrc.rate);
  
         if (clocksource_register_hz(&clocksource_gpt, clksrc.rate))
diff --git a/arch/arm/mach-omap2/twl-common.c b/arch/arm/mach-omap2/twl-common.c

index 2543342dbccb2532d391921c2651f4b764fe53c4..daa056ed8738112305aba454898a7e18c3ed129c 100644 (file)
--- a/arch/arm/mach-omap2/twl-common.c
+++ b/arch/arm/mach-omap2/twl-common.c
@@ -48,14 +48,7 @@ void __init omap_pmic_init(int bus, u32 clkrate,
         omap_register_i2c_bus(bus, clkrate, &pmic_i2c_board_info, 1);
  }
  
-static struct twl4030_usb_data omap4_usb_pdata = {
-       .phy_init       = omap4430_phy_init,
-       .phy_exit       = omap4430_phy_exit,
-       .phy_power      = omap4430_phy_power,
-       .phy_set_clock  = omap4430_phy_set_clk,
-       .phy_suspend    = omap4430_phy_suspend,
-};
-
+#if defined(CONFIG_ARCH_OMAP3)
  static struct twl4030_usb_data omap3_usb_pdata = {
         .usb_mode       = T2_USB_MODE_ULPI,
  };
@@ -122,6 +115,45 @@ static struct regulator_init_data omap3_vpll2_idata = {
         .consumer_supplies              = omap3_vpll2_supplies,
  };
  
+void __init omap3_pmic_get_config(struct twl4030_platform_data *pmic_data,
+                                 u32 pdata_flags, u32 regulators_flags)
+{
+       if (!pmic_data->irq_base)
+               pmic_data->irq_base = TWL4030_IRQ_BASE;
+       if (!pmic_data->irq_end)
+               pmic_data->irq_end = TWL4030_IRQ_END;
+
+       /* Common platform data configurations */
+       if (pdata_flags & TWL_COMMON_PDATA_USB && !pmic_data->usb)
+               pmic_data->usb = &omap3_usb_pdata;
+
+       if (pdata_flags & TWL_COMMON_PDATA_BCI && !pmic_data->bci)
+               pmic_data->bci = &omap3_bci_pdata;
+
+       if (pdata_flags & TWL_COMMON_PDATA_MADC && !pmic_data->madc)
+               pmic_data->madc = &omap3_madc_pdata;
+
+       if (pdata_flags & TWL_COMMON_PDATA_AUDIO && !pmic_data->audio)
+               pmic_data->audio = &omap3_audio_pdata;
+
+       /* Common regulator configurations */
+       if (regulators_flags & TWL_COMMON_REGULATOR_VDAC && !pmic_data->vdac)
+               pmic_data->vdac = &omap3_vdac_idata;
+
+       if (regulators_flags & TWL_COMMON_REGULATOR_VPLL2 && !pmic_data->vpll2)
+               pmic_data->vpll2 = &omap3_vpll2_idata;
+}
+#endif /* CONFIG_ARCH_OMAP3 */
+
+#if defined(CONFIG_ARCH_OMAP4)
+static struct twl4030_usb_data omap4_usb_pdata = {
+       .phy_init       = omap4430_phy_init,
+       .phy_exit       = omap4430_phy_exit,
+       .phy_power      = omap4430_phy_power,
+       .phy_set_clock  = omap4430_phy_set_clk,
+       .phy_suspend    = omap4430_phy_suspend,
+};
+
  static struct regulator_init_data omap4_vdac_idata = {
         .constraints = {
                 .min_uV                 = 1800000,
@@ -273,32 +305,4 @@ void __init omap4_pmic_get_config(struct twl4030_platform_data *pmic_data,
             !pmic_data->clk32kg)
                 pmic_data->clk32kg = &omap4_clk32kg_idata;
  }
-
-void __init omap3_pmic_get_config(struct twl4030_platform_data *pmic_data,
-                                 u32 pdata_flags, u32 regulators_flags)
-{
-       if (!pmic_data->irq_base)
-               pmic_data->irq_base = TWL4030_IRQ_BASE;
-       if (!pmic_data->irq_end)
-               pmic_data->irq_end = TWL4030_IRQ_END;
-
-       /* Common platform data configurations */
-       if (pdata_flags & TWL_COMMON_PDATA_USB && !pmic_data->usb)
-               pmic_data->usb = &omap3_usb_pdata;
-
-       if (pdata_flags & TWL_COMMON_PDATA_BCI && !pmic_data->bci)
-               pmic_data->bci = &omap3_bci_pdata;
-
-       if (pdata_flags & TWL_COMMON_PDATA_MADC && !pmic_data->madc)
-               pmic_data->madc = &omap3_madc_pdata;
-
-       if (pdata_flags & TWL_COMMON_PDATA_AUDIO && !pmic_data->audio)
-               pmic_data->audio = &omap3_audio_pdata;
-
-       /* Common regulator configurations */
-       if (regulators_flags & TWL_COMMON_REGULATOR_VDAC && !pmic_data->vdac)
-               pmic_data->vdac = &omap3_vdac_idata;
-
-       if (regulators_flags & TWL_COMMON_REGULATOR_VPLL2 && !pmic_data->vpll2)
-               pmic_data->vpll2 = &omap3_vpll2_idata;
-}
+#endif /* CONFIG_ARCH_OMAP4 */
diff --git a/arch/arm/mach-orion5x/dns323-setup.c b/arch/arm/mach-orion5x/dns323-setup.c

index a6eddae82a0b5b236db29658bbeb2c8b8a1d30ff..c105556a0ee1a9ab158742e8d3b417eae7a8646b 100644 (file)
--- a/arch/arm/mach-orion5x/dns323-setup.c
+++ b/arch/arm/mach-orion5x/dns323-setup.c
@@ -77,7 +77,7 @@ static int __init dns323_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
         /*
          * Check for devices with hard-wired IRQs.
          */
-       irq = orion5x_pci_map_irq(const dev, slot, pin);
+       irq = orion5x_pci_map_irq(dev, slot, pin);
         if (irq != -1)
                 return irq;
  
diff --git a/arch/arm/mach-orion5x/pci.c b/arch/arm/mach-orion5x/pci.c

index 28b8760ab9fa07c6e09128811bbfa3dd1ba1cffb..bc4a920e26ee2767e19639c2ef502b4e735587ed 100644 (file)
--- a/arch/arm/mach-orion5x/pci.c
+++ b/arch/arm/mach-orion5x/pci.c
@@ -14,6 +14,7 @@
  #include <linux/pci.h>
  #include <linux/slab.h>
  #include <linux/mbus.h>
+#include <video/vga.h>
  #include <asm/irq.h>
  #include <asm/mach/pci.h>
  #include <plat/pcie.h>
diff --git a/arch/arm/mach-prima2/clock.c b/arch/arm/mach-prima2/clock.c

index f9a2aaf63f711de6c4e868cac8330e5ba551c961..615a4e75ceabc0941d3624e2672a0b2ccf66cff0 100644 (file)
--- a/arch/arm/mach-prima2/clock.c
+++ b/arch/arm/mach-prima2/clock.c
@@ -481,6 +481,7 @@ static void __init sirfsoc_clk_init(void)
  
  static struct of_device_id clkc_ids[] = {
         { .compatible = "sirf,prima2-clkc" },
+       {},
  };
  
  void __init sirfsoc_of_clk_init(void)
diff --git a/arch/arm/mach-prima2/irq.c b/arch/arm/mach-prima2/irq.c

index c3404cbb6ff7a613dd8717293c82d9a0f86d5f17..7af254d046ba0044385be65514101a9820c2618a 100644 (file)
--- a/arch/arm/mach-prima2/irq.c
+++ b/arch/arm/mach-prima2/irq.c
@@ -51,6 +51,7 @@ static __init void sirfsoc_irq_init(void)
  
  static struct of_device_id intc_ids[]  = {
         { .compatible = "sirf,prima2-intc" },
+       {},
  };
  
  void __init sirfsoc_of_irq_init(void)
diff --git a/arch/arm/mach-prima2/rstc.c b/arch/arm/mach-prima2/rstc.c

index d074786e83d469e3ed4aac8f6a182a12a3434776..492cfa8d261073eac1d17498bee1af9123b77c00 100644 (file)
--- a/arch/arm/mach-prima2/rstc.c
+++ b/arch/arm/mach-prima2/rstc.c
@@ -19,6 +19,7 @@ static DEFINE_MUTEX(rstc_lock);
  
  static struct of_device_id rstc_ids[]  = {
         { .compatible = "sirf,prima2-rstc" },
+       {},
  };
  
  static int __init sirfsoc_of_rstc_init(void)
diff --git a/arch/arm/mach-prima2/timer.c b/arch/arm/mach-prima2/timer.c

index 44027f34a88aadfc066945ef686c8f9530fc9e4c..ed7ec48d11da853f3bff27559151d282c3c3f684 100644 (file)
--- a/arch/arm/mach-prima2/timer.c
+++ b/arch/arm/mach-prima2/timer.c
@@ -190,6 +190,7 @@ static void __init sirfsoc_timer_init(void)
  
  static struct of_device_id timer_ids[] = {
         { .compatible = "sirf,prima2-tick" },
+       {},
  };
  
  static void __init sirfsoc_of_timer_map(void)
diff --git a/arch/arm/mach-realview/include/mach/system.h b/arch/arm/mach-realview/include/mach/system.h

index a30f2e3ec17852a8f865b2ddb8320f6c9b3f4987..6657ff23116139184061b66c52064cb81565e737 100644 (file)
--- a/arch/arm/mach-realview/include/mach/system.h
+++ b/arch/arm/mach-realview/include/mach/system.h
@@ -44,6 +44,7 @@ static inline void arch_reset(char mode, const char *cmd)
          */
         if (realview_reset)
                 realview_reset(mode);
+       dsb();
  }
  
  #endif
diff --git a/arch/arm/mach-s3c64xx/pm.c b/arch/arm/mach-s3c64xx/pm.c

index 8bad64370689b4163501631e3b7fc85f7e184510..055e2858b0dd9a31c027ecd8d7032d6ecefbddc7 100644 (file)
--- a/arch/arm/mach-s3c64xx/pm.c
+++ b/arch/arm/mach-s3c64xx/pm.c
@@ -16,6 +16,7 @@
  #include <linux/suspend.h>
  #include <linux/serial_core.h>
  #include <linux/io.h>
+#include <linux/gpio.h>
  
  #include <mach/map.h>
  #include <mach/irqs.h>
diff --git a/arch/arm/mach-s5p64x0/irq-eint.c b/arch/arm/mach-s5p64x0/irq-eint.c

index 69ed4545112b121aeafa1fa9d9d7a36617f6d9e1..fe7380f5c3cd858613753230a9ef0f0e973e76bf 100644 (file)
--- a/arch/arm/mach-s5p64x0/irq-eint.c
+++ b/arch/arm/mach-s5p64x0/irq-eint.c
@@ -129,7 +129,7 @@ static int s5p64x0_alloc_gc(void)
         }
  
         ct = gc->chip_types;
-       ct->chip.irq_ack = irq_gc_ack;
+       ct->chip.irq_ack = irq_gc_ack_set_bit;
         ct->chip.irq_mask = irq_gc_mask_set_bit;
         ct->chip.irq_unmask = irq_gc_mask_clr_bit;
         ct->chip.irq_set_type = s5p64x0_irq_eint_set_type;
diff --git a/arch/arm/mach-s5pv210/pm.c b/arch/arm/mach-s5pv210/pm.c

index 309e388a8a83388d711b1956f71aa74874a2cab0..f149d278377b8f28f29a53f55ba303e450bd6a55 100644 (file)
--- a/arch/arm/mach-s5pv210/pm.c
+++ b/arch/arm/mach-s5pv210/pm.c
@@ -88,7 +88,7 @@ static struct sleep_save s5pv210_core_save[] = {
         SAVE_ITEM(S3C2410_TCNTO(0)),
  };
  
-void s5pv210_cpu_suspend(unsigned long arg)
+static int s5pv210_cpu_suspend(unsigned long arg)
  {
         unsigned long tmp;
  
diff --git a/arch/arm/mach-sa1100/pci-nanoengine.c b/arch/arm/mach-sa1100/pci-nanoengine.c

index 964c6c3cd7a668285b53a3ebcc7eed15bce2bf91..dd39fee59549784aff6a7341966ad192f8904a09 100644 (file)
--- a/arch/arm/mach-sa1100/pci-nanoengine.c
+++ b/arch/arm/mach-sa1100/pci-nanoengine.c
@@ -28,6 +28,7 @@
  #include <asm/mach-types.h>
  
  #include <mach/nanoengine.h>
+#include <mach/hardware.h>
  
  static DEFINE_SPINLOCK(nano_lock);
  
diff --git a/arch/arm/mach-shmobile/board-ag5evm.c b/arch/arm/mach-shmobile/board-ag5evm.c

index ce5c2513c6ce93f62f37356d55f2835fbf3a6332..cdfdd624d21dd27719c156639588c06e95be1b9d 100644 (file)
--- a/arch/arm/mach-shmobile/board-ag5evm.c
+++ b/arch/arm/mach-shmobile/board-ag5evm.c
@@ -341,6 +341,7 @@ static struct platform_device mipidsi0_device = {
  static struct sh_mobile_sdhi_info sdhi0_info = {
         .dma_slave_tx   = SHDMA_SLAVE_SDHI0_TX,
         .dma_slave_rx   = SHDMA_SLAVE_SDHI0_RX,
+       .tmio_flags     = TMIO_MMC_HAS_IDLE_WAIT,
         .tmio_caps      = MMC_CAP_SD_HIGHSPEED,
         .tmio_ocr_mask  = MMC_VDD_27_28 | MMC_VDD_28_29,
  };
@@ -382,7 +383,7 @@ void ag5evm_sdhi1_set_pwr(struct platform_device *pdev, int state)
  }
  
  static struct sh_mobile_sdhi_info sh_sdhi1_info = {
-       .tmio_flags     = TMIO_MMC_WRPROTECT_DISABLE,
+       .tmio_flags     = TMIO_MMC_WRPROTECT_DISABLE | TMIO_MMC_HAS_IDLE_WAIT,
         .tmio_caps      = MMC_CAP_NONREMOVABLE | MMC_CAP_SDIO_IRQ,
         .tmio_ocr_mask  = MMC_VDD_32_33 | MMC_VDD_33_34,
         .set_pwr        = ag5evm_sdhi1_set_pwr,
diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c

index 9e0856b2f9e9fcc8adb4455267e0ec4964efa56d..523f608eb8cf0109609188b4589f56e03bdc4a36 100644 (file)
--- a/arch/arm/mach-shmobile/board-ap4evb.c
+++ b/arch/arm/mach-shmobile/board-ap4evb.c
@@ -1412,6 +1412,7 @@ static void __init ap4evb_init(void)
         fsi_init_pm_clock();
         sh7372_pm_init();
         pm_clk_add(&fsi_device.dev, "spu2");
+       pm_clk_add(&lcdc1_device.dev, "hdmi");
  }
  
  static void __init ap4evb_timer_init(void)
diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c

index d41c01f83f152f75589cc228a186a6f98da59a5e..17c19dc2560431b99699e28569f52f68f9202cc8 100644 (file)
--- a/arch/arm/mach-shmobile/board-mackerel.c
+++ b/arch/arm/mach-shmobile/board-mackerel.c
@@ -641,6 +641,8 @@ static struct usbhs_private usbhs0_private = {
                 },
                 .driver_param = {
                         .buswait_bwait  = 4,
+                       .d0_tx_id       = SHDMA_SLAVE_USB0_TX,
+                       .d1_rx_id       = SHDMA_SLAVE_USB0_RX,
                 },
         },
  };
@@ -810,6 +812,8 @@ static struct usbhs_private usbhs1_private = {
                         .buswait_bwait  = 4,
                         .pipe_type      = usbhs1_pipe_cfg,
                         .pipe_size      = ARRAY_SIZE(usbhs1_pipe_cfg),
+                       .d0_tx_id       = SHDMA_SLAVE_USB1_TX,
+                       .d1_rx_id       = SHDMA_SLAVE_USB1_RX,
                 },
         },
  };
@@ -1588,6 +1592,7 @@ static void __init mackerel_init(void)
         hdmi_init_pm_clock();
         sh7372_pm_init();
         pm_clk_add(&fsi_device.dev, "spu2");
+       pm_clk_add(&hdmi_lcdc_device.dev, "hdmi");
  }
  
  static void __init mackerel_timer_init(void)
diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c

index 6b1619a65dbac16bf4535f106ab3b2c03016ed9f..66975921e6467b363e037ca355c82952a17a300a 100644 (file)
--- a/arch/arm/mach-shmobile/clock-sh7372.c
+++ b/arch/arm/mach-shmobile/clock-sh7372.c
@@ -503,16 +503,17 @@ static struct clk *late_main_clks[] = {
         &sh7372_fsidivb_clk,
  };
  
-enum { MSTP001,
+enum { MSTP001, MSTP000,
         MSTP131, MSTP130,
         MSTP129, MSTP128, MSTP127, MSTP126, MSTP125,
         MSTP118, MSTP117, MSTP116, MSTP113,
         MSTP106, MSTP101, MSTP100,
         MSTP223,
-       MSTP218, MSTP217, MSTP216,
-       MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200,
-       MSTP329, MSTP328, MSTP323, MSTP322, MSTP314, MSTP313, MSTP312,
-       MSTP423, MSTP415, MSTP413, MSTP411, MSTP410, MSTP406, MSTP403,
+       MSTP218, MSTP217, MSTP216, MSTP214, MSTP208, MSTP207,
+       MSTP206, MSTP205, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200,
+       MSTP328, MSTP323, MSTP322, MSTP314, MSTP313, MSTP312,
+       MSTP423, MSTP415, MSTP413, MSTP411, MSTP410, MSTP407, MSTP406,
+       MSTP405, MSTP404, MSTP403, MSTP400,
         MSTP_NR };
  
  #define MSTP(_parent, _reg, _bit, _flags) \
@@ -520,6 +521,7 @@ enum { MSTP001,
  
  static struct clk mstp_clks[MSTP_NR] = {
         [MSTP001] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR0, 1, 0), /* IIC2 */
+       [MSTP000] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR0, 0, 0), /* MSIOF0 */
         [MSTP131] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 31, 0), /* VEU3 */
         [MSTP130] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 30, 0), /* VEU2 */
         [MSTP129] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 29, 0), /* VEU1 */
@@ -538,14 +540,16 @@ static struct clk mstp_clks[MSTP_NR] = {
         [MSTP218] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 18, 0), /* DMAC1 */
         [MSTP217] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 17, 0), /* DMAC2 */
         [MSTP216] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 16, 0), /* DMAC3 */
+       [MSTP214] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 14, 0), /* USBDMAC */
+       [MSTP208] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 8, 0), /* MSIOF1 */
         [MSTP207] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 7, 0), /* SCIFA5 */
         [MSTP206] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 6, 0), /* SCIFB */
+       [MSTP205] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 5, 0), /* MSIOF2 */
         [MSTP204] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 4, 0), /* SCIFA0 */
         [MSTP203] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 3, 0), /* SCIFA1 */
         [MSTP202] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 2, 0), /* SCIFA2 */
         [MSTP201] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 1, 0), /* SCIFA3 */
         [MSTP200] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 0, 0), /* SCIFA4 */
-       [MSTP329] = MSTP(&r_clk, SMSTPCR3, 29, 0), /* CMT10 */
         [MSTP328] = MSTP(&div6_clks[DIV6_SPU], SMSTPCR3, 28, 0), /* FSI2 */
         [MSTP323] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR3, 23, 0), /* IIC1 */
         [MSTP322] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR3, 22, 0), /* USB0 */
@@ -557,8 +561,12 @@ static struct clk mstp_clks[MSTP_NR] = {
         [MSTP413] = MSTP(&pllc1_div2_clk, SMSTPCR4, 13, 0), /* HDMI */
         [MSTP411] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR4, 11, 0), /* IIC3 */
         [MSTP410] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR4, 10, 0), /* IIC4 */
+       [MSTP407] = MSTP(&div4_clks[DIV4_HP], SMSTPCR4, 7, 0), /* USB-DMAC1 */
         [MSTP406] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR4, 6, 0), /* USB1 */
+       [MSTP405] = MSTP(&r_clk, SMSTPCR4, 5, 0), /* CMT4 */
+       [MSTP404] = MSTP(&r_clk, SMSTPCR4, 4, 0), /* CMT3 */
         [MSTP403] = MSTP(&r_clk, SMSTPCR4, 3, 0), /* KEYSC */
+       [MSTP400] = MSTP(&r_clk, SMSTPCR4, 0, 0), /* CMT2 */
  };
  
  static struct clk_lookup lookups[] = {
@@ -609,6 +617,7 @@ static struct clk_lookup lookups[] = {
  
         /* MSTP32 clocks */
         CLKDEV_DEV_ID("i2c-sh_mobile.2", &mstp_clks[MSTP001]), /* IIC2 */
+       CLKDEV_DEV_ID("spi_sh_msiof.0", &mstp_clks[MSTP000]), /* MSIOF0 */
         CLKDEV_DEV_ID("uio_pdrv_genirq.4", &mstp_clks[MSTP131]), /* VEU3 */
         CLKDEV_DEV_ID("uio_pdrv_genirq.3", &mstp_clks[MSTP130]), /* VEU2 */
         CLKDEV_DEV_ID("uio_pdrv_genirq.2", &mstp_clks[MSTP129]), /* VEU1 */
@@ -629,14 +638,16 @@ static struct clk_lookup lookups[] = {
         CLKDEV_DEV_ID("sh-dma-engine.0", &mstp_clks[MSTP218]), /* DMAC1 */
         CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[MSTP217]), /* DMAC2 */
         CLKDEV_DEV_ID("sh-dma-engine.2", &mstp_clks[MSTP216]), /* DMAC3 */
+       CLKDEV_DEV_ID("sh-dma-engine.3", &mstp_clks[MSTP214]), /* USB-DMAC0 */
+       CLKDEV_DEV_ID("spi_sh_msiof.1", &mstp_clks[MSTP208]), /* MSIOF1 */
         CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP207]), /* SCIFA5 */
         CLKDEV_DEV_ID("sh-sci.6", &mstp_clks[MSTP206]), /* SCIFB */
+       CLKDEV_DEV_ID("spi_sh_msiof.2", &mstp_clks[MSTP205]), /* MSIOF2 */
         CLKDEV_DEV_ID("sh-sci.0", &mstp_clks[MSTP204]), /* SCIFA0 */
         CLKDEV_DEV_ID("sh-sci.1", &mstp_clks[MSTP203]), /* SCIFA1 */
         CLKDEV_DEV_ID("sh-sci.2", &mstp_clks[MSTP202]), /* SCIFA2 */
         CLKDEV_DEV_ID("sh-sci.3", &mstp_clks[MSTP201]), /* SCIFA3 */
         CLKDEV_DEV_ID("sh-sci.4", &mstp_clks[MSTP200]), /* SCIFA4 */
-       CLKDEV_DEV_ID("sh_cmt.10", &mstp_clks[MSTP329]), /* CMT10 */
         CLKDEV_DEV_ID("sh_fsi2", &mstp_clks[MSTP328]), /* FSI2 */
         CLKDEV_DEV_ID("i2c-sh_mobile.1", &mstp_clks[MSTP323]), /* IIC1 */
         CLKDEV_DEV_ID("r8a66597_hcd.0", &mstp_clks[MSTP322]), /* USB0 */
@@ -650,11 +661,17 @@ static struct clk_lookup lookups[] = {
         CLKDEV_DEV_ID("sh-mobile-hdmi", &mstp_clks[MSTP413]), /* HDMI */
         CLKDEV_DEV_ID("i2c-sh_mobile.3", &mstp_clks[MSTP411]), /* IIC3 */
         CLKDEV_DEV_ID("i2c-sh_mobile.4", &mstp_clks[MSTP410]), /* IIC4 */
+       CLKDEV_DEV_ID("sh-dma-engine.4", &mstp_clks[MSTP407]), /* USB-DMAC1 */
         CLKDEV_DEV_ID("r8a66597_hcd.1", &mstp_clks[MSTP406]), /* USB1 */
         CLKDEV_DEV_ID("r8a66597_udc.1", &mstp_clks[MSTP406]), /* USB1 */
         CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[MSTP406]), /* USB1 */
+       CLKDEV_DEV_ID("sh_cmt.4", &mstp_clks[MSTP405]), /* CMT4 */
+       CLKDEV_DEV_ID("sh_cmt.3", &mstp_clks[MSTP404]), /* CMT3 */
         CLKDEV_DEV_ID("sh_keysc.0", &mstp_clks[MSTP403]), /* KEYSC */
+       CLKDEV_DEV_ID("sh_cmt.2", &mstp_clks[MSTP400]), /* CMT2 */
  
+       CLKDEV_ICK_ID("hdmi", "sh_mobile_lcdc_fb.1",
+                     &div6_reparent_clks[DIV6_HDMI]),
         CLKDEV_ICK_ID("ick", "sh-mobile-hdmi", &div6_reparent_clks[DIV6_HDMI]),
         CLKDEV_ICK_ID("icka", "sh_fsi2", &div6_reparent_clks[DIV6_FSIA]),
         CLKDEV_ICK_ID("ickb", "sh_fsi2", &div6_reparent_clks[DIV6_FSIB]),
diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c

index 6db2ccabc2bf95fe0cf4d99a90b382d0271d116c..61a846bb30f2034ec3ae69253aea2d2d6d695aa8 100644 (file)
--- a/arch/arm/mach-shmobile/clock-sh73a0.c
+++ b/arch/arm/mach-shmobile/clock-sh73a0.c
@@ -365,7 +365,7 @@ void __init sh73a0_clock_init(void)
         __raw_writel(0x108, SD2CKCR);
  
         /* detect main clock parent */
-       switch ((__raw_readl(CKSCR) >> 24) & 0x03) {
+       switch ((__raw_readl(CKSCR) >> 28) & 0x03) {
         case 0:
                 main_clk.parent = &sh73a0_extal1_clk;
                 break;
diff --git a/arch/arm/mach-shmobile/include/mach/sh7372.h b/arch/arm/mach-shmobile/include/mach/sh7372.h

index ce595cee86cd50e313984131ab5329578b98612d..24e63a85e6699a51660352601ca8d378e0570f36 100644 (file)
--- a/arch/arm/mach-shmobile/include/mach/sh7372.h
+++ b/arch/arm/mach-shmobile/include/mach/sh7372.h
@@ -459,6 +459,10 @@ enum {
         SHDMA_SLAVE_SDHI2_TX,
         SHDMA_SLAVE_MMCIF_RX,
         SHDMA_SLAVE_MMCIF_TX,
+       SHDMA_SLAVE_USB0_TX,
+       SHDMA_SLAVE_USB0_RX,
+       SHDMA_SLAVE_USB1_TX,
+       SHDMA_SLAVE_USB1_RX,
  };
  
  extern struct clk sh7372_extal1_clk;
diff --git a/arch/arm/mach-shmobile/intc-sh7372.c b/arch/arm/mach-shmobile/intc-sh7372.c

index 3b28743c77eb738f502737f434f0aee4c0f7cdf3..739315e30eb9f5c9f5b06d474df30602157dec3c 100644 (file)
--- a/arch/arm/mach-shmobile/intc-sh7372.c
+++ b/arch/arm/mach-shmobile/intc-sh7372.c
@@ -379,7 +379,7 @@ enum {
         /* BBIF2 */
         VPU,
         TSIF1,
-       _3DG_SGX530,
+       /* 3DG */
         _2DDMAC,
         IIC2_ALI2, IIC2_TACKI2, IIC2_WAITI2, IIC2_DTEI2,
         IPMMU_IPMMUR, IPMMU_IPMMUR2,
@@ -436,7 +436,7 @@ static struct intc_vect intcs_vectors[] = {
         /* BBIF2 */
         INTCS_VECT(VPU, 0x980),
         INTCS_VECT(TSIF1, 0x9a0),
-       INTCS_VECT(_3DG_SGX530, 0x9e0),
+       /* 3DG */
         INTCS_VECT(_2DDMAC, 0xa00),
         INTCS_VECT(IIC2_ALI2, 0xa80), INTCS_VECT(IIC2_TACKI2, 0xaa0),
         INTCS_VECT(IIC2_WAITI2, 0xac0), INTCS_VECT(IIC2_DTEI2, 0xae0),
@@ -521,7 +521,7 @@ static struct intc_mask_reg intcs_mask_registers[] = {
             RTDMAC_1_DEI3, RTDMAC_1_DEI2, RTDMAC_1_DEI1, RTDMAC_1_DEI0 } },
         { 0xffd20198, 0xffd201d8, 8, /* IMR6SA / IMCR6SA */
           { 0, 0, MSIOF, 0,
-           _3DG_SGX530, 0, 0, 0 } },
+           0, 0, 0, 0 } },
         { 0xffd2019c, 0xffd201dc, 8, /* IMR7SA / IMCR7SA */
           { 0, TMU_TUNI2, TMU_TUNI1, TMU_TUNI0,
             0, 0, 0, 0 } },
@@ -561,7 +561,6 @@ static struct intc_prio_reg intcs_prio_registers[] = {
                                               TMU_TUNI2, TSIF1 } },
         { 0xffd2001c, 0, 16, 4, /* IPRHS */ { 0, 0, VEU, BEU } },
         { 0xffd20020, 0, 16, 4, /* IPRIS */ { 0, MSIOF, TSIF0, IIC0 } },
-       { 0xffd20024, 0, 16, 4, /* IPRJS */ { 0, _3DG_SGX530, 0, 0 } },
         { 0xffd20028, 0, 16, 4, /* IPRKS */ { 0, 0, LMB, 0 } },
         { 0xffd2002c, 0, 16, 4, /* IPRLS */ { IPMMU, 0, 0, 0 } },
         { 0xffd20030, 0, 16, 4, /* IPRMS */ { IIC2, 0, 0, 0 } },
diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c

index 79f0413d8725cb8af1bbc5e5ba9d91e7f29feca8..2d9b1b1a25387fe1b4605663a5dc2a9135c6f5a5 100644 (file)
--- a/arch/arm/mach-shmobile/setup-sh7372.c
+++ b/arch/arm/mach-shmobile/setup-sh7372.c
@@ -169,35 +169,35 @@ static struct platform_device scif6_device = {
  };
  
  /* CMT */
-static struct sh_timer_config cmt10_platform_data = {
-       .name = "CMT10",
-       .channel_offset = 0x10,
-       .timer_bit = 0,
+static struct sh_timer_config cmt2_platform_data = {
+       .name = "CMT2",
+       .channel_offset = 0x40,
+       .timer_bit = 5,
         .clockevent_rating = 125,
         .clocksource_rating = 125,
  };
  
-static struct resource cmt10_resources[] = {
+static struct resource cmt2_resources[] = {
         [0] = {
-               .name   = "CMT10",
-               .start  = 0xe6138010,
-               .end    = 0xe613801b,
+               .name   = "CMT2",
+               .start  = 0xe6130040,
+               .end    = 0xe613004b,
                 .flags  = IORESOURCE_MEM,
         },
         [1] = {
-               .start  = evt2irq(0x0b00), /* CMT1_CMT10 */
+               .start  = evt2irq(0x0b80), /* CMT2 */
                 .flags  = IORESOURCE_IRQ,
         },
  };
  
-static struct platform_device cmt10_device = {
+static struct platform_device cmt2_device = {
         .name           = "sh_cmt",
-       .id             = 10,
+       .id             = 2,
         .dev = {
-               .platform_data  = &cmt10_platform_data,
+               .platform_data  = &cmt2_platform_data,
         },
-       .resource       = cmt10_resources,
-       .num_resources  = ARRAY_SIZE(cmt10_resources),
+       .resource       = cmt2_resources,
+       .num_resources  = ARRAY_SIZE(cmt2_resources),
  };
  
  /* TMU */
@@ -602,6 +602,150 @@ static struct platform_device dma2_device = {
         },
  };
  
+/*
+ * USB-DMAC
+ */
+
+unsigned int usbts_shift[] = {3, 4, 5};
+
+enum {
+       XMIT_SZ_8BYTE           = 0,
+       XMIT_SZ_16BYTE          = 1,
+       XMIT_SZ_32BYTE          = 2,
+};
+
+#define USBTS_INDEX2VAL(i) (((i) & 3) << 6)
+
+static const struct sh_dmae_channel sh7372_usb_dmae_channels[] = {
+       {
+               .offset = 0,
+       }, {
+               .offset = 0x20,
+       },
+};
+
+/* USB DMAC0 */
+static const struct sh_dmae_slave_config sh7372_usb_dmae0_slaves[] = {
+       {
+               .slave_id       = SHDMA_SLAVE_USB0_TX,
+               .chcr           = USBTS_INDEX2VAL(XMIT_SZ_8BYTE),
+       }, {
+               .slave_id       = SHDMA_SLAVE_USB0_RX,
+               .chcr           = USBTS_INDEX2VAL(XMIT_SZ_8BYTE),
+       },
+};
+
+static struct sh_dmae_pdata usb_dma0_platform_data = {
+       .slave          = sh7372_usb_dmae0_slaves,
+       .slave_num      = ARRAY_SIZE(sh7372_usb_dmae0_slaves),
+       .channel        = sh7372_usb_dmae_channels,
+       .channel_num    = ARRAY_SIZE(sh7372_usb_dmae_channels),
+       .ts_low_shift   = 6,
+       .ts_low_mask    = 0xc0,
+       .ts_high_shift  = 0,
+       .ts_high_mask   = 0,
+       .ts_shift       = usbts_shift,
+       .ts_shift_num   = ARRAY_SIZE(usbts_shift),
+       .dmaor_init     = DMAOR_DME,
+       .chcr_offset    = 0x14,
+       .chcr_ie_bit    = 1 << 5,
+       .dmaor_is_32bit = 1,
+       .needs_tend_set = 1,
+       .no_dmars       = 1,
+};
+
+static struct resource sh7372_usb_dmae0_resources[] = {
+       {
+               /* Channel registers and DMAOR */
+               .start  = 0xe68a0020,
+               .end    = 0xe68a0064 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       {
+               /* VCR/SWR/DMICR */
+               .start  = 0xe68a0000,
+               .end    = 0xe68a0014 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       {
+               /* IRQ for channels */
+               .start  = evt2irq(0x0a00),
+               .end    = evt2irq(0x0a00),
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device usb_dma0_device = {
+       .name           = "sh-dma-engine",
+       .id             = 3,
+       .resource       = sh7372_usb_dmae0_resources,
+       .num_resources  = ARRAY_SIZE(sh7372_usb_dmae0_resources),
+       .dev            = {
+               .platform_data  = &usb_dma0_platform_data,
+       },
+};
+
+/* USB DMAC1 */
+static const struct sh_dmae_slave_config sh7372_usb_dmae1_slaves[] = {
+       {
+               .slave_id       = SHDMA_SLAVE_USB1_TX,
+               .chcr           = USBTS_INDEX2VAL(XMIT_SZ_8BYTE),
+       }, {
+               .slave_id       = SHDMA_SLAVE_USB1_RX,
+               .chcr           = USBTS_INDEX2VAL(XMIT_SZ_8BYTE),
+       },
+};
+
+static struct sh_dmae_pdata usb_dma1_platform_data = {
+       .slave          = sh7372_usb_dmae1_slaves,
+       .slave_num      = ARRAY_SIZE(sh7372_usb_dmae1_slaves),
+       .channel        = sh7372_usb_dmae_channels,
+       .channel_num    = ARRAY_SIZE(sh7372_usb_dmae_channels),
+       .ts_low_shift   = 6,
+       .ts_low_mask    = 0xc0,
+       .ts_high_shift  = 0,
+       .ts_high_mask   = 0,
+       .ts_shift       = usbts_shift,
+       .ts_shift_num   = ARRAY_SIZE(usbts_shift),
+       .dmaor_init     = DMAOR_DME,
+       .chcr_offset    = 0x14,
+       .chcr_ie_bit    = 1 << 5,
+       .dmaor_is_32bit = 1,
+       .needs_tend_set = 1,
+       .no_dmars       = 1,
+};
+
+static struct resource sh7372_usb_dmae1_resources[] = {
+       {
+               /* Channel registers and DMAOR */
+               .start  = 0xe68c0020,
+               .end    = 0xe68c0064 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       {
+               /* VCR/SWR/DMICR */
+               .start  = 0xe68c0000,
+               .end    = 0xe68c0014 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       {
+               /* IRQ for channels */
+               .start  = evt2irq(0x1d00),
+               .end    = evt2irq(0x1d00),
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device usb_dma1_device = {
+       .name           = "sh-dma-engine",
+       .id             = 4,
+       .resource       = sh7372_usb_dmae1_resources,
+       .num_resources  = ARRAY_SIZE(sh7372_usb_dmae1_resources),
+       .dev            = {
+               .platform_data  = &usb_dma1_platform_data,
+       },
+};
+
  /* VPU */
  static struct uio_info vpu_platform_data = {
         .name = "VPU5HG",
@@ -818,7 +962,7 @@ static struct platform_device *sh7372_early_devices[] __initdata = {
         &scif4_device,
         &scif5_device,
         &scif6_device,
-       &cmt10_device,
+       &cmt2_device,
         &tmu00_device,
         &tmu01_device,
  };
@@ -829,6 +973,8 @@ static struct platform_device *sh7372_late_devices[] __initdata = {
         &dma0_device,
         &dma1_device,
         &dma2_device,
+       &usb_dma0_device,
+       &usb_dma1_device,
         &vpu_device,
         &veu0_device,
         &veu1_device,
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c

index 9e6b93b1a04342e7883aa241ddecb02856818a0e..d0d267a8d3f927dc0af84ebbccde640fa46ebafa 100644 (file)
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -318,6 +318,10 @@ static struct clk v2m_sp804_clk = {
         .rate   = 1000000,
  };
  
+static struct clk v2m_ref_clk = {
+       .rate   = 32768,
+};
+
  static struct clk dummy_apb_pclk;
  
  static struct clk_lookup v2m_lookups[] = {
@@ -348,6 +352,9 @@ static struct clk_lookup v2m_lookups[] = {
         }, {    /* CLCD */
                 .dev_id         = "mb:clcd",
                 .clk            = &osc1_clk,
+       }, {    /* SP805 WDT */
+               .dev_id         = "mb:wdt",
+               .clk            = &v2m_ref_clk,
         }, {    /* SP804 timers */
                 .dev_id         = "sp804",
                 .con_id         = "v2m-timer0",
diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S

index 52162d59407a4679eee5fce0f4d848a62b2ea182..2cbf68ef0e8321121e5ecabb55f50f95083beb1d 100644 (file)
--- a/arch/arm/mm/abort-macro.S
+++ b/arch/arm/mm/abort-macro.S
@@ -17,7 +17,7 @@
         cmp     \tmp, # 0x5600                  @ Is it ldrsb?
         orreq   \tmp, \tmp, #1 << 11            @ Set L-bit if yes
         tst     \tmp, #1 << 11                  @ L = 0 -> write
-       orreq   \psr, \psr, #1 << 11            @ yes.
+       orreq   \fsr, \fsr, #1 << 11            @ yes.
         b       do_DataAbort
  not_thumb:
         .endm
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c

index be7c638b648bb9771665719adbb2c7e74b5aafef..cfbcf8b9559955a9919a5f7732afa826d516e28b 100644 (file)
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -22,6 +22,7 @@
  #include <linux/sched.h>
  #include <linux/uaccess.h>
  
+#include <asm/system.h>
  #include <asm/unaligned.h>
  
  #include "fault.h"
@@ -95,6 +96,33 @@ static const char *usermode_action[] = {
         "signal+warn"
  };
  
+/* Return true if and only if the ARMv6 unaligned access model is in use. */
+static bool cpu_is_v6_unaligned(void)
+{
+       return cpu_architecture() >= CPU_ARCH_ARMv6 && (cr_alignment & CR_U);
+}
+
+static int safe_usermode(int new_usermode, bool warn)
+{
+       /*
+        * ARMv6 and later CPUs can perform unaligned accesses for
+        * most single load and store instructions up to word size.
+        * LDM, STM, LDRD and STRD still need to be handled.
+        *
+        * Ignoring the alignment fault is not an option on these
+        * CPUs since we spin re-faulting the instruction without
+        * making any progress.
+        */
+       if (cpu_is_v6_unaligned() && !(new_usermode & (UM_FIXUP | UM_SIGNAL))) {
+               new_usermode |= UM_FIXUP;
+
+               if (warn)
+                       printk(KERN_WARNING "alignment: ignoring faults is unsafe on this CPU.  Defaulting to fixup mode.\n");
+       }
+
+       return new_usermode;
+}
+
  static int alignment_proc_show(struct seq_file *m, void *v)
  {
         seq_printf(m, "User:\t\t%lu\n", ai_user);
@@ -125,7 +153,7 @@ static ssize_t alignment_proc_write(struct file *file, const char __user *buffer
                 if (get_user(mode, buffer))
                         return -EFAULT;
                 if (mode >= '0' && mode <= '5')
-                       ai_usermode = mode - '0';
+                       ai_usermode = safe_usermode(mode - '0', true);
         }
         return count;
  }
@@ -886,9 +914,16 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
         if (ai_usermode & UM_FIXUP)
                 goto fixup;
  
-       if (ai_usermode & UM_SIGNAL)
-               force_sig(SIGBUS, current);
-       else {
+       if (ai_usermode & UM_SIGNAL) {
+               siginfo_t si;
+
+               si.si_signo = SIGBUS;
+               si.si_errno = 0;
+               si.si_code = BUS_ADRALN;
+               si.si_addr = (void __user *)addr;
+
+               force_sig_info(si.si_signo, &si, current);
+       } else {
                 /*
                  * We're about to disable the alignment trap and return to
                  * user space.  But if an interrupt occurs before actually
@@ -926,20 +961,11 @@ static int __init alignment_init(void)
                 return -ENOMEM;
  #endif
  
-       /*
-        * ARMv6 and later CPUs can perform unaligned accesses for
-        * most single load and store instructions up to word size.
-        * LDM, STM, LDRD and STRD still need to be handled.
-        *
-        * Ignoring the alignment fault is not an option on these
-        * CPUs since we spin re-faulting the instruction without
-        * making any progress.
-        */
-       if (cpu_architecture() >= CPU_ARCH_ARMv6 && (cr_alignment & CR_U)) {
+       if (cpu_is_v6_unaligned()) {
                 cr_alignment &= ~CR_A;
                 cr_no_alignment &= ~CR_A;
                 set_cr(cr_alignment);
-               ai_usermode = UM_FIXUP;
+               ai_usermode = safe_usermode(ai_usermode, false);
         }
  
         hook_fault_code(1, do_alignment, SIGBUS, BUS_ADRALN,
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c

index 44c086710d2ba5d21a40741290ef8c40de35928c..9ecfdb5119513b632154f9812b4cd490de24efc5 100644 (file)
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -277,6 +277,25 @@ static void l2x0_disable(void)
         spin_unlock_irqrestore(&l2x0_lock, flags);
  }
  
+static void __init l2x0_unlock(__u32 cache_id)
+{
+       int lockregs;
+       int i;
+
+       if (cache_id == L2X0_CACHE_ID_PART_L310)
+               lockregs = 8;
+       else
+               /* L210 and unknown types */
+               lockregs = 1;
+
+       for (i = 0; i < lockregs; i++) {
+               writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE +
+                              i * L2X0_LOCKDOWN_STRIDE);
+               writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_I_BASE +
+                              i * L2X0_LOCKDOWN_STRIDE);
+       }
+}
+
  void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
  {
         __u32 aux;
@@ -328,6 +347,8 @@ void __init l2x0_init(void __iomem *base, __u32 aux_val, __u32 aux_mask)
          * accessing the below registers will fault.
          */
         if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & 1)) {
+               /* Make sure that I&D is not locked down when starting */
+               l2x0_unlock(cache_id);
  
                 /* l2x0 controller is disabled */
                 writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL);
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c

index 2fee782077c1ac5e7043efed88ad971f779bfa07..cc7e2d8be9aa6f55cd1f670c998e1c065acec75e 100644 (file)
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -298,7 +298,7 @@ static void __init arm_bootmem_free(unsigned long min, unsigned long max_low,
  #ifdef CONFIG_HAVE_ARCH_PFN_VALID
  int pfn_valid(unsigned long pfn)
  {
-       return memblock_is_memory(pfn << PAGE_SHIFT);
+       return memblock_is_memory(__pfn_to_phys(pfn));
  }
  EXPORT_SYMBOL(pfn_valid);
  #endif
@@ -441,7 +441,7 @@ static inline int free_area(unsigned long pfn, unsigned long end, char *s)
  static inline void poison_init_mem(void *s, size_t count)
  {
         u32 *p = (u32 *)s;
-       while ((count = count - 4))
+       for (; count != 0; count -= 4)
                 *p++ = 0xe7fddef0;
  }
  
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S

index 92bd102e39822f182db96b6b4900dc62df0c014d..2e6849b41f6696170226e6181f24fa96cd3fc91d 100644 (file)
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -379,7 +379,7 @@ ENTRY(cpu_arm920_set_pte_ext)
  
  /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
  .globl cpu_arm920_suspend_size
-.equ   cpu_arm920_suspend_size, 4 * 3
+.equ   cpu_arm920_suspend_size, 4 * 4
  #ifdef CONFIG_PM_SLEEP
  ENTRY(cpu_arm920_do_suspend)
         stmfd   sp!, {r4 - r7, lr}
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S

index 2bbcf053dffd26054dceb5a56af537622c8ec843..cd8f79c3a282a05f3283b14239dee687c3cd212d 100644 (file)
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -394,7 +394,7 @@ ENTRY(cpu_arm926_set_pte_ext)
  
  /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
  .globl cpu_arm926_suspend_size
-.equ   cpu_arm926_suspend_size, 4 * 3
+.equ   cpu_arm926_suspend_size, 4 * 4
  #ifdef CONFIG_PM_SLEEP
  ENTRY(cpu_arm926_do_suspend)
         stmfd   sp!, {r4 - r7, lr}
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S

index f8f7ea34bfc57f6429d7a25c196ed555a5567394..683af3a182b7e8bea1138ffa4f977b3f0828af61 100644 (file)
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -410,6 +410,7 @@ __arm946_proc_info:
         .long   0x41009460
         .long   0xff00fff0
         .long   0
+       .long   0
         b       __arm946_setup
         .long   cpu_arch_name
         .long   cpu_elf_name
@@ -418,6 +419,6 @@ __arm946_proc_info:
         .long   arm946_processor_functions
         .long   0
         .long   0
-       .long   arm940_cache_fns
+       .long   arm946_cache_fns
         .size   __arm946_proc_info, . - __arm946_proc_info
  
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S

index 07219c2ae114779c8015da0640f69fb69d63b925..69e7f2ef7384410e0eb476cb7040a836b981ea19 100644 (file)
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -182,11 +182,11 @@ ENDPROC(cpu_sa1100_do_suspend)
  
  ENTRY(cpu_sa1100_do_resume)
         ldmia   r0, {r4 - r7}                   @ load cp regs
-       mov     r1, #0
-       mcr     p15, 0, r1, c8, c7, 0           @ flush I+D TLBs
-       mcr     p15, 0, r1, c7, c7, 0           @ flush I&D cache
-       mcr     p15, 0, r1, c9, c0, 0           @ invalidate RB
-       mcr     p15, 0, r1, c9, c0, 5           @ allow user space to use RB
+       mov     ip, #0
+       mcr     p15, 0, ip, c8, c7, 0           @ flush I+D TLBs
+       mcr     p15, 0, ip, c7, c7, 0           @ flush I&D cache
+       mcr     p15, 0, ip, c9, c0, 0           @ invalidate RB
+       mcr     p15, 0, ip, c9, c0, 5           @ allow user space to use RB
  
         mcr     p15, 0, r4, c3, c0, 0           @ domain ID
         mcr     p15, 0, r5, c2, c0, 0           @ translation table base addr
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S

index 219138d2f158843059fbfec5373e0dbf275dbab2..a923aa0fd00dd65fb977aad23145d514e3836b13 100644 (file)
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -223,6 +223,22 @@ __v6_setup:
         mrc     p15, 0, r0, c1, c0, 0           @ read control register
         bic     r0, r0, r5                      @ clear bits them
         orr     r0, r0, r6                      @ set them
+#ifdef CONFIG_ARM_ERRATA_364296
+       /*
+        * Workaround for the 364296 ARM1136 r0p2 erratum (possible cache data
+        * corruption with hit-under-miss enabled). The conditional code below
+        * (setting the undocumented bit 31 in the auxiliary control register
+        * and the FI bit in the control register) disables hit-under-miss
+        * without putting the processor into full low interrupt latency mode.
+        */
+       ldr     r6, =0x4107b362                 @ id for ARM1136 r0p2
+       mrc     p15, 0, r5, c0, c0, 0           @ get processor id
+       teq     r5, r6                          @ check for the faulty core
+       mrceq   p15, 0, r5, c1, c0, 1           @ load aux control reg
+       orreq   r5, r5, #(1 << 31)              @ set the undocumented bit 31
+       mcreq   p15, 0, r5, c1, c0, 1           @ write aux control reg
+       orreq   r0, r0, #(1 << 21)              @ low interrupt latency configuration
+#endif
         mov     pc, lr                          @ return to head.S:__ret
  
         /*
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S

index a30e78542ccf3201fa07e1dee6534552821dfa97..9049c0764db271cefac57824191db79e98b6df0c 100644 (file)
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -66,6 +66,7 @@ ENDPROC(cpu_v7_proc_fin)
  ENTRY(cpu_v7_reset)
         mrc     p15, 0, r1, c1, c0, 0           @ ctrl register
         bic     r1, r1, #0x1                    @ ...............m
+ THUMB(        bic     r1, r1, #1 << 30 )              @ SCTLR.TE (Thumb exceptions)
         mcr     p15, 0, r1, c1, c0, 0           @ disable MMU
         isb
         mov     pc, r0
@@ -247,13 +248,16 @@ ENTRY(cpu_v7_do_resume)
         mcr     p15, 0, r7, c2, c0, 0   @ TTB 0
         mcr     p15, 0, r8, c2, c0, 1   @ TTB 1
         mcr     p15, 0, ip, c2, c0, 2   @ TTB control register
-       mcr     p15, 0, r10, c1, c0, 1  @ Auxiliary control register
+       mrc     p15, 0, r4, c1, c0, 1   @ Read Auxiliary control register
+       teq     r4, r10                 @ Is it already set?
+       mcrne   p15, 0, r10, c1, c0, 1  @ No, so write it
         mcr     p15, 0, r11, c1, c0, 2  @ Co-processor access control
         ldr     r4, =PRRR               @ PRRR
         ldr     r5, =NMRR               @ NMRR
         mcr     p15, 0, r4, c10, c2, 0  @ write PRRR
         mcr     p15, 0, r5, c10, c2, 1  @ write NMRR
         isb
+       dsb
         mov     r0, r9                  @ control register
         mov     r2, r7, lsr #14         @ get TTB0 base
         mov     r2, r2, lsl #14
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S

index 28c72a2006a1a9c306615de796113feb61b24792..755e1bf22681270e6071c6e64a98b76b654c4432 100644 (file)
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -406,7 +406,7 @@ ENTRY(cpu_xsc3_set_pte_ext)
         .align
  
  .globl cpu_xsc3_suspend_size
-.equ   cpu_xsc3_suspend_size, 4 * 8
+.equ   cpu_xsc3_suspend_size, 4 * 7
  #ifdef CONFIG_PM_SLEEP
  ENTRY(cpu_xsc3_do_suspend)
         stmfd   sp!, {r4 - r10, lr}
@@ -418,12 +418,12 @@ ENTRY(cpu_xsc3_do_suspend)
         mrc     p15, 0, r9, c1, c0, 1   @ auxiliary control reg
         mrc     p15, 0, r10, c1, c0, 0  @ control reg
         bic     r4, r4, #2              @ clear frequency change bit
-       stmia   r0, {r1, r4 - r10}      @ store v:p offset + cp regs
+       stmia   r0, {r4 - r10}          @ store cp regs
         ldmia   sp!, {r4 - r10, pc}
  ENDPROC(cpu_xsc3_do_suspend)
  
  ENTRY(cpu_xsc3_do_resume)
-       ldmia   r0, {r1, r4 - r10}      @ load v:p offset + cp regs
+       ldmia   r0, {r4 - r10}          @ load cp regs
         mov     ip, #0
         mcr     p15, 0, ip, c7, c7, 0   @ invalidate I & D caches, BTB
         mcr     p15, 0, ip, c7, c10, 4  @ drain write (&fill) buffer
diff --git a/arch/arm/plat-mxc/include/mach/debug-macro.S b/arch/arm/plat-mxc/include/mach/debug-macro.S

index 91fc7cdb5dc97309b143aae6f70c72a106977208..e4dde91f023103b64b0d78c8e9b70fbc4a284713 100644 (file)
--- a/arch/arm/plat-mxc/include/mach/debug-macro.S
+++ b/arch/arm/plat-mxc/include/mach/debug-macro.S
@@ -44,6 +44,14 @@
  #define UART_PADDR     MX51_UART1_BASE_ADDR
  #endif
  
+/* iMX50/53 have same addresses, but not iMX51 */
+#if defined(CONFIG_SOC_IMX50) || defined(CONFIG_SOC_IMX53)
+#ifdef UART_PADDR
+#error "CONFIG_DEBUG_LL is incompatible with multiple archs"
+#endif
+#define UART_PADDR     MX53_UART1_BASE_ADDR
+#endif
+
  #define UART_VADDR     IMX_IO_ADDRESS(UART_PADDR)
  
                 .macro  addruart, rp, rv
diff --git a/arch/arm/plat-mxc/include/mach/iomux-mx53.h b/arch/arm/plat-mxc/include/mach/iomux-mx53.h

index 9440b9e00e89c1536ce3d2e8d7ecbb55a5bbfd99..5408fd1fc7368b094612c7701ac55a02ab097232 100644 (file)
--- a/arch/arm/plat-mxc/include/mach/iomux-mx53.h
+++ b/arch/arm/plat-mxc/include/mach/iomux-mx53.h
@@ -30,6 +30,9 @@
  #define MX53_SDHC_PAD_CTRL     (PAD_CTL_HYS | PAD_CTL_PKE | PAD_CTL_PUE | \
                                 PAD_CTL_PUS_47K_UP | PAD_CTL_DSE_HIGH | \
                                 PAD_CTL_SRE_FAST)
+#define PAD_CTRL_I2C   (PAD_CTL_SRE_FAST | PAD_CTL_ODE | PAD_CTL_PKE | \
+                       PAD_CTL_PUE | PAD_CTL_DSE_HIGH | PAD_CTL_PUS_100K_UP \
+                       | PAD_CTL_HYS)
  
  #define _MX53_PAD_GPIO_19__KPP_COL_5           IOMUX_PAD(0x348, 0x20, 0, 0x840, 0, 0)
  #define _MX53_PAD_GPIO_19__GPIO4_5             IOMUX_PAD(0x348, 0x20, 1, 0x0, 0, 0)
@@ -1256,7 +1259,7 @@
  #define MX53_PAD_KEY_COL3__GPIO4_12            (_MX53_PAD_KEY_COL3__GPIO4_12 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_KEY_COL3__USBOH3_H2_DP                (_MX53_PAD_KEY_COL3__USBOH3_H2_DP | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_KEY_COL3__SPDIF_IN1           (_MX53_PAD_KEY_COL3__SPDIF_IN1 | MUX_PAD_CTRL(NO_PAD_CTRL))
-#define MX53_PAD_KEY_COL3__I2C2_SCL            (_MX53_PAD_KEY_COL3__I2C2_SCL | MUX_PAD_CTRL(NO_PAD_CTRL))
+#define MX53_PAD_KEY_COL3__I2C2_SCL            (_MX53_PAD_KEY_COL3__I2C2_SCL | MUX_PAD_CTRL(PAD_CTRL_I2C))
  #define MX53_PAD_KEY_COL3__ECSPI1_SS3          (_MX53_PAD_KEY_COL3__ECSPI1_SS3 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_KEY_COL3__FEC_CRS             (_MX53_PAD_KEY_COL3__FEC_CRS | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_KEY_COL3__USBPHY1_SIECLOCK            (_MX53_PAD_KEY_COL3__USBPHY1_SIECLOCK | MUX_PAD_CTRL(NO_PAD_CTRL))
@@ -1264,7 +1267,7 @@
  #define MX53_PAD_KEY_ROW3__GPIO4_13            (_MX53_PAD_KEY_ROW3__GPIO4_13 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_KEY_ROW3__USBOH3_H2_DM                (_MX53_PAD_KEY_ROW3__USBOH3_H2_DM | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_KEY_ROW3__CCM_ASRC_EXT_CLK            (_MX53_PAD_KEY_ROW3__CCM_ASRC_EXT_CLK | MUX_PAD_CTRL(NO_PAD_CTRL))
-#define MX53_PAD_KEY_ROW3__I2C2_SDA            (_MX53_PAD_KEY_ROW3__I2C2_SDA | MUX_PAD_CTRL(NO_PAD_CTRL))
+#define MX53_PAD_KEY_ROW3__I2C2_SDA            (_MX53_PAD_KEY_ROW3__I2C2_SDA | MUX_PAD_CTRL(PAD_CTRL_I2C))
  #define MX53_PAD_KEY_ROW3__OSC32K_32K_OUT              (_MX53_PAD_KEY_ROW3__OSC32K_32K_OUT | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_KEY_ROW3__CCM_PLL4_BYP                (_MX53_PAD_KEY_ROW3__CCM_PLL4_BYP | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_KEY_ROW3__USBPHY1_LINESTATE_0         (_MX53_PAD_KEY_ROW3__USBPHY1_LINESTATE_0 | MUX_PAD_CTRL(NO_PAD_CTRL))
@@ -1536,7 +1539,7 @@
  #define MX53_PAD_CSI0_DAT8__KPP_COL_7          (_MX53_PAD_CSI0_DAT8__KPP_COL_7 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_CSI0_DAT8__ECSPI2_SCLK                (_MX53_PAD_CSI0_DAT8__ECSPI2_SCLK | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_CSI0_DAT8__USBOH3_USBH3_OC            (_MX53_PAD_CSI0_DAT8__USBOH3_USBH3_OC | MUX_PAD_CTRL(NO_PAD_CTRL))
-#define MX53_PAD_CSI0_DAT8__I2C1_SDA           (_MX53_PAD_CSI0_DAT8__I2C1_SDA | MUX_PAD_CTRL(NO_PAD_CTRL))
+#define MX53_PAD_CSI0_DAT8__I2C1_SDA           (_MX53_PAD_CSI0_DAT8__I2C1_SDA | MUX_PAD_CTRL(PAD_CTRL_I2C))
  #define MX53_PAD_CSI0_DAT8__EMI_EMI_DEBUG_37           (_MX53_PAD_CSI0_DAT8__EMI_EMI_DEBUG_37 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_CSI0_DAT8__TPIU_TRACE_5               (_MX53_PAD_CSI0_DAT8__TPIU_TRACE_5 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_CSI0_DAT9__IPU_CSI0_D_9               (_MX53_PAD_CSI0_DAT9__IPU_CSI0_D_9 | MUX_PAD_CTRL(NO_PAD_CTRL))
@@ -1544,7 +1547,7 @@
  #define MX53_PAD_CSI0_DAT9__KPP_ROW_7          (_MX53_PAD_CSI0_DAT9__KPP_ROW_7 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_CSI0_DAT9__ECSPI2_MOSI                (_MX53_PAD_CSI0_DAT9__ECSPI2_MOSI | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_CSI0_DAT9__USBOH3_USBH3_PWR           (_MX53_PAD_CSI0_DAT9__USBOH3_USBH3_PWR | MUX_PAD_CTRL(NO_PAD_CTRL))
-#define MX53_PAD_CSI0_DAT9__I2C1_SCL           (_MX53_PAD_CSI0_DAT9__I2C1_SCL | MUX_PAD_CTRL(NO_PAD_CTRL))
+#define MX53_PAD_CSI0_DAT9__I2C1_SCL           (_MX53_PAD_CSI0_DAT9__I2C1_SCL | MUX_PAD_CTRL(PAD_CTRL_I2C))
  #define MX53_PAD_CSI0_DAT9__EMI_EMI_DEBUG_38           (_MX53_PAD_CSI0_DAT9__EMI_EMI_DEBUG_38 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_CSI0_DAT9__TPIU_TRACE_6               (_MX53_PAD_CSI0_DAT9__TPIU_TRACE_6 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_CSI0_DAT10__IPU_CSI0_D_10             (_MX53_PAD_CSI0_DAT10__IPU_CSI0_D_10 | MUX_PAD_CTRL(NO_PAD_CTRL))
@@ -1631,25 +1634,25 @@
  #define MX53_PAD_EIM_EB2__CCM_DI1_EXT_CLK              (_MX53_PAD_EIM_EB2__CCM_DI1_EXT_CLK | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_EB2__IPU_SER_DISP1_CS             (_MX53_PAD_EIM_EB2__IPU_SER_DISP1_CS | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_EB2__ECSPI1_SS0           (_MX53_PAD_EIM_EB2__ECSPI1_SS0 | MUX_PAD_CTRL(NO_PAD_CTRL))
-#define MX53_PAD_EIM_EB2__I2C2_SCL             (_MX53_PAD_EIM_EB2__I2C2_SCL | MUX_PAD_CTRL(NO_PAD_CTRL))
+#define MX53_PAD_EIM_EB2__I2C2_SCL             (_MX53_PAD_EIM_EB2__I2C2_SCL | MUX_PAD_CTRL(PAD_CTRL_I2C))
  #define MX53_PAD_EIM_D16__EMI_WEIM_D_16                (_MX53_PAD_EIM_D16__EMI_WEIM_D_16 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D16__GPIO3_16             (_MX53_PAD_EIM_D16__GPIO3_16 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D16__IPU_DI0_PIN5         (_MX53_PAD_EIM_D16__IPU_DI0_PIN5 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D16__IPU_DISPB1_SER_CLK           (_MX53_PAD_EIM_D16__IPU_DISPB1_SER_CLK | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D16__ECSPI1_SCLK          (_MX53_PAD_EIM_D16__ECSPI1_SCLK | MUX_PAD_CTRL(NO_PAD_CTRL))
-#define MX53_PAD_EIM_D16__I2C2_SDA             (_MX53_PAD_EIM_D16__I2C2_SDA | MUX_PAD_CTRL(NO_PAD_CTRL))
+#define MX53_PAD_EIM_D16__I2C2_SDA             (_MX53_PAD_EIM_D16__I2C2_SDA | MUX_PAD_CTRL(PAD_CTRL_I2C))
  #define MX53_PAD_EIM_D17__EMI_WEIM_D_17                (_MX53_PAD_EIM_D17__EMI_WEIM_D_17 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D17__GPIO3_17             (_MX53_PAD_EIM_D17__GPIO3_17 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D17__IPU_DI0_PIN6         (_MX53_PAD_EIM_D17__IPU_DI0_PIN6 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D17__IPU_DISPB1_SER_DIN           (_MX53_PAD_EIM_D17__IPU_DISPB1_SER_DIN | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D17__ECSPI1_MISO          (_MX53_PAD_EIM_D17__ECSPI1_MISO | MUX_PAD_CTRL(NO_PAD_CTRL))
-#define MX53_PAD_EIM_D17__I2C3_SCL             (_MX53_PAD_EIM_D17__I2C3_SCL | MUX_PAD_CTRL(NO_PAD_CTRL))
+#define MX53_PAD_EIM_D17__I2C3_SCL             (_MX53_PAD_EIM_D17__I2C3_SCL | MUX_PAD_CTRL(PAD_CTRL_I2C))
  #define MX53_PAD_EIM_D18__EMI_WEIM_D_18                (_MX53_PAD_EIM_D18__EMI_WEIM_D_18 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D18__GPIO3_18             (_MX53_PAD_EIM_D18__GPIO3_18 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D18__IPU_DI0_PIN7         (_MX53_PAD_EIM_D18__IPU_DI0_PIN7 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D18__IPU_DISPB1_SER_DIO           (_MX53_PAD_EIM_D18__IPU_DISPB1_SER_DIO | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D18__ECSPI1_MOSI          (_MX53_PAD_EIM_D18__ECSPI1_MOSI | MUX_PAD_CTRL(NO_PAD_CTRL))
-#define MX53_PAD_EIM_D18__I2C3_SDA             (_MX53_PAD_EIM_D18__I2C3_SDA | MUX_PAD_CTRL(NO_PAD_CTRL))
+#define MX53_PAD_EIM_D18__I2C3_SDA             (_MX53_PAD_EIM_D18__I2C3_SDA | MUX_PAD_CTRL(PAD_CTRL_I2C))
  #define MX53_PAD_EIM_D18__IPU_DI1_D0_CS                (_MX53_PAD_EIM_D18__IPU_DI1_D0_CS | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D19__EMI_WEIM_D_19                (_MX53_PAD_EIM_D19__EMI_WEIM_D_19 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D19__GPIO3_19             (_MX53_PAD_EIM_D19__GPIO3_19 | MUX_PAD_CTRL(NO_PAD_CTRL))
@@ -1672,7 +1675,7 @@
  #define MX53_PAD_EIM_D21__IPU_DI0_PIN17                (_MX53_PAD_EIM_D21__IPU_DI0_PIN17 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D21__IPU_DISPB0_SER_CLK           (_MX53_PAD_EIM_D21__IPU_DISPB0_SER_CLK | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D21__CSPI_SCLK            (_MX53_PAD_EIM_D21__CSPI_SCLK | MUX_PAD_CTRL(NO_PAD_CTRL))
-#define MX53_PAD_EIM_D21__I2C1_SCL             (_MX53_PAD_EIM_D21__I2C1_SCL | MUX_PAD_CTRL(NO_PAD_CTRL))
+#define MX53_PAD_EIM_D21__I2C1_SCL             (_MX53_PAD_EIM_D21__I2C1_SCL | MUX_PAD_CTRL(PAD_CTRL_I2C))
  #define MX53_PAD_EIM_D21__USBOH3_USBOTG_OC             (_MX53_PAD_EIM_D21__USBOH3_USBOTG_OC | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D22__EMI_WEIM_D_22                (_MX53_PAD_EIM_D22__EMI_WEIM_D_22 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D22__GPIO3_22             (_MX53_PAD_EIM_D22__GPIO3_22 | MUX_PAD_CTRL(NO_PAD_CTRL))
@@ -1732,7 +1735,7 @@
  #define MX53_PAD_EIM_D28__UART2_CTS            (_MX53_PAD_EIM_D28__UART2_CTS | MUX_PAD_CTRL(MX53_UART_PAD_CTRL))
  #define MX53_PAD_EIM_D28__IPU_DISPB0_SER_DIO           (_MX53_PAD_EIM_D28__IPU_DISPB0_SER_DIO | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D28__CSPI_MOSI            (_MX53_PAD_EIM_D28__CSPI_MOSI | MUX_PAD_CTRL(NO_PAD_CTRL))
-#define MX53_PAD_EIM_D28__I2C1_SDA             (_MX53_PAD_EIM_D28__I2C1_SDA | MUX_PAD_CTRL(NO_PAD_CTRL))
+#define MX53_PAD_EIM_D28__I2C1_SDA             (_MX53_PAD_EIM_D28__I2C1_SDA | MUX_PAD_CTRL(PAD_CTRL_I2C))
  #define MX53_PAD_EIM_D28__IPU_EXT_TRIG         (_MX53_PAD_EIM_D28__IPU_EXT_TRIG | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D28__IPU_DI0_PIN13                (_MX53_PAD_EIM_D28__IPU_DI0_PIN13 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_EIM_D29__EMI_WEIM_D_29                (_MX53_PAD_EIM_D29__EMI_WEIM_D_29 | MUX_PAD_CTRL(NO_PAD_CTRL))
@@ -2297,7 +2300,7 @@
  #define MX53_PAD_GPIO_9__SCC_FAIL_STATE                (_MX53_PAD_GPIO_9__SCC_FAIL_STATE | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_GPIO_3__ESAI1_HCKR            (_MX53_PAD_GPIO_3__ESAI1_HCKR | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_GPIO_3__GPIO1_3               (_MX53_PAD_GPIO_3__GPIO1_3 | MUX_PAD_CTRL(NO_PAD_CTRL))
-#define MX53_PAD_GPIO_3__I2C3_SCL              (_MX53_PAD_GPIO_3__I2C3_SCL | MUX_PAD_CTRL(NO_PAD_CTRL))
+#define MX53_PAD_GPIO_3__I2C3_SCL              (_MX53_PAD_GPIO_3__I2C3_SCL | MUX_PAD_CTRL(PAD_CTRL_I2C))
  #define MX53_PAD_GPIO_3__DPLLIP1_TOG_EN                (_MX53_PAD_GPIO_3__DPLLIP1_TOG_EN | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_GPIO_3__CCM_CLKO2             (_MX53_PAD_GPIO_3__CCM_CLKO2 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_GPIO_3__OBSERVE_MUX_OBSRV_INT_OUT0            (_MX53_PAD_GPIO_3__OBSERVE_MUX_OBSRV_INT_OUT0 | MUX_PAD_CTRL(NO_PAD_CTRL))
@@ -2305,7 +2308,7 @@
  #define MX53_PAD_GPIO_3__MLB_MLBCLK            (_MX53_PAD_GPIO_3__MLB_MLBCLK | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_GPIO_6__ESAI1_SCKT            (_MX53_PAD_GPIO_6__ESAI1_SCKT | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_GPIO_6__GPIO1_6               (_MX53_PAD_GPIO_6__GPIO1_6 | MUX_PAD_CTRL(NO_PAD_CTRL))
-#define MX53_PAD_GPIO_6__I2C3_SDA              (_MX53_PAD_GPIO_6__I2C3_SDA | MUX_PAD_CTRL(NO_PAD_CTRL))
+#define MX53_PAD_GPIO_6__I2C3_SDA              (_MX53_PAD_GPIO_6__I2C3_SDA | MUX_PAD_CTRL(PAD_CTRL_I2C))
  #define MX53_PAD_GPIO_6__CCM_CCM_OUT_0         (_MX53_PAD_GPIO_6__CCM_CCM_OUT_0 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_GPIO_6__CSU_CSU_INT_DEB               (_MX53_PAD_GPIO_6__CSU_CSU_INT_DEB | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_GPIO_6__OBSERVE_MUX_OBSRV_INT_OUT1            (_MX53_PAD_GPIO_6__OBSERVE_MUX_OBSRV_INT_OUT1 | MUX_PAD_CTRL(NO_PAD_CTRL))
@@ -2333,7 +2336,7 @@
  #define MX53_PAD_GPIO_5__CCM_CLKO              (_MX53_PAD_GPIO_5__CCM_CLKO | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_GPIO_5__CSU_CSU_ALARM_AUT_2           (_MX53_PAD_GPIO_5__CSU_CSU_ALARM_AUT_2 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_GPIO_5__OBSERVE_MUX_OBSRV_INT_OUT4            (_MX53_PAD_GPIO_5__OBSERVE_MUX_OBSRV_INT_OUT4 | MUX_PAD_CTRL(NO_PAD_CTRL))
-#define MX53_PAD_GPIO_5__I2C3_SCL              (_MX53_PAD_GPIO_5__I2C3_SCL | MUX_PAD_CTRL(NO_PAD_CTRL))
+#define MX53_PAD_GPIO_5__I2C3_SCL              (_MX53_PAD_GPIO_5__I2C3_SCL | MUX_PAD_CTRL(PAD_CTRL_I2C))
  #define MX53_PAD_GPIO_5__CCM_PLL1_BYP          (_MX53_PAD_GPIO_5__CCM_PLL1_BYP | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_GPIO_7__ESAI1_TX4_RX1         (_MX53_PAD_GPIO_7__ESAI1_TX4_RX1 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_GPIO_7__GPIO1_7               (_MX53_PAD_GPIO_7__GPIO1_7 | MUX_PAD_CTRL(NO_PAD_CTRL))
@@ -2356,7 +2359,7 @@
  #define MX53_PAD_GPIO_16__TZIC_PWRFAIL_INT             (_MX53_PAD_GPIO_16__TZIC_PWRFAIL_INT | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_GPIO_16__RTC_CE_RTC_EXT_TRIG1         (_MX53_PAD_GPIO_16__RTC_CE_RTC_EXT_TRIG1 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_GPIO_16__SPDIF_IN1            (_MX53_PAD_GPIO_16__SPDIF_IN1 | MUX_PAD_CTRL(NO_PAD_CTRL))
-#define MX53_PAD_GPIO_16__I2C3_SDA             (_MX53_PAD_GPIO_16__I2C3_SDA | MUX_PAD_CTRL(NO_PAD_CTRL))
+#define MX53_PAD_GPIO_16__I2C3_SDA             (_MX53_PAD_GPIO_16__I2C3_SDA | MUX_PAD_CTRL(PAD_CTRL_I2C))
  #define MX53_PAD_GPIO_16__SJC_DE_B             (_MX53_PAD_GPIO_16__SJC_DE_B | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_GPIO_17__ESAI1_TX0            (_MX53_PAD_GPIO_17__ESAI1_TX0 | MUX_PAD_CTRL(NO_PAD_CTRL))
  #define MX53_PAD_GPIO_17__GPIO7_12             (_MX53_PAD_GPIO_17__GPIO7_12 | MUX_PAD_CTRL(NO_PAD_CTRL))
diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig

index 6e6735f04ee3f66e90fa25c81a77351b9cbe42bd..bb8f4a6b3e37d4ebb00e5f1fc9be31f2bd963926 100644 (file)
--- a/arch/arm/plat-omap/Kconfig
+++ b/arch/arm/plat-omap/Kconfig
@@ -13,6 +13,7 @@ config ARCH_OMAP1
         bool "TI OMAP1"
         select CLKDEV_LOOKUP
         select CLKSRC_MMIO
+       select GENERIC_IRQ_CHIP
         help
           "Systems based on omap7xx, omap15xx or omap16xx"
  
diff --git a/arch/arm/plat-omap/include/plat/dma.h b/arch/arm/plat-omap/include/plat/dma.h

index d1c916fcf7700440c80deb96759807622009ae56..dc562a5c0a8ad2c7117b0b02e43b85cabe7b9aa5 100644 (file)
--- a/arch/arm/plat-omap/include/plat/dma.h
+++ b/arch/arm/plat-omap/include/plat/dma.h
@@ -195,6 +195,11 @@
  
  #define OMAP36XX_DMA_UART4_TX          81      /* S_DMA_80 */
  #define OMAP36XX_DMA_UART4_RX          82      /* S_DMA_81 */
+
+/* Only for AM35xx */
+#define AM35XX_DMA_UART4_TX            54
+#define AM35XX_DMA_UART4_RX            55
+
  /*----------------------------------------------------------------------------*/
  
  #define OMAP1_DMA_TOUT_IRQ             (1 << 0)
diff --git a/arch/arm/plat-omap/include/plat/irqs.h b/arch/arm/plat-omap/include/plat/irqs.h

index 926d25c780f35eb648def7168d71b8d4827e05af..30e10719b774853e2247badf8a414111408c8fac 100644 (file)
--- a/arch/arm/plat-omap/include/plat/irqs.h
+++ b/arch/arm/plat-omap/include/plat/irqs.h
@@ -357,6 +357,7 @@
  #define INT_35XX_EMAC_C0_TX_PULSE_IRQ  69
  #define INT_35XX_EMAC_C0_MISC_PULSE_IRQ        70
  #define INT_35XX_USBOTG_IRQ            71
+#define INT_35XX_UART4                 84
  #define INT_35XX_CCDC_VD0_IRQ          88
  #define INT_35XX_CCDC_VD1_IRQ          92
  #define INT_35XX_CCDC_VD2_IRQ          93
diff --git a/arch/arm/plat-omap/include/plat/serial.h b/arch/arm/plat-omap/include/plat/serial.h

index 2723f9166ea20f07c150170c15097d06d04794d7..de3b10c18127510872b3920ad42ee689e56eed47 100644 (file)
--- a/arch/arm/plat-omap/include/plat/serial.h
+++ b/arch/arm/plat-omap/include/plat/serial.h
@@ -56,6 +56,9 @@
  #define TI816X_UART2_BASE      0x48022000
  #define TI816X_UART3_BASE      0x48024000
  
+/* AM3505/3517 UART4 */
+#define AM35XX_UART4_BASE      0x4809E000      /* Only on AM3505/3517 */
+
  /* External port on Zoom2/3 */
  #define ZOOM_UART_BASE         0x10000000
  #define ZOOM_UART_VIRT         0xfa400000
diff --git a/arch/arm/plat-omap/iovmm.c b/arch/arm/plat-omap/iovmm.c

index c60737c49a32fae14b445ce938c59975998d7ae4..79e7fedb8602ae1a50bd704921ad432a878f9765 100644 (file)
--- a/arch/arm/plat-omap/iovmm.c
+++ b/arch/arm/plat-omap/iovmm.c
@@ -423,9 +423,6 @@ static void sgtable_fill_kmalloc(struct sg_table *sgt, u32 pa, u32 da,
  {
         unsigned int i;
         struct scatterlist *sg;
-       void *va;
-
-       va = phys_to_virt(pa);
  
         for_each_sg(sgt->sgl, sg, sgt->nents, i) {
                 unsigned bytes;
diff --git a/arch/arm/plat-omap/omap_device.c b/arch/arm/plat-omap/omap_device.c

index b6b40974495469314b3bec6faaf2cbeae981ea25..02609eee05621ab682de81d0fd215972dffbb158 100644 (file)
--- a/arch/arm/plat-omap/omap_device.c
+++ b/arch/arm/plat-omap/omap_device.c
@@ -615,6 +615,9 @@ static int _od_resume_noirq(struct device *dev)
  
         return pm_generic_resume_noirq(dev);
  }
+#else
+#define _od_suspend_noirq NULL
+#define _od_resume_noirq NULL
  #endif
  
  static struct dev_pm_domain omap_device_pm_domain = {
@@ -622,7 +625,8 @@ static struct dev_pm_domain omap_device_pm_domain = {
                 SET_RUNTIME_PM_OPS(_od_runtime_suspend, _od_runtime_resume,
                                    _od_runtime_idle)
                 USE_PLATFORM_PM_SLEEP_OPS
-               SET_SYSTEM_SLEEP_PM_OPS(_od_suspend_noirq, _od_resume_noirq)
+               .suspend_noirq = _od_suspend_noirq,
+               .resume_noirq = _od_resume_noirq,
         }
  };
  
diff --git a/arch/arm/plat-s5p/clock.c b/arch/arm/plat-s5p/clock.c

index 02af235298e2e1ff4f1e850afe3b67f0b43f9856..5f84a3f13ef9bb042eae6b85e96903ccdb2600a3 100644 (file)
--- a/arch/arm/plat-s5p/clock.c
+++ b/arch/arm/plat-s5p/clock.c
@@ -192,7 +192,7 @@ unsigned long s5p_spdif_get_rate(struct clk *clk)
         if (IS_ERR(pclk))
                 return -EINVAL;
  
-       rate = pclk->ops->get_rate(clk);
+       rate = pclk->ops->get_rate(pclk);
         clk_put(pclk);
  
         return rate;
diff --git a/arch/arm/plat-s5p/irq-gpioint.c b/arch/arm/plat-s5p/irq-gpioint.c

index 327ab9f662e8bca3019489817bb649b0eff68389..f71078ef6bb55ca840afdc33ef0a7befef4cdafc 100644 (file)
--- a/arch/arm/plat-s5p/irq-gpioint.c
+++ b/arch/arm/plat-s5p/irq-gpioint.c
@@ -23,6 +23,8 @@
  #include <plat/gpio-core.h>
  #include <plat/gpio-cfg.h>
  
+#include <asm/mach/irq.h>
+
  #define GPIO_BASE(chip)                (((unsigned long)(chip)->base) & 0xFFFFF000u)
  
  #define CON_OFFSET             0x700
@@ -81,6 +83,9 @@ static void s5p_gpioint_handler(unsigned int irq, struct irq_desc *desc)
         int group, pend_offset, mask_offset;
         unsigned int pend, mask;
  
+       struct irq_chip *chip = irq_get_chip(irq);
+       chained_irq_enter(chip, desc);
+
         for (group = 0; group < bank->nr_groups; group++) {
                 struct s3c_gpio_chip *chip = bank->chips[group];
                 if (!chip)
@@ -102,6 +107,7 @@ static void s5p_gpioint_handler(unsigned int irq, struct irq_desc *desc)
                         pend &= ~BIT(offset);
                 }
         }
+       chained_irq_exit(chip, desc);
  }
  
  static __init int s5p_gpioint_add(struct s3c_gpio_chip *chip)
diff --git a/arch/arm/plat-samsung/include/plat/backlight.h b/arch/arm/plat-samsung/include/plat/backlight.h

index 51d8da846a629281c77ff29c78018185d049bf03..ad530c78fe8c3563535a5c682a76125eb1571ffc 100644 (file)
--- a/arch/arm/plat-samsung/include/plat/backlight.h
+++ b/arch/arm/plat-samsung/include/plat/backlight.h
@@ -20,7 +20,7 @@ struct samsung_bl_gpio_info {
         int func;
  };
  
-extern void samsung_bl_set(struct samsung_bl_gpio_info *gpio_info,
+extern void __init samsung_bl_set(struct samsung_bl_gpio_info *gpio_info,
         struct platform_pwm_backlight_data *bl_data);
  
  #endif /* __ASM_PLAT_BACKLIGHT_H */
diff --git a/arch/arm/plat-samsung/irq-vic-timer.c b/arch/arm/plat-samsung/irq-vic-timer.c

index f714d060370d6f1647e29e2367591dafa242b4b6..51583cd301645483f110622ef2fc8dff362bf90d 100644 (file)
--- a/arch/arm/plat-samsung/irq-vic-timer.c
+++ b/arch/arm/plat-samsung/irq-vic-timer.c
@@ -22,9 +22,14 @@
  #include <plat/irq-vic-timer.h>
  #include <plat/regs-timer.h>
  
+#include <asm/mach/irq.h>
+
  static void s3c_irq_demux_vic_timer(unsigned int irq, struct irq_desc *desc)
  {
+       struct irq_chip *chip = irq_get_chip(irq);
+       chained_irq_enter(chip, desc);
         generic_handle_irq((int)desc->irq_data.handler_data);
+       chained_irq_exit(chip, desc);
  }
  
  /* We assume the IRQ_TIMER0..IRQ_TIMER4 range is continuous. */
diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types

index 3b3776d0a1a764e5edfd2a57a01f5a7fbd644bc0..62cc8f981171d071afd87ae79e59808bd5b0ee5c 100644 (file)
--- a/arch/arm/tools/mach-types
+++ b/arch/arm/tools/mach-types
@@ -351,7 +351,7 @@ centro                      MACH_CENTRO             CENTRO                  1944
  nokia_rx51             MACH_NOKIA_RX51         NOKIA_RX51              1955
  omap_zoom2             MACH_OMAP_ZOOM2         OMAP_ZOOM2              1967
  cpuat9260              MACH_CPUAT9260          CPUAT9260               1973
-eukrea_cpuimx27                MACH_CPUIMX27           CPUIMX27                1975
+eukrea_cpuimx27                MACH_EUKREA_CPUIMX27    EUKREA_CPUIMX27         1975
  acs5k                  MACH_ACS5K              ACS5K                   1982
  snapper_9260           MACH_SNAPPER_9260       SNAPPER_9260            1987
  dsm320                 MACH_DSM320             DSM320                  1988
@@ -476,8 +476,8 @@ cns3420vb           MACH_CNS3420VB          CNS3420VB               2776
  omap4_panda            MACH_OMAP4_PANDA        OMAP4_PANDA             2791
  ti8168evm              MACH_TI8168EVM          TI8168EVM               2800
  teton_bga              MACH_TETON_BGA          TETON_BGA               2816
-eukrea_cpuimx25sd      MACH_EUKREA_CPUIMX25    EUKREA_CPUIMX25         2820
-eukrea_cpuimx35sd      MACH_EUKREA_CPUIMX35    EUKREA_CPUIMX35         2821
+eukrea_cpuimx25sd      MACH_EUKREA_CPUIMX25SD  EUKREA_CPUIMX25SD       2820
+eukrea_cpuimx35sd      MACH_EUKREA_CPUIMX35SD  EUKREA_CPUIMX35SD       2821
  eukrea_cpuimx51sd      MACH_EUKREA_CPUIMX51SD  EUKREA_CPUIMX51SD       2822
  eukrea_cpuimx51                MACH_EUKREA_CPUIMX51    EUKREA_CPUIMX51         2823
  smdkc210               MACH_SMDKC210           SMDKC210                2838
@@ -910,7 +910,7 @@ omapl138_case_a3    MACH_OMAPL138_CASE_A3   OMAPL138_CASE_A3        3280
  uemd                   MACH_UEMD               UEMD                    3281
  ccwmx51mut             MACH_CCWMX51MUT         CCWMX51MUT              3282
  rockhopper             MACH_ROCKHOPPER         ROCKHOPPER              3283
-nookcolor              MACH_NOOKCOLOR          NOOKCOLOR               3284
+encore                 MACH_ENCORE             ENCORE                  3284
  hkdkc100               MACH_HKDKC100           HKDKC100                3285
  ts42xx                 MACH_TS42XX             TS42XX                  3286
  aebl                   MACH_AEBL               AEBL                    3287
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S

index c7fd394d28a4d4e968addf61f659e0e781d21b60..6eba53530d1c552ea9813a16419a679c27075415 100644 (file)
--- a/arch/avr32/kernel/syscall_table.S
+++ b/arch/avr32/kernel/syscall_table.S
@@ -158,7 +158,7 @@ sys_call_table:
         .long   sys_sched_rr_get_interval
         .long   sys_nanosleep
         .long   sys_poll
-       .long   sys_nfsservctl          /* 145 */
+       .long   sys_ni_syscall          /* 145 was nfsservctl */
         .long   sys_setresgid
         .long   sys_getresgid
         .long   sys_prctl
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S

index 225d311c97013048a6920b578b1956c57f6d57a2..e4137297b790067b7a803d0c8dabd48dd808700c 100644 (file)
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1543,7 +1543,7 @@ ENTRY(_sys_call_table)
         .long _sys_ni_syscall   /* for vm86 */
         .long _sys_ni_syscall   /* old "query_module" */
         .long _sys_ni_syscall   /* sys_poll */
-       .long _sys_nfsservctl
+       .long _sys_ni_syscall   /* old nfsservctl */
         .long _sys_setresgid    /* setresgid16 */       /* 170 */
         .long _sys_getresgid    /* getresgid16 */
         .long _sys_prctl
diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S

index 1161883eb582964fb5ad583e20b5bf327dd83097..592fbe9dfb629cc84bf126855fd80df16aa53bff 100644 (file)
--- a/arch/cris/arch-v10/kernel/entry.S
+++ b/arch/cris/arch-v10/kernel/entry.S
@@ -771,7 +771,7 @@ sys_call_table:
         .long sys_ni_syscall    /* sys_vm86 */
         .long sys_ni_syscall    /* Old sys_query_module */
         .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall    /* old nfsservctl */
         .long sys_setresgid16   /* 170 */
         .long sys_getresgid16
         .long sys_prctl
diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S

index 84fed7e91ada221772f5afa8f3e4d9707a899116..c3ea4694fbaf879c5b83e20b286e22587361dd22 100644 (file)
--- a/arch/cris/arch-v32/kernel/entry.S
+++ b/arch/cris/arch-v32/kernel/entry.S
@@ -714,7 +714,7 @@ sys_call_table:
         .long sys_ni_syscall    /* sys_vm86 */
         .long sys_ni_syscall    /* Old sys_query_module */
         .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall    /* Old nfsservctl */
         .long sys_setresgid16   /* 170 */
         .long sys_getresgid16
         .long sys_prctl
diff --git a/arch/cris/include/asm/serial.h b/arch/cris/include/asm/serial.h

new file mode 100644 (file)

index 0000000..af7535a
--- /dev/null
+++ b/arch/cris/include/asm/serial.h
@@ -0,0 +1,9 @@
+#ifndef _ASM_SERIAL_H
+#define _ASM_SERIAL_H
+
+/*
+ * This assumes you have a 1.8432 MHz clock for your UART.
+ */
+#define BASE_BAUD (1843200 / 16)
+
+#endif /* _ASM_SERIAL_H */
diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S

index 017d6d7b784fa5324f78dd5b4c11d431e8127278..5ba23f715ea5e7f0f3dfcc0cab6a860519e41ec6 100644 (file)
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -1358,7 +1358,7 @@ sys_call_table:
         .long sys_ni_syscall    /* for vm86 */
         .long sys_ni_syscall    /* Old sys_query_module */
         .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall    /* Old nfsservctl */
         .long sys_setresgid16   /* 170 */
         .long sys_getresgid16
         .long sys_prctl
diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S

index f4b2e67bcc34dcd93340baf8bfcf4cdb35176be7..4be2ea2fbe26a4ab5c823db232f84cf7484e0a6a 100644 (file)
--- a/arch/h8300/kernel/syscalls.S
+++ b/arch/h8300/kernel/syscalls.S
@@ -183,7 +183,7 @@ SYMBOL_NAME_LABEL(sys_call_table)
         .long SYMBOL_NAME(sys_ni_syscall)       /* for vm86 */
         .long SYMBOL_NAME(sys_ni_syscall)       /* sys_query_module */
         .long SYMBOL_NAME(sys_poll)
-       .long SYMBOL_NAME(sys_nfsservctl)
+       .long SYMBOL_NAME(sys_ni_syscall)       /* old nfsservctl */
         .long SYMBOL_NAME(sys_setresgid16)      /* 170 */
         .long SYMBOL_NAME(sys_getresgid16)
         .long SYMBOL_NAME(sys_prctl)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig

index 12485471495893cd00c80be024b7983c11e8168c..3ff7785b3beb4d5d042b60e07ce25dc793c8c10f 100644 (file)
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -162,7 +162,6 @@ config IA64_GENERIC
         select ACPI_NUMA
         select SWIOTLB
         select PCI_MSI
-       select DMAR
         help
           This selects the system type of your hardware.  A "generic" kernel
           will run on any supported IA-64 system.  However, if you configure
diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig

index 1d7bca0a396d3ba27068b5bd2ac48abdb72c1b00..0e5cd1405e0e506c28a7a7edd00a2ff82a20137d 100644 (file)
--- a/arch/ia64/configs/generic_defconfig
+++ b/arch/ia64/configs/generic_defconfig
@@ -234,3 +234,4 @@ CONFIG_CRYPTO_MD5=y
  # CONFIG_CRYPTO_ANSI_CPRNG is not set
  CONFIG_CRC_T10DIF=y
  CONFIG_MISC_DEVICES=y
+CONFIG_DMAR=y
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S

index 97dd2abdeb1a3b2cd66a409d1dc4bf8ec0345b55..198c753d1006a5abf0bc0c1b669ba53ebc754b34 100644 (file)
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1614,7 +1614,7 @@ sys_call_table:
         data8 sys_sched_get_priority_min
         data8 sys_sched_rr_get_interval
         data8 sys_nanosleep
-       data8 sys_nfsservctl
+       data8 sys_ni_syscall                    // old nfsservctl
         data8 sys_prctl                         // 1170
         data8 sys_getpagesize
         data8 sys_mmap2
diff --git a/arch/m32r/kernel/syscall_table.S b/arch/m32r/kernel/syscall_table.S

index 528f2e6ad06421e20d9c6f8d30e5d1c24f726350..f365c19795ef52df3da1429fc83fed3237fc09aa 100644 (file)
--- a/arch/m32r/kernel/syscall_table.S
+++ b/arch/m32r/kernel/syscall_table.S
@@ -168,7 +168,7 @@ ENTRY(sys_call_table)
         .long sys_tas                   /* vm86 syscall holder */
         .long sys_ni_syscall            /* query_module syscall holder */
         .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall            /* was nfsservctl */
         .long sys_setresgid             /* 170 */
         .long sys_getresgid
         .long sys_prctl
diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h

index 31d5570d65676784afd92a95665073e9179bb2e2..89f201434b5aa2575a5837bc6730346e06cc7e30 100644 (file)
--- a/arch/m68k/include/asm/page_mm.h
+++ b/arch/m68k/include/asm/page_mm.h
@@ -162,7 +162,7 @@ static inline __attribute_const__ int __virt_to_node_shift(void)
         pgdat->node_mem_map + (__pfn - pgdat->node_start_pfn);          \
  })
  #define page_to_pfn(_page) ({                                          \
-       struct page *__p = (_page);                                     \
+       const struct page *__p = (_page);                               \
         struct pglist_data *pgdat;                                      \
         pgdat = &pg_data_map[page_to_nid(__p)];                         \
         ((__p) - pgdat->node_mem_map) + pgdat->node_start_pfn;          \
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S

index 00d1452f9571073f98d4c5968fdb870f35daeb03..c468f2edaa85ec0cd2356e392a0f0e7efddb8218 100644 (file)
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -189,7 +189,7 @@ ENTRY(sys_call_table)
         .long sys_getpagesize
         .long sys_ni_syscall            /* old "query_module" */
         .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall            /* old nfsservctl */
         .long sys_setresgid16           /* 170 */
         .long sys_getresgid16
         .long sys_prctl
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S

index d915a122c86592fa29b8ebf395c4902881be5539..8789daa2a346683d43e7b42efe7935494b332e14 100644 (file)
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -173,7 +173,7 @@ ENTRY(sys_call_table)
         .long sys_ni_syscall            /* sys_vm86 */
         .long sys_ni_syscall            /* Old sys_query_module */
         .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall            /* old nfsservctl */
         .long sys_setresgid             /* 170 */
         .long sys_getresgid
         .long sys_prctl
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S

index e521420a45a54896f16c6caa0e73ebd39718fd4b..865bc7a6f5a19ed32d150886658894b215f3617d 100644 (file)
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -424,7 +424,7 @@ einval:     li      v0, -ENOSYS
         sys     sys_getresuid           3
         sys     sys_ni_syscall          0       /* was sys_query_module */
         sys     sys_poll                3
-       sys     sys_nfsservctl          3
+       sys     sys_ni_syscall          0       /* was nfsservctl */
         sys     sys_setresgid           3       /* 4190 */
         sys     sys_getresgid           3
         sys     sys_prctl               5
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S

index 85874d6a8a709e31b8964a9ae65ee4326f4f6143..fb7334bea7316aedd8071ff41d63178539504cc8 100644 (file)
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -299,7 +299,7 @@ sys_call_table:
         PTR     sys_ni_syscall                  /* 5170, was get_kernel_syms */
         PTR     sys_ni_syscall                  /* was query_module */
         PTR     sys_quotactl
-       PTR     sys_nfsservctl
+       PTR     sys_ni_syscall                  /* was nfsservctl */
         PTR     sys_ni_syscall                  /* res. for getpmsg */
         PTR     sys_ni_syscall                  /* 5175  for putpmsg */
         PTR     sys_ni_syscall                  /* res. for afs_syscall */
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S

index b85842fc87ae60d65d9f1599a275de3d07c284d1..f9296e894e465f83cccd9195df9baeb5d43dee17 100644 (file)
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -294,7 +294,7 @@ EXPORT(sysn32_call_table)
         PTR     sys_ni_syscall                  /* 6170, was get_kernel_syms */
         PTR     sys_ni_syscall                  /* was query_module */
         PTR     sys_quotactl
-       PTR     compat_sys_nfsservctl
+       PTR     sys_ni_syscall                  /* was nfsservctl */
         PTR     sys_ni_syscall                  /* res. for getpmsg */
         PTR     sys_ni_syscall                  /* 6175  for putpmsg */
         PTR     sys_ni_syscall                  /* res. for afs_syscall */
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S

index 46c4763edf211b7d3af7c630ad76dba949e33787..4d7c9827706f3d8dc821cfb258a12c22a384ddc2 100644 (file)
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -392,7 +392,7 @@ sys_call_table:
         PTR     sys_getresuid
         PTR     sys_ni_syscall                  /* was query_module */
         PTR     sys_poll
-       PTR     compat_sys_nfsservctl
+       PTR     sys_ni_syscall                  /* was nfsservctl */
         PTR     sys_setresgid                   /* 4190 */
         PTR     sys_getresgid
         PTR     sys_prctl
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S

index ae435e1d56694f71d5471b4000c130d7ea4c30d2..3e3620d9fc45eb2636aaae6e9a1d3f5e8f9762ed 100644 (file)
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -589,7 +589,7 @@ ENTRY(sys_call_table)
         .long sys_ni_syscall    /* vm86 */
         .long sys_ni_syscall    /* Old sys_query_module */
         .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall    /* was nfsservctl */
         .long sys_setresgid16   /* 170 */
         .long sys_getresgid16
         .long sys_prctl
diff --git a/arch/openrisc/include/asm/dma-mapping.h b/arch/openrisc/include/asm/dma-mapping.h

index 052f877b52a53d63e58fae5f1fe713164e5f9f59..60b472233900c4882b8f66f1339118183e298d42 100644 (file)
--- a/arch/openrisc/include/asm/dma-mapping.h
+++ b/arch/openrisc/include/asm/dma-mapping.h
@@ -31,7 +31,6 @@
  
  #define DMA_ERROR_CODE         (~(dma_addr_t)0x0)
  
-int dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
  
  #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
  #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
@@ -47,6 +46,12 @@ dma_addr_t or1k_map_page(struct device *dev, struct page *page,
  void or1k_unmap_page(struct device *dev, dma_addr_t dma_handle,
                      size_t size, enum dma_data_direction dir,
                      struct dma_attrs *attrs);
+int or1k_map_sg(struct device *dev, struct scatterlist *sg,
+               int nents, enum dma_data_direction dir,
+               struct dma_attrs *attrs);
+void or1k_unmap_sg(struct device *dev, struct scatterlist *sg,
+                  int nents, enum dma_data_direction dir,
+                  struct dma_attrs *attrs);
  void or1k_sync_single_for_cpu(struct device *dev,
                               dma_addr_t dma_handle, size_t size,
                               enum dma_data_direction dir);
@@ -98,6 +103,51 @@ static inline void dma_unmap_single(struct device *dev, dma_addr_t addr,
         debug_dma_unmap_page(dev, addr, size, dir, true);
  }
  
+static inline int dma_map_sg(struct device *dev, struct scatterlist *sg,
+                                  int nents, enum dma_data_direction dir)
+{
+       int i, ents;
+       struct scatterlist *s;
+
+       for_each_sg(sg, s, nents, i)
+               kmemcheck_mark_initialized(sg_virt(s), s->length);
+       BUG_ON(!valid_dma_direction(dir));
+       ents = or1k_map_sg(dev, sg, nents, dir, NULL);
+       debug_dma_map_sg(dev, sg, nents, ents, dir);
+
+       return ents;
+}
+
+static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+                                     int nents, enum dma_data_direction dir)
+{
+       BUG_ON(!valid_dma_direction(dir));
+       debug_dma_unmap_sg(dev, sg, nents, dir);
+       or1k_unmap_sg(dev, sg, nents, dir, NULL);
+}
+
+static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
+                                     size_t offset, size_t size,
+                                     enum dma_data_direction dir)
+{
+       dma_addr_t addr;
+
+       kmemcheck_mark_initialized(page_address(page) + offset, size);
+       BUG_ON(!valid_dma_direction(dir));
+       addr = or1k_map_page(dev, page, offset, size, dir, NULL);
+       debug_dma_map_page(dev, page, offset, size, dir, addr, false);
+
+       return addr;
+}
+
+static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
+                                 size_t size, enum dma_data_direction dir)
+{
+       BUG_ON(!valid_dma_direction(dir));
+       or1k_unmap_page(dev, addr, size, dir, NULL);
+       debug_dma_unmap_page(dev, addr, size, dir, true);
+}
+
  static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
                                            size_t size,
                                            enum dma_data_direction dir)
@@ -119,7 +169,12 @@ static inline void dma_sync_single_for_device(struct device *dev,
  static inline int dma_supported(struct device *dev, u64 dma_mask)
  {
         /* Support 32 bit DMA mask exclusively */
-       return dma_mask == 0xffffffffULL;
+       return dma_mask == DMA_BIT_MASK(32);
+}
+
+static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+       return 0;
  }
  
  static inline int dma_set_mask(struct device *dev, u64 dma_mask)
diff --git a/arch/openrisc/include/asm/sigcontext.h b/arch/openrisc/include/asm/sigcontext.h

index 54a5c50132e35e4000e45f874c6c1618894b9f9f..b79c2b19afbe6cfe9f4327535e257e12c9aaa29f 100644 (file)
--- a/arch/openrisc/include/asm/sigcontext.h
+++ b/arch/openrisc/include/asm/sigcontext.h
@@ -23,16 +23,11 @@
  
  /* This struct is saved by setup_frame in signal.c, to keep the current
     context while a signal handler is executed. It's restored by sys_sigreturn.
-
-   To keep things simple, we use pt_regs here even though normally you just
-   specify the list of regs to save. Then we can use copy_from_user on the
-   entire regs instead of a bunch of get_user's as well...
  */
  
  struct sigcontext {
-       struct pt_regs regs;  /* needs to be first */
+       struct user_regs_struct regs;  /* needs to be first */
         unsigned long oldmask;
-       unsigned long usp;    /* usp before stacking this gunk on it */
  };
  
  #endif /* __ASM_OPENRISC_SIGCONTEXT_H */
diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c

index 968d3ee477e331bc7be928a96b5768311e16d676..f1c8ee2895d0f26f51a634b913ee5f61d5a059d7 100644 (file)
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -154,6 +154,33 @@ void or1k_unmap_page(struct device *dev, dma_addr_t dma_handle,
         /* Nothing special to do here... */
  }
  
+int or1k_map_sg(struct device *dev, struct scatterlist *sg,
+               int nents, enum dma_data_direction dir,
+               struct dma_attrs *attrs)
+{
+       struct scatterlist *s;
+       int i;
+
+       for_each_sg(sg, s, nents, i) {
+               s->dma_address = or1k_map_page(dev, sg_page(s), s->offset,
+                                              s->length, dir, NULL);
+       }
+
+       return nents;
+}
+
+void or1k_unmap_sg(struct device *dev, struct scatterlist *sg,
+                  int nents, enum dma_data_direction dir,
+                  struct dma_attrs *attrs)
+{
+       struct scatterlist *s;
+       int i;
+
+       for_each_sg(sg, s, nents, i) {
+               or1k_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, NULL);
+       }
+}
+
  void or1k_sync_single_for_cpu(struct device *dev,
                               dma_addr_t dma_handle, size_t size,
                               enum dma_data_direction dir)
@@ -187,5 +214,4 @@ static int __init dma_init(void)
  
         return 0;
  }
-
  fs_initcall(dma_init);
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c

index 5f759c76834eee10dd821edc8a0b560d6d3931df..95207ab0c99ed536016e88b8be1a2b077ec3bae5 100644 (file)
--- a/arch/openrisc/kernel/signal.c
+++ b/arch/openrisc/kernel/signal.c
@@ -52,31 +52,25 @@ struct rt_sigframe {
  static int restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc)
  {
         unsigned int err = 0;
-       unsigned long old_usp;
  
         /* Alwys make any pending restarted system call return -EINTR */
         current_thread_info()->restart_block.fn = do_no_restart_syscall;
  
-       /* restore the regs from &sc->regs (same as sc, since regs is first)
+       /*
+        * Restore the regs from &sc->regs.
          * (sc is already checked for VERIFY_READ since the sigframe was
          *  checked in sys_sigreturn previously)
          */
-
-       if (__copy_from_user(regs, sc, sizeof(struct pt_regs)))
+       if (__copy_from_user(regs, sc->regs.gpr, 32 * sizeof(unsigned long)))
+               goto badframe;
+       if (__copy_from_user(&regs->pc, &sc->regs.pc, sizeof(unsigned long)))
+               goto badframe;
+       if (__copy_from_user(&regs->sr, &sc->regs.sr, sizeof(unsigned long)))
                 goto badframe;
  
         /* make sure the SM-bit is cleared so user-mode cannot fool us */
         regs->sr &= ~SPR_SR_SM;
  
-       /* restore the old USP as it was before we stacked the sc etc.
-        * (we cannot just pop the sigcontext since we aligned the sp and
-        *  stuff after pushing it)
-        */
-
-       err |= __get_user(old_usp, &sc->usp);
-
-       regs->sp = old_usp;
-
         /* TODO: the other ports use regs->orig_XX to disable syscall checks
          * after this completes, but we don't use that mechanism. maybe we can
          * use it now ?
@@ -137,18 +131,17 @@ static int setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
                             unsigned long mask)
  {
         int err = 0;
-       unsigned long usp = regs->sp;
  
-       /* copy the regs. they are first in sc so we can use sc directly */
+       /* copy the regs */
  
-       err |= __copy_to_user(sc, regs, sizeof(struct pt_regs));
+       err |= __copy_to_user(sc->regs.gpr, regs, 32 * sizeof(unsigned long));
+       err |= __copy_to_user(&sc->regs.pc, &regs->pc, sizeof(unsigned long));
+       err |= __copy_to_user(&sc->regs.sr, &regs->sr, sizeof(unsigned long));
  
         /* then some other stuff */
  
         err |= __put_user(mask, &sc->oldmask);
  
-       err |= __put_user(usp, &sc->usp);
-
         return err;
  }
  
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S

index e66366fd2abc8cbf55e74455c9e26f22bd89aedb..3735abd7f8f6f067488b20de9b44ab487ac2dff2 100644 (file)
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -259,7 +259,7 @@
         ENTRY_SAME(ni_syscall)          /* query_module */
         ENTRY_SAME(poll)
         /* structs contain pointers and an in_addr... */
-       ENTRY_COMP(nfsservctl)
+       ENTRY_SAME(ni_syscall)          /* was nfsservctl */
         ENTRY_SAME(setresgid)           /* 170 */
         ENTRY_SAME(getresgid)
         ENTRY_SAME(prctl)
diff --git a/arch/powerpc/boot/dts/p1023rds.dts b/arch/powerpc/boot/dts/p1023rds.dts

index bfa96aa8f2cac187eb22334ec621ea1d7a646086..d9b776740a6739707d26a9ad836a39533bc8718c 100644 (file)
--- a/arch/powerpc/boot/dts/p1023rds.dts
+++ b/arch/powerpc/boot/dts/p1023rds.dts
@@ -387,7 +387,7 @@
                         #size-cells = <1>;
                         compatible = "cfi-flash";
                         reg = <0x0 0x0 0x02000000>;
-                       bank-width = <1>;
+                       bank-width = <2>;
                         device-width = <1>;
                         partition@0 {
                                 label = "ramdisk";
diff --git a/arch/powerpc/configs/85xx/p1023rds_defconfig b/arch/powerpc/configs/85xx/p1023rds_defconfig

index 980ff8f61fd46d0af87459c8711ea7e6fe524126..3ff5a81c709f2f9febae80a5e1edd3a3a1ee9ca1 100644 (file)
--- a/arch/powerpc/configs/85xx/p1023rds_defconfig
+++ b/arch/powerpc/configs/85xx/p1023rds_defconfig
@@ -171,3 +171,4 @@ CONFIG_CRYPTO_SHA256=y
  CONFIG_CRYPTO_SHA512=y
  CONFIG_CRYPTO_AES=y
  # CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_DEV_FSL_CAAM=y
diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig

index 10562a5c65b96ce8059808dcc047467cb89b2825..4311d02a3bfdaf1c250cf2c437809d768a611861 100644 (file)
--- a/arch/powerpc/configs/corenet32_smp_defconfig
+++ b/arch/powerpc/configs/corenet32_smp_defconfig
@@ -185,3 +185,4 @@ CONFIG_CRYPTO_SHA256=y
  CONFIG_CRYPTO_SHA512=y
  CONFIG_CRYPTO_AES=y
  # CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRYPTO_DEV_FSL_CAAM=y
diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig

index d32283555b53eceb7644f5908506e804e77ba355..c92c204a204b22f6403552a961286135c61b603c 100644 (file)
--- a/arch/powerpc/configs/corenet64_smp_defconfig
+++ b/arch/powerpc/configs/corenet64_smp_defconfig
@@ -100,5 +100,8 @@ CONFIG_DEBUG_INFO=y
  CONFIG_SYSCTL_SYSCALL_CHECK=y
  CONFIG_VIRQ_DEBUG=y
  CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_SHA512=y
+CONFIG_CRYPTO_AES=y
  # CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DEV_TALITOS=y
+CONFIG_CRYPTO_DEV_FSL_CAAM=y
diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig

index fcd85d2c72dc5289eccc0ae3efc973a99eb34ad5..a3467bfb767100aaf71a4c6e2b666a7448060020 100644 (file)
--- a/arch/powerpc/configs/mpc85xx_defconfig
+++ b/arch/powerpc/configs/mpc85xx_defconfig
@@ -139,6 +139,7 @@ CONFIG_SND=y
  CONFIG_SND_INTEL8X0=y
  # CONFIG_SND_PPC is not set
  # CONFIG_SND_USB is not set
+CONFIG_SND_SOC=y
  CONFIG_HID_A4TECH=y
  CONFIG_HID_APPLE=y
  CONFIG_HID_BELKIN=y
diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig

index 908c941fc24c8aeb11e50ba62cc67f18526bbf5a..9693f6ed3da066ad0fe7839fa399826987274c0f 100644 (file)
--- a/arch/powerpc/configs/mpc85xx_smp_defconfig
+++ b/arch/powerpc/configs/mpc85xx_smp_defconfig
@@ -140,6 +140,7 @@ CONFIG_SND=y
  CONFIG_SND_INTEL8X0=y
  # CONFIG_SND_PPC is not set
  # CONFIG_SND_USB is not set
+CONFIG_SND_SOC=y
  CONFIG_HID_A4TECH=y
  CONFIG_HID_APPLE=y
  CONFIG_HID_BELKIN=y
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h

index 1f780b95c0f08635f2a71bcad258b6c72c64846c..938986e412f193913f9113239c73ae37c9d3f9c6 100644 (file)
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -22,7 +22,6 @@ static __always_inline bool arch_static_branch(struct jump_label_key *key)
         asm goto("1:\n\t"
                  "nop\n\t"
                  ".pushsection __jump_table,  \"aw\"\n\t"
-                ".align 4\n\t"
                  JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
                  ".popsection \n\t"
                  : :  "i" (key) : : l_yes);
@@ -41,7 +40,6 @@ struct jump_entry {
         jump_label_t code;
         jump_label_t target;
         jump_label_t key;
-       jump_label_t pad;
  };
  
  #endif /* _ASM_POWERPC_JUMP_LABEL_H */
diff --git a/arch/powerpc/include/asm/kdump.h b/arch/powerpc/include/asm/kdump.h

index 6857af58b02eef9ddc9f2a91d42d28259a5a4b6b..bffd062adf799d605460fee0910d14082765df8c 100644 (file)
--- a/arch/powerpc/include/asm/kdump.h
+++ b/arch/powerpc/include/asm/kdump.h
@@ -3,17 +3,7 @@
  
  #include <asm/page.h>
  
-/*
- * If CONFIG_RELOCATABLE is enabled we can place the kdump kernel anywhere.
- * To keep enough space in the RMO for the first stage kernel on 64bit, we
- * place it at 64MB. If CONFIG_RELOCATABLE is not enabled we must place
- * the second stage at 32MB.
- */
-#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC64)
-#define KDUMP_KERNELBASE       0x4000000
-#else
  #define KDUMP_KERNELBASE       0x2000000
-#endif
  
  /* How many bytes to reserve at zero for kdump. The reserve limit should
   * be greater or equal to the trampoline's end address.
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h

index e8aaf6fce38b9840623daf9aabd62bd7f644f834..559da199edb59699e3a252fedb77fa497f1d043b 100644 (file)
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1003,7 +1003,6 @@
  #define PV_970         0x0039
  #define PV_POWER5      0x003A
  #define PV_POWER5p     0x003B
-#define PV_POWER7      0x003F
  #define PV_970FX       0x003C
  #define PV_POWER6      0x003E
  #define PV_POWER7      0x003F
@@ -1024,13 +1023,16 @@
  #define mtmsrd(v)      __mtmsrd((v), 0)
  #define mtmsr(v)       mtmsrd(v)
  #else
-#define mtmsr(v)       asm volatile("mtmsr %0" : : "r" (v) : "memory")
+#define mtmsr(v)       asm volatile("mtmsr %0" : \
+                                    : "r" ((unsigned long)(v)) \
+                                    : "memory")
  #endif
  
  #define mfspr(rn)      ({unsigned long rval; \
                         asm volatile("mfspr %0," __stringify(rn) \
                                 : "=r" (rval)); rval;})
-#define mtspr(rn, v)   asm volatile("mtspr " __stringify(rn) ",%0" : : "r" (v)\
+#define mtspr(rn, v)   asm volatile("mtspr " __stringify(rn) ",%0" : \
+                                    : "r" ((unsigned long)(v)) \
                                      : "memory")
  
  #ifdef __powerpc64__
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h

index f6736b7da463508e2b9777e851d2bdd2c597fec8..fa0d27a400de917ee45a7a85dd75c2dbbaabb745 100644 (file)
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -171,7 +171,7 @@ SYSCALL_SPU(setresuid)
  SYSCALL_SPU(getresuid)
  SYSCALL(ni_syscall)
  SYSCALL_SPU(poll)
-COMPAT_SYS(nfsservctl)
+SYSCALL(ni_syscall)
  SYSCALL_SPU(setresgid)
  SYSCALL_SPU(getresgid)
  COMPAT_SYS_SPU(prctl)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c

index 9fb933248ab69fe2272f447d1b286407dee5d195..fa44ff538861fa4b44c81b60ebff7b2a4ac0be64 100644 (file)
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -2051,7 +2051,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
  
  static struct cpu_spec the_cpu_spec;
  
-static void __init setup_cpu_spec(unsigned long offset, struct cpu_spec *s)
+static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
+                                              struct cpu_spec *s)
  {
         struct cpu_spec *t = &the_cpu_spec;
         struct cpu_spec old;
@@ -2114,6 +2115,8 @@ static void __init setup_cpu_spec(unsigned long offset, struct cpu_spec *s)
                 t->cpu_setup(offset, t);
         }
  #endif /* CONFIG_PPC64 || CONFIG_BOOKE */
+
+       return t;
  }
  
  struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
@@ -2124,10 +2127,8 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
         s = PTRRELOC(s);
  
         for (i = 0; i < ARRAY_SIZE(cpu_specs); i++,s++) {
-               if ((pvr & s->pvr_mask) == s->pvr_value) {
-                       setup_cpu_spec(offset, s);
-                       return s;
-               }
+               if ((pvr & s->pvr_mask) == s->pvr_value)
+                       return setup_cpu_spec(offset, s);
         }
  
         BUG();
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c

index 1577434f4088599b4243c96b94a14247397f682f..b25f6325fc7090a5e02e9731bbadd49b6c65f9dd 100644 (file)
--- a/arch/powerpc/kernel/iomap.c
+++ b/arch/powerpc/kernel/iomap.c
@@ -117,6 +117,7 @@ void ioport_unmap(void __iomem *addr)
  EXPORT_SYMBOL(ioport_map);
  EXPORT_SYMBOL(ioport_unmap);
  
+#ifdef CONFIG_PCI
  void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
  {
         resource_size_t start = pci_resource_start(dev, bar);
@@ -146,3 +147,4 @@ void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
  
  EXPORT_SYMBOL(pci_iomap);
  EXPORT_SYMBOL(pci_iounmap);
+#endif /* CONFIG_PCI */
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c

index 6658a1589955577859afc62af05f0cd4c66cda6a..9ce1672afb59c1581151d1a83bc8a80b20586385 100644 (file)
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -136,12 +136,16 @@ void __init reserve_crashkernel(void)
         crashk_res.start = KDUMP_KERNELBASE;
  #else
         if (!crashk_res.start) {
+#ifdef CONFIG_PPC64
                 /*
-                * unspecified address, choose a region of specified size
-                * can overlap with initrd (ignoring corruption when retained)
-                * ppc64 requires kernel and some stacks to be in first segemnt
+                * On 64bit we split the RMO in half but cap it at half of
+                * a small SLB (128MB) since the crash kernel needs to place
+                * itself and some stacks to be in the first segment.
                  */
+               crashk_res.start = min(0x80000000ULL, (ppc64_rma_size / 2));
+#else
                 crashk_res.start = KDUMP_KERNELBASE;
+#endif
         }
  
         crash_base = PAGE_ALIGN(crashk_res.start);
diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c

index d05ae4204bbf3d3ddcc84266476b736a6790715c..564c1d8bdb5c4f1a3d2768863c56942efb863b14 100644 (file)
--- a/arch/powerpc/kernel/perf_callchain.c
+++ b/arch/powerpc/kernel/perf_callchain.c
@@ -154,8 +154,12 @@ static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
             ((unsigned long)ptr & 7))
                 return -EFAULT;
  
-       if (!__get_user_inatomic(*ret, ptr))
+       pagefault_disable();
+       if (!__get_user_inatomic(*ret, ptr)) {
+               pagefault_enable();
                 return 0;
+       }
+       pagefault_enable();
  
         return read_user_stack_slow(ptr, ret, 8);
  }
@@ -166,8 +170,12 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
             ((unsigned long)ptr & 3))
                 return -EFAULT;
  
-       if (!__get_user_inatomic(*ret, ptr))
+       pagefault_disable();
+       if (!__get_user_inatomic(*ret, ptr)) {
+               pagefault_enable();
                 return 0;
+       }
+       pagefault_enable();
  
         return read_user_stack_slow(ptr, ret, 4);
  }
@@ -294,11 +302,17 @@ static inline int current_is_64bit(void)
   */
  static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
  {
+       int rc;
+
         if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
             ((unsigned long)ptr & 3))
                 return -EFAULT;
  
-       return __get_user_inatomic(*ret, ptr);
+       pagefault_disable();
+       rc = __get_user_inatomic(*ret, ptr);
+       pagefault_enable();
+
+       return rc;
  }
  
  static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c

index c016033ba78dc30166ce6f7d8d41ad828075d87c..a909f4e9343bd05d0dd044bf940ffce5f488abb6 100644 (file)
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1020,7 +1020,7 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align)
         }
         if (addr == 0)
                 return 0;
-       RELOC(alloc_bottom) = addr;
+       RELOC(alloc_bottom) = addr + size;
  
         prom_debug(" -> %x\n", addr);
         prom_debug("  alloc_bottom : %x\n", RELOC(alloc_bottom));
@@ -1830,11 +1830,13 @@ static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end,
                 if (room > DEVTREE_CHUNK_SIZE)
                         room = DEVTREE_CHUNK_SIZE;
                 if (room < PAGE_SIZE)
-                       prom_panic("No memory for flatten_device_tree (no room)");
+                       prom_panic("No memory for flatten_device_tree "
+                                  "(no room)\n");
                 chunk = alloc_up(room, 0);
                 if (chunk == 0)
-                       prom_panic("No memory for flatten_device_tree (claim failed)");
-               *mem_end = RELOC(alloc_top);
+                       prom_panic("No memory for flatten_device_tree "
+                                  "(claim failed)\n");
+               *mem_end = chunk + room;
         }
  
         ret = (void *)*mem_start;
@@ -2042,7 +2044,7 @@ static void __init flatten_device_tree(void)
  
         /*
          * Check how much room we have between alloc top & bottom (+/- a
-        * few pages), crop to 4Mb, as this is our "chuck" size
+        * few pages), crop to 1MB, as this is our "chunk" size
          */
         room = RELOC(alloc_top) - RELOC(alloc_bottom) - 0x4000;
         if (room > DEVTREE_CHUNK_SIZE)
@@ -2053,7 +2055,7 @@ static void __init flatten_device_tree(void)
         mem_start = (unsigned long)alloc_up(room, PAGE_SIZE);
         if (mem_start == 0)
                 prom_panic("Can't allocate initial device-tree chunk\n");
-       mem_end = RELOC(alloc_top);
+       mem_end = mem_start + room;
  
         /* Get root of tree */
         root = call_prom("peer", 1, 1, (phandle)0);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S

index 6dd33581a22814afc30f0168ebd7ea21ae53e911..de2950135e6ef2e152948983c631e302469ba281 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1251,7 +1251,7 @@ BEGIN_FTR_SECTION
         reg = 0
         .rept   32
         li      r6,reg*16+VCPU_VSRS
-       stxvd2x reg,r6,r3
+       STXVD2X(reg,r6,r3)
         reg = reg + 1
         .endr
  FTR_SECTION_ELSE
@@ -1313,7 +1313,7 @@ BEGIN_FTR_SECTION
         reg = 0
         .rept   32
         li      r7,reg*16+VCPU_VSRS
-       lxvd2x  reg,r7,r4
+       LXVD2X(reg,r7,r4)
         reg = reg + 1
         .endr
  FTR_SECTION_ELSE
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig

index d0af7fb2f3441c6113671b5ee51f95d3a717a8e4..b9ba86191aedba2a7d50901756733addc3601998 100644 (file)
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -24,7 +24,7 @@ source "arch/powerpc/platforms/wsp/Kconfig"
  
  config KVM_GUEST
         bool "KVM Guest support"
-       default y
+       default n
         ---help---
           This option enables various optimizations for running under the KVM
           hypervisor. Overhead for the kernel when not running inside KVM should
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c

index e9190073bb9791cb3a3486d05c336b430ef36e63..0e8656370063fe6bf005e59032a67c414efe729f 100644 (file)
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -181,7 +181,7 @@ static void dtl_stop(struct dtl *dtl)
  
         lppaca_of(dtl->cpu).dtl_enable_mask = 0x0;
  
-       unregister_dtl(hwcpu, __pa(dtl->buf));
+       unregister_dtl(hwcpu);
  }
  
  static u64 dtl_current_index(struct dtl *dtl)
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c

index bc0288501f17c41388b998f7ac722337ad4c7821..83a3ca2fd2823a3101744b847cc09d5f0fb1eb33 100644 (file)
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -135,7 +135,7 @@ static void pseries_mach_cpu_die(void)
                 get_lppaca()->idle = 0;
  
                 if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) {
-                       unregister_slb_shadow(hwcpu, __pa(get_slb_shadow()));
+                       unregister_slb_shadow(hwcpu);
  
                         /*
                          * Call to start_secondary_resume() will not return.
@@ -150,7 +150,7 @@ static void pseries_mach_cpu_die(void)
         WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE);
  
         set_cpu_current_state(cpu, CPU_STATE_OFFLINE);
-       unregister_slb_shadow(hwcpu, __pa(get_slb_shadow()));
+       unregister_slb_shadow(hwcpu);
         rtas_stop_self();
  
         /* Should never get here... */
diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c

index c829e6067d545e05b66a023b3237508c21061f65..2c4dd1fb833331239ebeacf77d59cb8672bdf4ad 100644 (file)
--- a/arch/powerpc/platforms/pseries/io_event_irq.c
+++ b/arch/powerpc/platforms/pseries/io_event_irq.c
@@ -212,17 +212,15 @@ static int __init ioei_init(void)
         struct device_node *np;
  
         ioei_check_exception_token = rtas_token("check-exception");
-       if (ioei_check_exception_token == RTAS_UNKNOWN_SERVICE) {
-               pr_warning("IO Event IRQ not supported on this system !\n");
+       if (ioei_check_exception_token == RTAS_UNKNOWN_SERVICE)
                 return -ENODEV;
-       }
+
         np = of_find_node_by_path("/event-sources/ibm,io-events");
         if (np) {
                 request_event_sources_irqs(np, ioei_interrupt, "IO_EVENT");
+               pr_info("IBM I/O event interrupts enabled\n");
                 of_node_put(np);
         } else {
-               pr_err("io_event_irq: No ibm,io-events on system! "
-                      "IO Event interrupt disabled.\n");
                 return -ENODEV;
         }
         return 0;
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c

index 54cf3a4aa16b31d58214655c1b69d209a7a335ac..7d94bdc63d5075641e6924033c3a912fc186b9fe 100644 (file)
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -25,20 +25,30 @@ static void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
  {
         /* Don't risk a hypervisor call if we're crashing */
         if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) {
-               unsigned long addr;
+               int ret;
+               int cpu = smp_processor_id();
+               int hwcpu = hard_smp_processor_id();
  
-               addr = __pa(get_slb_shadow());
-               if (unregister_slb_shadow(hard_smp_processor_id(), addr))
-                       printk("SLB shadow buffer deregistration of "
-                              "cpu %u (hw_cpu_id %d) failed\n",
-                              smp_processor_id(),
-                              hard_smp_processor_id());
+               if (get_lppaca()->dtl_enable_mask) {
+                       ret = unregister_dtl(hwcpu);
+                       if (ret) {
+                               pr_err("WARNING: DTL deregistration for cpu "
+                                      "%d (hw %d) failed with %d\n",
+                                      cpu, hwcpu, ret);
+                       }
+               }
+
+               ret = unregister_slb_shadow(hwcpu);
+               if (ret) {
+                       pr_err("WARNING: SLB shadow buffer deregistration "
+                              "for cpu %d (hw %d) failed with %d\n",
+                              cpu, hwcpu, ret);
+               }
  
-               addr = __pa(get_lppaca());
-               if (unregister_vpa(hard_smp_processor_id(), addr)) {
-                       printk("VPA deregistration of cpu %u (hw_cpu_id %d) "
-                                       "failed\n", smp_processor_id(),
-                                       hard_smp_processor_id());
+               ret = unregister_vpa(hwcpu);
+               if (ret) {
+                       pr_err("WARNING: VPA deregistration for cpu %d "
+                              "(hw %d) failed with %d\n", cpu, hwcpu, ret);
                 }
         }
  }
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c

index f7205d344efd7577c8069bfc565dc21506101e4f..c9a29dae8c0538359e60a3a377484277770c1bc3 100644 (file)
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -67,9 +67,8 @@ void vpa_init(int cpu)
         ret = register_vpa(hwcpu, addr);
  
         if (ret) {
-               printk(KERN_ERR "WARNING: vpa_init: VPA registration for "
-                               "cpu %d (hw %d) of area %lx returns %ld\n",
-                               cpu, hwcpu, addr, ret);
+               pr_err("WARNING: VPA registration for cpu %d (hw %d) of area "
+                      "%lx failed with %ld\n", cpu, hwcpu, addr, ret);
                 return;
         }
         /*
@@ -80,10 +79,9 @@ void vpa_init(int cpu)
         if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
                 ret = register_slb_shadow(hwcpu, addr);
                 if (ret)
-                       printk(KERN_ERR
-                              "WARNING: vpa_init: SLB shadow buffer "
-                              "registration for cpu %d (hw %d) of area %lx "
-                              "returns %ld\n", cpu, hwcpu, addr, ret);
+                       pr_err("WARNING: SLB shadow buffer registration for "
+                              "cpu %d (hw %d) of area %lx failed with %ld\n",
+                              cpu, hwcpu, addr, ret);
         }
  
         /*
@@ -100,8 +98,9 @@ void vpa_init(int cpu)
                 dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES;
                 ret = register_dtl(hwcpu, __pa(dtl));
                 if (ret)
-                       pr_warn("DTL registration failed for cpu %d (%ld)\n",
-                               cpu, ret);
+                       pr_err("WARNING: DTL registration of cpu %d (hw %d) "
+                              "failed with %ld\n", smp_processor_id(),
+                              hwcpu, ret);
                 lppaca_of(cpu).dtl_enable_mask = 2;
         }
  }
@@ -204,7 +203,7 @@ static void pSeries_lpar_hptab_clear(void)
                 unsigned long ptel;
         } ptes[4];
         long lpar_rc;
-       int i, j;
+       unsigned long i, j;
  
         /* Read in batches of 4,
          * invalidate only valid entries not in the VRMA
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h

index 4bf21207d7d3b17e95e3a2e1cda7387fd531dd0f..41c24c146d6a9e8f06403272944a116860091ce3 100644 (file)
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -53,9 +53,9 @@ static inline long vpa_call(unsigned long flags, unsigned long cpu,
         return plpar_hcall_norets(H_REGISTER_VPA, flags, cpu, vpa);
  }
  
-static inline long unregister_vpa(unsigned long cpu, unsigned long vpa)
+static inline long unregister_vpa(unsigned long cpu)
  {
-       return vpa_call(0x5, cpu, vpa);
+       return vpa_call(0x5, cpu, 0);
  }
  
  static inline long register_vpa(unsigned long cpu, unsigned long vpa)
@@ -63,9 +63,9 @@ static inline long register_vpa(unsigned long cpu, unsigned long vpa)
         return vpa_call(0x1, cpu, vpa);
  }
  
-static inline long unregister_slb_shadow(unsigned long cpu, unsigned long vpa)
+static inline long unregister_slb_shadow(unsigned long cpu)
  {
-       return vpa_call(0x7, cpu, vpa);
+       return vpa_call(0x7, cpu, 0);
  }
  
  static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa)
@@ -73,9 +73,9 @@ static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa)
         return vpa_call(0x3, cpu, vpa);
  }
  
-static inline long unregister_dtl(unsigned long cpu, unsigned long vpa)
+static inline long unregister_dtl(unsigned long cpu)
  {
-       return vpa_call(0x6, cpu, vpa);
+       return vpa_call(0x6, cpu, 0);
  }
  
  static inline long register_dtl(unsigned long cpu, unsigned long vpa)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c

index d00e52926b71e3ae76a08d809881008a620ed626..0969fd98c4faa8741e7e18f445755b8ad9048322 100644 (file)
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -324,8 +324,9 @@ static int alloc_dispatch_logs(void)
         dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES;
         ret = register_dtl(hard_smp_processor_id(), __pa(dtl));
         if (ret)
-               pr_warn("DTL registration failed for boot cpu %d (%d)\n",
-                       smp_processor_id(), ret);
+               pr_err("WARNING: DTL registration of cpu %d (hw %d) failed "
+                      "with %d\n", smp_processor_id(),
+                      hard_smp_processor_id(), ret);
         get_paca()->lppaca_ptr->dtl_enable_mask = 2;
  
         return 0;
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c

index 2de8551df40fc2ab6fc9bd0570a6fe83bf9994bf..c65f75aa7ff7be40f4f16bda3fcfd39d3eb45fd5 100644 (file)
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -54,6 +54,7 @@
  #define ODSR_CLEAR             0x1c00
  #define LTLEECSR_ENABLE_ALL    0xFFC000FC
  #define ESCSR_CLEAR            0x07120204
+#define IECSR_CLEAR            0x80000000
  
  #define RIO_PORT1_EDCSR                0x0640
  #define RIO_PORT2_EDCSR                0x0680
@@ -1089,11 +1090,11 @@ static void port_error_handler(struct rio_mport *port, int offset)
  
         if (offset == 0) {
                 out_be32((u32 *)(rio_regs_win + RIO_PORT1_EDCSR), 0);
-               out_be32((u32 *)(rio_regs_win + RIO_PORT1_IECSR), 0);
+               out_be32((u32 *)(rio_regs_win + RIO_PORT1_IECSR), IECSR_CLEAR);
                 out_be32((u32 *)(rio_regs_win + RIO_ESCSR), ESCSR_CLEAR);
         } else {
                 out_be32((u32 *)(rio_regs_win + RIO_PORT2_EDCSR), 0);
-               out_be32((u32 *)(rio_regs_win + RIO_PORT2_IECSR), 0);
+               out_be32((u32 *)(rio_regs_win + RIO_PORT2_IECSR), IECSR_CLEAR);
                 out_be32((u32 *)(rio_regs_win + RIO_PORT2_ESCSR), ESCSR_CLEAR);
         }
  }
diff --git a/arch/powerpc/sysdev/ppc4xx_pci.c b/arch/powerpc/sysdev/ppc4xx_pci.c

index a59ba96d2c216b296005f6a30c17523c1562e1c4..dbfe96bc878a2578c9171266012323b085e6c8b6 100644 (file)
--- a/arch/powerpc/sysdev/ppc4xx_pci.c
+++ b/arch/powerpc/sysdev/ppc4xx_pci.c
@@ -655,8 +655,6 @@ struct ppc4xx_pciex_hwops
  
  static struct ppc4xx_pciex_hwops *ppc4xx_pciex_hwops;
  
-#ifdef CONFIG_44x
-
  static int __init ppc4xx_pciex_wait_on_sdr(struct ppc4xx_pciex_port *port,
                                            unsigned int sdr_offset,
                                            unsigned int mask,
@@ -688,6 +686,7 @@ static int __init ppc4xx_pciex_port_reset_sdr(struct ppc4xx_pciex_port *port)
         return 0;
  }
  
+
  static void __init ppc4xx_pciex_check_link_sdr(struct ppc4xx_pciex_port *port)
  {
         printk(KERN_INFO "PCIE%d: Checking link...\n", port->index);
@@ -718,6 +717,8 @@ static void __init ppc4xx_pciex_check_link_sdr(struct ppc4xx_pciex_port *port)
                 printk(KERN_INFO "PCIE%d: No device detected.\n", port->index);
  }
  
+#ifdef CONFIG_44x
+
  /* Check various reset bits of the 440SPe PCIe core */
  static int __init ppc440spe_pciex_check_reset(struct device_node *np)
  {
diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S

index 08ab9aa6a0d574027f0dcd48ea9840bf2984ee96..7526db6bf501f8a5394f56dfc0cadfc343ef82e6 100644 (file)
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -665,12 +665,6 @@ ENTRY(sys32_poll_wrapper)
         lgfr    %r4,%r4                 # long
         jg      sys_poll                # branch to system call
  
-ENTRY(compat_sys_nfsservctl_wrapper)
-       lgfr    %r2,%r2                 # int
-       llgtr   %r3,%r3                 # struct compat_nfsctl_arg*
-       llgtr   %r4,%r4                 # union compat_nfsctl_res*
-       jg      compat_sys_nfsservctl   # branch to system call
-
  ENTRY(sys32_setresgid16_wrapper)
         llgfr   %r2,%r2                 # __kernel_old_gid_emu31_t
         llgfr   %r3,%r3                 # __kernel_old_gid_emu31_t
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c

index 068f8465c4ee07c0156978517f9efe88dcc5e96a..f297456dba7a9eb3282fe0377097e99f97a488d0 100644 (file)
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -396,17 +396,19 @@ static __init void detect_machine_facilities(void)
  static __init void rescue_initrd(void)
  {
  #ifdef CONFIG_BLK_DEV_INITRD
+       unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20);
         /*
-        * Move the initrd right behind the bss section in case it starts
-        * within the bss section. So we don't overwrite it when the bss
-        * section gets cleared.
+        * Just like in case of IPL from VM reader we make sure there is a
+        * gap of 4MB between end of kernel and start of initrd.
+        * That way we can also be sure that saving an NSS will succeed,
+        * which however only requires different segments.
          */
         if (!INITRD_START || !INITRD_SIZE)
                 return;
-       if (INITRD_START >= (unsigned long) __bss_stop)
+       if (INITRD_START >= min_initrd_addr)
                 return;
-       memmove(__bss_stop, (void *) INITRD_START, INITRD_SIZE);
-       INITRD_START = (unsigned long) __bss_stop;
+       memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE);
+       INITRD_START = min_initrd_addr;
  #endif
  }
  
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c

index 04361d5a42794524419bd1898ac78ef755491449..48c710206366308270e70a846c268c9e5cc8ea9f 100644 (file)
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -1220,7 +1220,7 @@ static int __init reipl_fcp_init(void)
         /* sysfs: create fcp kset for mixing attr group and bin attrs */
         reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL,
                                              &reipl_kset->kobj);
-       if (!reipl_kset) {
+       if (!reipl_fcp_kset) {
                 free_page((unsigned long) reipl_block_fcp);
                 return -ENOMEM;
         }
@@ -1618,7 +1618,8 @@ static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR,
  
  static void stop_run(struct shutdown_trigger *trigger)
  {
-       if (strcmp(trigger->name, ON_PANIC_STR) == 0)
+       if (strcmp(trigger->name, ON_PANIC_STR) == 0 ||
+           strcmp(trigger->name, ON_RESTART_STR) == 0)
                 disabled_wait((unsigned long) __builtin_return_address(0));
         while (sigp(smp_processor_id(), sigp_stop) == sigp_busy)
                 cpu_relax();
@@ -1717,7 +1718,7 @@ static void do_panic(void)
  /* on restart */
  
  static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
-       &reipl_action};
+       &stop_action};
  
  static ssize_t on_restart_show(struct kobject *kobj,
                                struct kobj_attribute *attr, char *page)
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S

index 6ee39ef8fe4add9f0c47dd33ba52aa3050842ead..73eb08c874fb450ef6ba0464d4aa4ca845bb6911 100644 (file)
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -177,7 +177,7 @@ SYSCALL(sys_getresuid16,sys_ni_syscall,sys32_getresuid16_wrapper)   /* 165 old get
  NI_SYSCALL                                                     /* for vm86 */
  NI_SYSCALL                                                     /* old sys_query_module */
  SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper)
-SYSCALL(sys_nfsservctl,sys_nfsservctl,compat_sys_nfsservctl_wrapper)
+NI_SYSCALL                                                     /* old nfsservctl */
  SYSCALL(sys_setresgid16,sys_ni_syscall,sys32_setresgid16_wrapper)      /* 170 old setresgid16 syscall */
  SYSCALL(sys_getresgid16,sys_ni_syscall,sys32_getresgid16_wrapper)      /* old getresgid16 syscall */
  SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper)
diff --git a/arch/sh/include/asm/ptrace.h b/arch/sh/include/asm/ptrace.h

index b97baf81a87bb8afda8a4d1558786cd14afa4f0b..2d3679b2447f262c4c83eebe5e6ff89890c2e0a2 100644 (file)
--- a/arch/sh/include/asm/ptrace.h
+++ b/arch/sh/include/asm/ptrace.h
@@ -123,7 +123,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
  struct perf_event;
  struct perf_sample_data;
  
-extern void ptrace_triggered(struct perf_event *bp, int nmi,
+extern void ptrace_triggered(struct perf_event *bp,
                       struct perf_sample_data *data, struct pt_regs *regs);
  
  #define task_pt_regs(task) \
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7757.c b/arch/sh/kernel/cpu/sh4a/setup-sh7757.c

index e915deafac89ba8af2774272a3ec04545bae9c1a..05559295d2ca8e8fa184e50bcca0a4b224e9d308 100644 (file)
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7757.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7757.c
@@ -15,6 +15,7 @@
  #include <linux/serial_sci.h>
  #include <linux/io.h>
  #include <linux/mm.h>
+#include <linux/dma-mapping.h>
  #include <linux/sh_timer.h>
  #include <linux/sh_dma.h>
  
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c

index 32114e0941ae7616aae53de88a9f79b895e973db..db4ecd731a003792783ddbc524fcc60f020d4bf7 100644 (file)
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -22,7 +22,7 @@
  #include <linux/atomic.h>
  #include <asm/smp.h>
  
-static void (*pm_idle)(void);
+void (*pm_idle)(void);
  
  static int hlt_counter;
  
diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S

index 39b051de4c7ca4d6c7d13fb983232bf2be1f26f6..293e39c59c00522c08e99a9877fcafc36836152b 100644 (file)
--- a/arch/sh/kernel/syscalls_32.S
+++ b/arch/sh/kernel/syscalls_32.S
@@ -185,7 +185,7 @@ ENTRY(sys_call_table)
         .long sys_ni_syscall    /* vm86 */
         .long sys_ni_syscall    /* old "query_module" */
         .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall    /* was nfsservctl */
         .long sys_setresgid16   /* 170 */
         .long sys_getresgid16
         .long sys_prctl
diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S

index 089c4d825d087a27585a295ee7509d409a76b786..ceb34b94afa9cb08c6f396cbd29c84d937375c2d 100644 (file)
--- a/arch/sh/kernel/syscalls_64.S
+++ b/arch/sh/kernel/syscalls_64.S
@@ -189,7 +189,7 @@ sys_call_table:
         .long sys_ni_syscall    /* vm86 */
         .long sys_ni_syscall    /* old "query_module" */
         .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall    /* was nfsservctl */
         .long sys_setresgid16           /* 170 */
         .long sys_getresgid16
         .long sys_prctl
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c

index d9006f8ffc142532d99b0ff539f88831f4027999..7bbef95c9d1b4eb8daa1ffd055d57ea5e8bc6af2 100644 (file)
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -316,6 +316,35 @@ static int handle_unaligned_ins(insn_size_t instruction, struct pt_regs *regs,
                         break;
                 }
                 break;
+
+       case 9: /* mov.w @(disp,PC),Rn */
+               srcu = (unsigned char __user *)regs->pc;
+               srcu += 4;
+               srcu += (instruction & 0x00FF) << 1;
+               dst = (unsigned char *)rn;
+               *(unsigned long *)dst = 0;
+
+#if !defined(__LITTLE_ENDIAN__)
+               dst += 2;
+#endif
+
+               if (ma->from(dst, srcu, 2))
+                       goto fetch_fault;
+               sign_extend(2, dst);
+               ret = 0;
+               break;
+
+       case 0xd: /* mov.l @(disp,PC),Rn */
+               srcu = (unsigned char __user *)(regs->pc & ~0x3);
+               srcu += 4;
+               srcu += (instruction & 0x00FF) << 2;
+               dst = (unsigned char *)rn;
+               *(unsigned long *)dst = 0;
+
+               if (ma->from(dst, srcu, 4))
+                       goto fetch_fault;
+               ret = 0;
+               break;
         }
         return ret;
  
@@ -466,6 +495,7 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
                 case 0x0500: /* mov.w @(disp,Rm),R0 */
                         goto simple;
                 case 0x0B00: /* bf   lab - no delayslot*/
+                       ret = 0;
                         break;
                 case 0x0F00: /* bf/s lab */
                         ret = handle_delayslot(regs, instruction, ma);
@@ -479,6 +509,7 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
                         }
                         break;
                 case 0x0900: /* bt   lab - no delayslot */
+                       ret = 0;
                         break;
                 case 0x0D00: /* bt/s lab */
                         ret = handle_delayslot(regs, instruction, ma);
@@ -494,6 +525,9 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
                 }
                 break;
  
+       case 0x9000: /* mov.w @(disp,Rm),Rn */
+               goto simple;
+
         case 0xA000: /* bra label */
                 ret = handle_delayslot(regs, instruction, ma);
                 if (ret==0)
@@ -507,6 +541,9 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
                         regs->pc += SH_PC_12BIT_OFFSET(instruction);
                 }
                 break;
+
+       case 0xD000: /* mov.l @(disp,Rm),Rn */
+               goto simple;
         }
         return ret;
  
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig

index 42c67beadcae2367e9cb7107bb125d188b3b4443..1a6f20d4e7e6b4648aed89fb7c986a672770ad29 100644 (file)
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -55,6 +55,7 @@ config SPARC64
         select PERF_USE_VMALLOC
         select IRQ_PREFLOW_FASTEOI
         select ARCH_HAVE_NMI_SAFE_CMPXCHG
+       select HAVE_C_RECORDMCOUNT
  
  config ARCH_DEFCONFIG
         string
diff --git a/arch/sparc/include/asm/sigcontext.h b/arch/sparc/include/asm/sigcontext.h

index a1607d1803547cd075090f6835839e0f3236910e..69914d748130a9b151d4f4c8f17a496fe2d5d39e 100644 (file)
--- a/arch/sparc/include/asm/sigcontext.h
+++ b/arch/sparc/include/asm/sigcontext.h
@@ -45,6 +45,19 @@ typedef struct {
         int                     si_mask;
  } __siginfo32_t;
  
+#define __SIGC_MAXWIN  7
+
+typedef struct {
+       unsigned long locals[8];
+       unsigned long ins[8];
+} __siginfo_reg_window;
+
+typedef struct {
+       int                     wsaved;
+       __siginfo_reg_window    reg_window[__SIGC_MAXWIN];
+       unsigned long           rwbuf_stkptrs[__SIGC_MAXWIN];
+} __siginfo_rwin_t;
+
  #ifdef CONFIG_SPARC64
  typedef struct {
         unsigned   int si_float_regs [64];
@@ -73,6 +86,7 @@ struct sigcontext {
                 unsigned long   ss_size;
         }                       sigc_stack;
         unsigned long           sigc_mask;
+       __siginfo_rwin_t *      sigc_rwin_save;
  };
  
  #else
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h

index 5f5b8bf3f50d39a5c4b8cae94231cdd87543bbfa..bcc98fc35281c73bed1ae1e35cb4d56aade44794 100644 (file)
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -131,6 +131,15 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
         *(volatile __u32 *)&lp->lock = ~0U;
  }
  
+static void inline arch_write_unlock(arch_rwlock_t *lock)
+{
+       __asm__ __volatile__(
+"      st              %%g0, [%0]"
+       : /* no outputs */
+       : "r" (lock)
+       : "memory");
+}
+
  static inline int arch_write_trylock(arch_rwlock_t *rw)
  {
         unsigned int val;
@@ -175,8 +184,6 @@ static inline int __arch_read_trylock(arch_rwlock_t *rw)
         res; \
  })
  
-#define arch_write_unlock(rw)  do { (rw)->lock = 0; } while(0)
-
  #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
  #define arch_read_lock_flags(rw, flags)   arch_read_lock(rw)
  #define arch_write_lock_flags(rw, flags)  arch_write_lock(rw)
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h

index 073936a8b2755e1c15e637ff28c5bc607876f4dc..9689176949781fc5ae8985a794def4521e6ba72b 100644 (file)
--- a/arch/sparc/include/asm/spinlock_64.h
+++ b/arch/sparc/include/asm/spinlock_64.h
@@ -210,14 +210,8 @@ static int inline arch_write_trylock(arch_rwlock_t *lock)
         return result;
  }
  
-#define arch_read_lock(p)      arch_read_lock(p)
  #define arch_read_lock_flags(p, f) arch_read_lock(p)
-#define arch_read_trylock(p)   arch_read_trylock(p)
-#define arch_read_unlock(p)    arch_read_unlock(p)
-#define arch_write_lock(p)     arch_write_lock(p)
  #define arch_write_lock_flags(p, f) arch_write_lock(p)
-#define arch_write_unlock(p)   arch_write_unlock(p)
-#define arch_write_trylock(p)  arch_write_trylock(p)
  
  #define arch_read_can_lock(rw)         (!((rw)->lock & 0x80000000UL))
  #define arch_write_can_lock(rw)        (!(rw)->lock)
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile

index b90b4a1d070ad3e33b6b2ba6e3de59327c59677d..cb85458f89d2c495dd80e6f649e865eea6d71929 100644 (file)
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_SPARC32)   += sun4m_irq.o sun4c_irq.o sun4d_irq.o
  
  obj-y                   += process_$(BITS).o
  obj-y                   += signal_$(BITS).o
+obj-y                   += sigutil_$(BITS).o
  obj-$(CONFIG_SPARC32)   += ioport.o
  obj-y                   += setup_$(BITS).o
  obj-y                   += idprom.o
diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c

index 490e5418740df767b693fe7a7ef023e6714f4e29..7429b47c3acad8adb97ff177c4459fafcb1ae1e3 100644 (file)
--- a/arch/sparc/kernel/ds.c
+++ b/arch/sparc/kernel/ds.c
@@ -1256,13 +1256,14 @@ static int __init ds_init(void)
  {
         unsigned long hv_ret, major, minor;
  
-       hv_ret = sun4v_get_version(HV_GRP_REBOOT_DATA, &major, &minor);
-       if (hv_ret == HV_EOK) {
-               pr_info("SUN4V: Reboot data supported (maj=%lu,min=%lu).\n",
-                       major, minor);
-               reboot_data_supported = 1;
+       if (tlb_type == hypervisor) {
+               hv_ret = sun4v_get_version(HV_GRP_REBOOT_DATA, &major, &minor);
+               if (hv_ret == HV_EOK) {
+                       pr_info("SUN4V: Reboot data supported (maj=%lu,min=%lu).\n",
+                               major, minor);
+                       reboot_data_supported = 1;
+               }
         }
-
         kthread_run(ds_thread, NULL, "kldomd");
  
         return vio_register_driver(&ds_driver);
diff --git a/arch/sparc/kernel/irq.h b/arch/sparc/kernel/irq.h

index 100b9c204e78f5143c13ae9beb3a6256d28165e5..42851122bbd9c35b184a22454540fbc182e1c7e9 100644 (file)
--- a/arch/sparc/kernel/irq.h
+++ b/arch/sparc/kernel/irq.h
@@ -88,7 +88,7 @@ BTFIXUPDEF_CALL(void, set_irq_udt, int)
  #define set_irq_udt(cpu) BTFIXUP_CALL(set_irq_udt)(cpu)
  
  /* All SUN4D IPIs are sent on this IRQ, may be shared with hard IRQs */
-#define SUN4D_IPI_IRQ 14
+#define SUN4D_IPI_IRQ 13
  
  extern void sun4d_ipi_interrupt(void);
  
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c

index a19f04195478a425d22a3da2264b8cf2aa3ce966..1aaf8c180be5d82544df54983ccaaa4ef5c7c071 100644 (file)
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -352,8 +352,8 @@ int __init pcic_probe(void)
         strcpy(pbm->prom_name, namebuf);
  
         {
-               extern volatile int t_nmi[1];
-               extern int pcic_nmi_trap_patch[1];
+               extern volatile int t_nmi[4];
+               extern int pcic_nmi_trap_patch[4];
  
                 t_nmi[0] = pcic_nmi_trap_patch[0];
                 t_nmi[1] = pcic_nmi_trap_patch[1];
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c

index 3e9daea1653d38afcfee2f04d70d79af732c6c2d..3c5bb784214f5944e5fd838cca5836f2d968a699 100644 (file)
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -440,8 +440,14 @@ static void __init init_sparc64_elf_hwcap(void)
                         cap |= AV_SPARC_VIS;
                 if (tlb_type == cheetah || tlb_type == cheetah_plus)
                         cap |= AV_SPARC_VIS | AV_SPARC_VIS2;
-               if (tlb_type == cheetah_plus)
-                       cap |= AV_SPARC_POPC;
+               if (tlb_type == cheetah_plus) {
+                       unsigned long impl, ver;
+
+                       __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
+                       impl = ((ver >> 32) & 0xffff);
+                       if (impl == PANTHER_IMPL)
+                               cap |= AV_SPARC_POPC;
+               }
                 if (tlb_type == hypervisor) {
                         if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1)
                                 cap |= AV_SPARC_ASI_BLK_INIT;
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c

index 75fad425e249bc40559f98d14ead5699839bbbb8..1ba95aff5d5958cdeb1142a24f18ea0c63717722 100644 (file)
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -29,6 +29,8 @@
  #include <asm/visasm.h>
  #include <asm/compat_signal.h>
  
+#include "sigutil.h"
+
  #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
  
  /* This magic should be in g_upper[0] for all upper parts
@@ -44,14 +46,14 @@ typedef struct {
  struct signal_frame32 {
         struct sparc_stackf32   ss;
         __siginfo32_t           info;
-       /* __siginfo_fpu32_t * */ u32 fpu_save;
+       /* __siginfo_fpu_t * */ u32 fpu_save;
         unsigned int            insns[2];
         unsigned int            extramask[_COMPAT_NSIG_WORDS - 1];
         unsigned int            extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */
         /* Only valid if (info.si_regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */
         siginfo_extra_v8plus_t  v8plus;
-       __siginfo_fpu_t         fpu_state;
-};
+       /* __siginfo_rwin_t * */u32 rwin_save;
+} __attribute__((aligned(8)));
  
  typedef struct compat_siginfo{
         int si_signo;
@@ -110,18 +112,14 @@ struct rt_signal_frame32 {
         compat_siginfo_t        info;
         struct pt_regs32        regs;
         compat_sigset_t         mask;
-       /* __siginfo_fpu32_t * */ u32 fpu_save;
+       /* __siginfo_fpu_t * */ u32 fpu_save;
         unsigned int            insns[2];
         stack_t32               stack;
         unsigned int            extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */
         /* Only valid if (regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */
         siginfo_extra_v8plus_t  v8plus;
-       __siginfo_fpu_t         fpu_state;
-};
-
-/* Align macros */
-#define SF_ALIGNEDSZ  (((sizeof(struct signal_frame32) + 15) & (~15)))
-#define RT_ALIGNEDSZ  (((sizeof(struct rt_signal_frame32) + 15) & (~15)))
+       /* __siginfo_rwin_t * */u32 rwin_save;
+} __attribute__((aligned(8)));
  
  int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
  {
@@ -192,30 +190,13 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
         return 0;
  }
  
-static int restore_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       unsigned long *fpregs = current_thread_info()->fpregs;
-       unsigned long fprs;
-       int err;
-       
-       err = __get_user(fprs, &fpu->si_fprs);
-       fprs_write(0);
-       regs->tstate &= ~TSTATE_PEF;
-       if (fprs & FPRS_DL)
-               err |= copy_from_user(fpregs, &fpu->si_float_regs[0], (sizeof(unsigned int) * 32));
-       if (fprs & FPRS_DU)
-               err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32], (sizeof(unsigned int) * 32));
-       err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-       err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-       current_thread_info()->fpsaved[0] |= fprs;
-       return err;
-}
-
  void do_sigreturn32(struct pt_regs *regs)
  {
         struct signal_frame32 __user *sf;
+       compat_uptr_t fpu_save;
+       compat_uptr_t rwin_save;
         unsigned int psr;
-       unsigned pc, npc, fpu_save;
+       unsigned pc, npc;
         sigset_t set;
         unsigned seta[_COMPAT_NSIG_WORDS];
         int err, i;
@@ -273,8 +254,13 @@ void do_sigreturn32(struct pt_regs *regs)
         pt_regs_clear_syscall(regs);
  
         err |= __get_user(fpu_save, &sf->fpu_save);
-       if (fpu_save)
-               err |= restore_fpu_state32(regs, &sf->fpu_state);
+       if (!err && fpu_save)
+               err |= restore_fpu_state(regs, compat_ptr(fpu_save));
+       err |= __get_user(rwin_save, &sf->rwin_save);
+       if (!err && rwin_save) {
+               if (restore_rwin_state(compat_ptr(rwin_save)))
+                       goto segv;
+       }
         err |= __get_user(seta[0], &sf->info.si_mask);
         err |= copy_from_user(seta+1, &sf->extramask,
                               (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
@@ -300,7 +286,9 @@ segv:
  asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
  {
         struct rt_signal_frame32 __user *sf;
-       unsigned int psr, pc, npc, fpu_save, u_ss_sp;
+       unsigned int psr, pc, npc, u_ss_sp;
+       compat_uptr_t fpu_save;
+       compat_uptr_t rwin_save;
         mm_segment_t old_fs;
         sigset_t set;
         compat_sigset_t seta;
@@ -359,8 +347,8 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
         pt_regs_clear_syscall(regs);
  
         err |= __get_user(fpu_save, &sf->fpu_save);
-       if (fpu_save)
-               err |= restore_fpu_state32(regs, &sf->fpu_state);
+       if (!err && fpu_save)
+               err |= restore_fpu_state(regs, compat_ptr(fpu_save));
         err |= copy_from_user(&seta, &sf->mask, sizeof(compat_sigset_t));
         err |= __get_user(u_ss_sp, &sf->stack.ss_sp);
         st.ss_sp = compat_ptr(u_ss_sp);
@@ -376,6 +364,12 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
         do_sigaltstack((stack_t __user *) &st, NULL, (unsigned long)sf);
         set_fs(old_fs);
         
+       err |= __get_user(rwin_save, &sf->rwin_save);
+       if (!err && rwin_save) {
+               if (restore_rwin_state(compat_ptr(rwin_save)))
+                       goto segv;
+       }
+
         switch (_NSIG_WORDS) {
                 case 4: set.sig[3] = seta.sig[6] + (((long)seta.sig[7]) << 32);
                 case 3: set.sig[2] = seta.sig[4] + (((long)seta.sig[5]) << 32);
@@ -433,26 +427,6 @@ static void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, uns
         return (void __user *) sp;
  }
  
-static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       unsigned long *fpregs = current_thread_info()->fpregs;
-       unsigned long fprs;
-       int err = 0;
-       
-       fprs = current_thread_info()->fpsaved[0];
-       if (fprs & FPRS_DL)
-               err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
-                                   (sizeof(unsigned int) * 32));
-       if (fprs & FPRS_DU)
-               err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
-                                   (sizeof(unsigned int) * 32));
-       err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-       err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-       err |= __put_user(fprs, &fpu->si_fprs);
-
-       return err;
-}
-
  /* The I-cache flush instruction only works in the primary ASI, which
   * right now is the nucleus, aka. kernel space.
   *
@@ -515,18 +489,23 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                          int signo, sigset_t *oldset)
  {
         struct signal_frame32 __user *sf;
+       int i, err, wsaved;
+       void __user *tail;
         int sigframe_size;
         u32 psr;
-       int i, err;
         unsigned int seta[_COMPAT_NSIG_WORDS];
  
         /* 1. Make sure everything is clean */
         synchronize_user_stack();
         save_and_clear_fpu();
         
-       sigframe_size = SF_ALIGNEDSZ;
-       if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
-               sigframe_size -= sizeof(__siginfo_fpu_t);
+       wsaved = get_thread_wsaved();
+
+       sigframe_size = sizeof(*sf);
+       if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+               sigframe_size += sizeof(__siginfo_fpu_t);
+       if (wsaved)
+               sigframe_size += sizeof(__siginfo_rwin_t);
  
         sf = (struct signal_frame32 __user *)
                 get_sigframe(&ka->sa, regs, sigframe_size);
@@ -534,8 +513,7 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
         if (invalid_frame_pointer(sf, sigframe_size))
                 goto sigill;
  
-       if (get_thread_wsaved() != 0)
-               goto sigill;
+       tail = (sf + 1);
  
         /* 2. Save the current process state */
         if (test_thread_flag(TIF_32BIT)) {
@@ -560,11 +538,22 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                           &sf->v8plus.asi);
  
         if (psr & PSR_EF) {
-               err |= save_fpu_state32(regs, &sf->fpu_state);
-               err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save);
+               __siginfo_fpu_t __user *fp = tail;
+               tail += sizeof(*fp);
+               err |= save_fpu_state(regs, fp);
+               err |= __put_user((u64)fp, &sf->fpu_save);
         } else {
                 err |= __put_user(0, &sf->fpu_save);
         }
+       if (wsaved) {
+               __siginfo_rwin_t __user *rwp = tail;
+               tail += sizeof(*rwp);
+               err |= save_rwin_state(wsaved, rwp);
+               err |= __put_user((u64)rwp, &sf->rwin_save);
+               set_thread_wsaved(0);
+       } else {
+               err |= __put_user(0, &sf->rwin_save);
+       }
  
         switch (_NSIG_WORDS) {
         case 4: seta[7] = (oldset->sig[3] >> 32);
@@ -580,10 +569,21 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
         err |= __copy_to_user(sf->extramask, seta + 1,
                               (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
  
-       err |= copy_in_user((u32 __user *)sf,
-                           (u32 __user *)(regs->u_regs[UREG_FP]),
-                           sizeof(struct reg_window32));
-       
+       if (!wsaved) {
+               err |= copy_in_user((u32 __user *)sf,
+                                   (u32 __user *)(regs->u_regs[UREG_FP]),
+                                   sizeof(struct reg_window32));
+       } else {
+               struct reg_window *rp;
+
+               rp = &current_thread_info()->reg_window[wsaved - 1];
+               for (i = 0; i < 8; i++)
+                       err |= __put_user(rp->locals[i], &sf->ss.locals[i]);
+               for (i = 0; i < 6; i++)
+                       err |= __put_user(rp->ins[i], &sf->ss.ins[i]);
+               err |= __put_user(rp->ins[6], &sf->ss.fp);
+               err |= __put_user(rp->ins[7], &sf->ss.callers_pc);
+       }       
         if (err)
                 goto sigsegv;
  
@@ -613,7 +613,6 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                 err |= __put_user(0x91d02010, &sf->insns[1]); /*t 0x10*/
                 if (err)
                         goto sigsegv;
-
                 flush_signal_insns(address);
         }
         return 0;
@@ -632,18 +631,23 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                             siginfo_t *info)
  {
         struct rt_signal_frame32 __user *sf;
+       int i, err, wsaved;
+       void __user *tail;
         int sigframe_size;
         u32 psr;
-       int i, err;
         compat_sigset_t seta;
  
         /* 1. Make sure everything is clean */
         synchronize_user_stack();
         save_and_clear_fpu();
         
-       sigframe_size = RT_ALIGNEDSZ;
-       if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
-               sigframe_size -= sizeof(__siginfo_fpu_t);
+       wsaved = get_thread_wsaved();
+
+       sigframe_size = sizeof(*sf);
+       if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+               sigframe_size += sizeof(__siginfo_fpu_t);
+       if (wsaved)
+               sigframe_size += sizeof(__siginfo_rwin_t);
  
         sf = (struct rt_signal_frame32 __user *)
                 get_sigframe(&ka->sa, regs, sigframe_size);
@@ -651,8 +655,7 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
         if (invalid_frame_pointer(sf, sigframe_size))
                 goto sigill;
  
-       if (get_thread_wsaved() != 0)
-               goto sigill;
+       tail = (sf + 1);
  
         /* 2. Save the current process state */
         if (test_thread_flag(TIF_32BIT)) {
@@ -677,11 +680,22 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
                           &sf->v8plus.asi);
  
         if (psr & PSR_EF) {
-               err |= save_fpu_state32(regs, &sf->fpu_state);
-               err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save);
+               __siginfo_fpu_t __user *fp = tail;
+               tail += sizeof(*fp);
+               err |= save_fpu_state(regs, fp);
+               err |= __put_user((u64)fp, &sf->fpu_save);
         } else {
                 err |= __put_user(0, &sf->fpu_save);
         }
+       if (wsaved) {
+               __siginfo_rwin_t __user *rwp = tail;
+               tail += sizeof(*rwp);
+               err |= save_rwin_state(wsaved, rwp);
+               err |= __put_user((u64)rwp, &sf->rwin_save);
+               set_thread_wsaved(0);
+       } else {
+               err |= __put_user(0, &sf->rwin_save);
+       }
  
         /* Update the siginfo structure.  */
         err |= copy_siginfo_to_user32(&sf->info, info);
@@ -703,9 +717,21 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs,
         }
         err |= __copy_to_user(&sf->mask, &seta, sizeof(compat_sigset_t));
  
-       err |= copy_in_user((u32 __user *)sf,
-                           (u32 __user *)(regs->u_regs[UREG_FP]),
-                           sizeof(struct reg_window32));
+       if (!wsaved) {
+               err |= copy_in_user((u32 __user *)sf,
+                                   (u32 __user *)(regs->u_regs[UREG_FP]),
+                                   sizeof(struct reg_window32));
+       } else {
+               struct reg_window *rp;
+
+               rp = &current_thread_info()->reg_window[wsaved - 1];
+               for (i = 0; i < 8; i++)
+                       err |= __put_user(rp->locals[i], &sf->ss.locals[i]);
+               for (i = 0; i < 6; i++)
+                       err |= __put_user(rp->ins[i], &sf->ss.ins[i]);
+               err |= __put_user(rp->ins[6], &sf->ss.fp);
+               err |= __put_user(rp->ins[7], &sf->ss.callers_pc);
+       }
         if (err)
                 goto sigsegv;
         
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c

index 5e5c5fd03783c997f5c344025e8f4784182a0ddc..04ede8f04add3f397ca46c8ebee249d85c92213c 100644 (file)
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -26,6 +26,8 @@
  #include <asm/pgtable.h>
  #include <asm/cacheflush.h>    /* flush_sig_insns */
  
+#include "sigutil.h"
+
  #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
  
  extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
@@ -39,8 +41,8 @@ struct signal_frame {
         unsigned long           insns[2] __attribute__ ((aligned (8)));
         unsigned int            extramask[_NSIG_WORDS - 1];
         unsigned int            extra_size; /* Should be 0 */
-       __siginfo_fpu_t         fpu_state;
-};
+       __siginfo_rwin_t __user *rwin_save;
+} __attribute__((aligned(8)));
  
  struct rt_signal_frame {
         struct sparc_stackf     ss;
@@ -51,8 +53,8 @@ struct rt_signal_frame {
         unsigned int            insns[2];
         stack_t                 stack;
         unsigned int            extra_size; /* Should be 0 */
-       __siginfo_fpu_t         fpu_state;
-};
+       __siginfo_rwin_t __user *rwin_save;
+} __attribute__((aligned(8)));
  
  /* Align macros */
  #define SF_ALIGNEDSZ  (((sizeof(struct signal_frame) + 7) & (~7)))
@@ -79,43 +81,13 @@ asmlinkage int sys_sigsuspend(old_sigset_t set)
         return _sigpause_common(set);
  }
  
-static inline int
-restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       int err;
-#ifdef CONFIG_SMP
-       if (test_tsk_thread_flag(current, TIF_USEDFPU))
-               regs->psr &= ~PSR_EF;
-#else
-       if (current == last_task_used_math) {
-               last_task_used_math = NULL;
-               regs->psr &= ~PSR_EF;
-       }
-#endif
-       set_used_math();
-       clear_tsk_thread_flag(current, TIF_USEDFPU);
-
-       if (!access_ok(VERIFY_READ, fpu, sizeof(*fpu)))
-               return -EFAULT;
-
-       err = __copy_from_user(&current->thread.float_regs[0], &fpu->si_float_regs[0],
-                              (sizeof(unsigned long) * 32));
-       err |= __get_user(current->thread.fsr, &fpu->si_fsr);
-       err |= __get_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
-       if (current->thread.fpqdepth != 0)
-               err |= __copy_from_user(&current->thread.fpqueue[0],
-                                       &fpu->si_fpqueue[0],
-                                       ((sizeof(unsigned long) +
-                                       (sizeof(unsigned long *)))*16));
-       return err;
-}
-
  asmlinkage void do_sigreturn(struct pt_regs *regs)
  {
         struct signal_frame __user *sf;
         unsigned long up_psr, pc, npc;
         sigset_t set;
         __siginfo_fpu_t __user *fpu_save;
+       __siginfo_rwin_t __user *rwin_save;
         int err;
  
         /* Always make any pending restarted system calls return -EINTR */
@@ -150,9 +122,11 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
         pt_regs_clear_syscall(regs);
  
         err |= __get_user(fpu_save, &sf->fpu_save);
-
         if (fpu_save)
                 err |= restore_fpu_state(regs, fpu_save);
+       err |= __get_user(rwin_save, &sf->rwin_save);
+       if (rwin_save)
+               err |= restore_rwin_state(rwin_save);
  
         /* This is pretty much atomic, no amount locking would prevent
          * the races which exist anyways.
@@ -180,6 +154,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
         struct rt_signal_frame __user *sf;
         unsigned int psr, pc, npc;
         __siginfo_fpu_t __user *fpu_save;
+       __siginfo_rwin_t __user *rwin_save;
         mm_segment_t old_fs;
         sigset_t set;
         stack_t st;
@@ -207,8 +182,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
         pt_regs_clear_syscall(regs);
  
         err |= __get_user(fpu_save, &sf->fpu_save);
-
-       if (fpu_save)
+       if (!err && fpu_save)
                 err |= restore_fpu_state(regs, fpu_save);
         err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t));
         
@@ -228,6 +202,12 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
         do_sigaltstack((const stack_t __user *) &st, NULL, (unsigned long)sf);
         set_fs(old_fs);
  
+       err |= __get_user(rwin_save, &sf->rwin_save);
+       if (!err && rwin_save) {
+               if (restore_rwin_state(rwin_save))
+                       goto segv;
+       }
+
         sigdelsetmask(&set, ~_BLOCKABLE);
         spin_lock_irq(&current->sighand->siglock);
         current->blocked = set;
@@ -280,53 +260,23 @@ static inline void __user *get_sigframe(struct sigaction *sa, struct pt_regs *re
         return (void __user *) sp;
  }
  
-static inline int
-save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       int err = 0;
-#ifdef CONFIG_SMP
-       if (test_tsk_thread_flag(current, TIF_USEDFPU)) {
-               put_psr(get_psr() | PSR_EF);
-               fpsave(&current->thread.float_regs[0], &current->thread.fsr,
-                      &current->thread.fpqueue[0], &current->thread.fpqdepth);
-               regs->psr &= ~(PSR_EF);
-               clear_tsk_thread_flag(current, TIF_USEDFPU);
-       }
-#else
-       if (current == last_task_used_math) {
-               put_psr(get_psr() | PSR_EF);
-               fpsave(&current->thread.float_regs[0], &current->thread.fsr,
-                      &current->thread.fpqueue[0], &current->thread.fpqdepth);
-               last_task_used_math = NULL;
-               regs->psr &= ~(PSR_EF);
-       }
-#endif
-       err |= __copy_to_user(&fpu->si_float_regs[0],
-                             &current->thread.float_regs[0],
-                             (sizeof(unsigned long) * 32));
-       err |= __put_user(current->thread.fsr, &fpu->si_fsr);
-       err |= __put_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
-       if (current->thread.fpqdepth != 0)
-               err |= __copy_to_user(&fpu->si_fpqueue[0],
-                                     &current->thread.fpqueue[0],
-                                     ((sizeof(unsigned long) +
-                                     (sizeof(unsigned long *)))*16));
-       clear_used_math();
-       return err;
-}
-
  static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
                        int signo, sigset_t *oldset)
  {
         struct signal_frame __user *sf;
-       int sigframe_size, err;
+       int sigframe_size, err, wsaved;
+       void __user *tail;
  
         /* 1. Make sure everything is clean */
         synchronize_user_stack();
  
-       sigframe_size = SF_ALIGNEDSZ;
-       if (!used_math())
-               sigframe_size -= sizeof(__siginfo_fpu_t);
+       wsaved = current_thread_info()->w_saved;
+
+       sigframe_size = sizeof(*sf);
+       if (used_math())
+               sigframe_size += sizeof(__siginfo_fpu_t);
+       if (wsaved)
+               sigframe_size += sizeof(__siginfo_rwin_t);
  
         sf = (struct signal_frame __user *)
                 get_sigframe(&ka->sa, regs, sigframe_size);
@@ -334,8 +284,7 @@ static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
         if (invalid_frame_pointer(sf, sigframe_size))
                 goto sigill_and_return;
  
-       if (current_thread_info()->w_saved != 0)
-               goto sigill_and_return;
+       tail = sf + 1;
  
         /* 2. Save the current process state */
         err = __copy_to_user(&sf->info.si_regs, regs, sizeof(struct pt_regs));
@@ -343,17 +292,34 @@ static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
         err |= __put_user(0, &sf->extra_size);
  
         if (used_math()) {
-               err |= save_fpu_state(regs, &sf->fpu_state);
-               err |= __put_user(&sf->fpu_state, &sf->fpu_save);
+               __siginfo_fpu_t __user *fp = tail;
+               tail += sizeof(*fp);
+               err |= save_fpu_state(regs, fp);
+               err |= __put_user(fp, &sf->fpu_save);
         } else {
                 err |= __put_user(0, &sf->fpu_save);
         }
+       if (wsaved) {
+               __siginfo_rwin_t __user *rwp = tail;
+               tail += sizeof(*rwp);
+               err |= save_rwin_state(wsaved, rwp);
+               err |= __put_user(rwp, &sf->rwin_save);
+       } else {
+               err |= __put_user(0, &sf->rwin_save);
+       }
  
         err |= __put_user(oldset->sig[0], &sf->info.si_mask);
         err |= __copy_to_user(sf->extramask, &oldset->sig[1],
                               (_NSIG_WORDS - 1) * sizeof(unsigned int));
-       err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
-                             sizeof(struct reg_window32));
+       if (!wsaved) {
+               err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
+                                     sizeof(struct reg_window32));
+       } else {
+               struct reg_window32 *rp;
+
+               rp = &current_thread_info()->reg_window[wsaved - 1];
+               err |= __copy_to_user(sf, rp, sizeof(struct reg_window32));
+       }
         if (err)
                 goto sigsegv;
         
@@ -399,21 +365,24 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
                           int signo, sigset_t *oldset, siginfo_t *info)
  {
         struct rt_signal_frame __user *sf;
-       int sigframe_size;
+       int sigframe_size, wsaved;
+       void __user *tail;
         unsigned int psr;
         int err;
  
         synchronize_user_stack();
-       sigframe_size = RT_ALIGNEDSZ;
-       if (!used_math())
-               sigframe_size -= sizeof(__siginfo_fpu_t);
+       wsaved = current_thread_info()->w_saved;
+       sigframe_size = sizeof(*sf);
+       if (used_math())
+               sigframe_size += sizeof(__siginfo_fpu_t);
+       if (wsaved)
+               sigframe_size += sizeof(__siginfo_rwin_t);
         sf = (struct rt_signal_frame __user *)
                 get_sigframe(&ka->sa, regs, sigframe_size);
         if (invalid_frame_pointer(sf, sigframe_size))
                 goto sigill;
-       if (current_thread_info()->w_saved != 0)
-               goto sigill;
  
+       tail = sf + 1;
         err  = __put_user(regs->pc, &sf->regs.pc);
         err |= __put_user(regs->npc, &sf->regs.npc);
         err |= __put_user(regs->y, &sf->regs.y);
@@ -425,11 +394,21 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
         err |= __put_user(0, &sf->extra_size);
  
         if (psr & PSR_EF) {
-               err |= save_fpu_state(regs, &sf->fpu_state);
-               err |= __put_user(&sf->fpu_state, &sf->fpu_save);
+               __siginfo_fpu_t *fp = tail;
+               tail += sizeof(*fp);
+               err |= save_fpu_state(regs, fp);
+               err |= __put_user(fp, &sf->fpu_save);
         } else {
                 err |= __put_user(0, &sf->fpu_save);
         }
+       if (wsaved) {
+               __siginfo_rwin_t *rwp = tail;
+               tail += sizeof(*rwp);
+               err |= save_rwin_state(wsaved, rwp);
+               err |= __put_user(rwp, &sf->rwin_save);
+       } else {
+               err |= __put_user(0, &sf->rwin_save);
+       }
         err |= __copy_to_user(&sf->mask, &oldset->sig[0], sizeof(sigset_t));
         
         /* Setup sigaltstack */
@@ -437,8 +416,15 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
         err |= __put_user(sas_ss_flags(regs->u_regs[UREG_FP]), &sf->stack.ss_flags);
         err |= __put_user(current->sas_ss_size, &sf->stack.ss_size);
         
-       err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
-                             sizeof(struct reg_window32));
+       if (!wsaved) {
+               err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
+                                     sizeof(struct reg_window32));
+       } else {
+               struct reg_window32 *rp;
+
+               rp = &current_thread_info()->reg_window[wsaved - 1];
+               err |= __copy_to_user(sf, rp, sizeof(struct reg_window32));
+       }
  
         err |= copy_siginfo_to_user(&sf->info, info);
  
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c

index 006fe4515886dc6ae2a7a8e6cc9b6df9c16fda46..47509df3b893acfb365ec503cbe00f2dabeea2ac 100644 (file)
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -34,6 +34,7 @@
  
  #include "entry.h"
  #include "systbls.h"
+#include "sigutil.h"
  
  #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
  
@@ -236,7 +237,7 @@ struct rt_signal_frame {
         __siginfo_fpu_t __user  *fpu_save;
         stack_t                 stack;
         sigset_t                mask;
-       __siginfo_fpu_t         fpu_state;
+       __siginfo_rwin_t        *rwin_save;
  };
  
  static long _sigpause_common(old_sigset_t set)
@@ -266,33 +267,12 @@ asmlinkage long sys_sigsuspend(old_sigset_t set)
         return _sigpause_common(set);
  }
  
-static inline int
-restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       unsigned long *fpregs = current_thread_info()->fpregs;
-       unsigned long fprs;
-       int err;
-
-       err = __get_user(fprs, &fpu->si_fprs);
-       fprs_write(0);
-       regs->tstate &= ~TSTATE_PEF;
-       if (fprs & FPRS_DL)
-               err |= copy_from_user(fpregs, &fpu->si_float_regs[0],
-                              (sizeof(unsigned int) * 32));
-       if (fprs & FPRS_DU)
-               err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32],
-                              (sizeof(unsigned int) * 32));
-       err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-       err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-       current_thread_info()->fpsaved[0] |= fprs;
-       return err;
-}
-
  void do_rt_sigreturn(struct pt_regs *regs)
  {
         struct rt_signal_frame __user *sf;
         unsigned long tpc, tnpc, tstate;
         __siginfo_fpu_t __user *fpu_save;
+       __siginfo_rwin_t __user *rwin_save;
         sigset_t set;
         int err;
  
@@ -325,8 +305,8 @@ void do_rt_sigreturn(struct pt_regs *regs)
         regs->tstate |= (tstate & (TSTATE_ASI | TSTATE_ICC | TSTATE_XCC));
  
         err |= __get_user(fpu_save, &sf->fpu_save);
-       if (fpu_save)
-               err |= restore_fpu_state(regs, &sf->fpu_state);
+       if (!err && fpu_save)
+               err |= restore_fpu_state(regs, fpu_save);
  
         err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t));
         err |= do_sigaltstack(&sf->stack, NULL, (unsigned long)sf);
@@ -334,6 +314,12 @@ void do_rt_sigreturn(struct pt_regs *regs)
         if (err)
                 goto segv;
  
+       err |= __get_user(rwin_save, &sf->rwin_save);
+       if (!err && rwin_save) {
+               if (restore_rwin_state(rwin_save))
+                       goto segv;
+       }
+
         regs->tpc = tpc;
         regs->tnpc = tnpc;
  
@@ -351,34 +337,13 @@ segv:
  }
  
  /* Checks if the fp is valid */
-static int invalid_frame_pointer(void __user *fp, int fplen)
+static int invalid_frame_pointer(void __user *fp)
  {
         if (((unsigned long) fp) & 15)
                 return 1;
         return 0;
  }
  
-static inline int
-save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
-{
-       unsigned long *fpregs = current_thread_info()->fpregs;
-       unsigned long fprs;
-       int err = 0;
-       
-       fprs = current_thread_info()->fpsaved[0];
-       if (fprs & FPRS_DL)
-               err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
-                                   (sizeof(unsigned int) * 32));
-       if (fprs & FPRS_DU)
-               err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
-                                   (sizeof(unsigned int) * 32));
-       err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
-       err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
-       err |= __put_user(fprs, &fpu->si_fprs);
-
-       return err;
-}
-
  static inline void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, unsigned long framesize)
  {
         unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS;
@@ -414,34 +379,48 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
                int signo, sigset_t *oldset, siginfo_t *info)
  {
         struct rt_signal_frame __user *sf;
-       int sigframe_size, err;
+       int wsaved, err, sf_size;
+       void __user *tail;
  
         /* 1. Make sure everything is clean */
         synchronize_user_stack();
         save_and_clear_fpu();
         
-       sigframe_size = sizeof(struct rt_signal_frame);
-       if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
-               sigframe_size -= sizeof(__siginfo_fpu_t);
+       wsaved = get_thread_wsaved();
  
+       sf_size = sizeof(struct rt_signal_frame);
+       if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+               sf_size += sizeof(__siginfo_fpu_t);
+       if (wsaved)
+               sf_size += sizeof(__siginfo_rwin_t);
         sf = (struct rt_signal_frame __user *)
-               get_sigframe(ka, regs, sigframe_size);
-       
-       if (invalid_frame_pointer (sf, sigframe_size))
-               goto sigill;
+               get_sigframe(ka, regs, sf_size);
  
-       if (get_thread_wsaved() != 0)
+       if (invalid_frame_pointer (sf))
                 goto sigill;
  
+       tail = (sf + 1);
+
         /* 2. Save the current process state */
         err = copy_to_user(&sf->regs, regs, sizeof (*regs));
  
         if (current_thread_info()->fpsaved[0] & FPRS_FEF) {
-               err |= save_fpu_state(regs, &sf->fpu_state);
-               err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save);
+               __siginfo_fpu_t __user *fpu_save = tail;
+               tail += sizeof(__siginfo_fpu_t);
+               err |= save_fpu_state(regs, fpu_save);
+               err |= __put_user((u64)fpu_save, &sf->fpu_save);
         } else {
                 err |= __put_user(0, &sf->fpu_save);
         }
+       if (wsaved) {
+               __siginfo_rwin_t __user *rwin_save = tail;
+               tail += sizeof(__siginfo_rwin_t);
+               err |= save_rwin_state(wsaved, rwin_save);
+               err |= __put_user((u64)rwin_save, &sf->rwin_save);
+               set_thread_wsaved(0);
+       } else {
+               err |= __put_user(0, &sf->rwin_save);
+       }
         
         /* Setup sigaltstack */
         err |= __put_user(current->sas_ss_sp, &sf->stack.ss_sp);
@@ -450,10 +429,17 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
  
         err |= copy_to_user(&sf->mask, oldset, sizeof(sigset_t));
  
-       err |= copy_in_user((u64 __user *)sf,
-                           (u64 __user *)(regs->u_regs[UREG_FP]+STACK_BIAS),
-                           sizeof(struct reg_window));
+       if (!wsaved) {
+               err |= copy_in_user((u64 __user *)sf,
+                                   (u64 __user *)(regs->u_regs[UREG_FP] +
+                                                  STACK_BIAS),
+                                   sizeof(struct reg_window));
+       } else {
+               struct reg_window *rp;
  
+               rp = &current_thread_info()->reg_window[wsaved - 1];
+               err |= copy_to_user(sf, rp, sizeof(struct reg_window));
+       }
         if (info)
                 err |= copy_siginfo_to_user(&sf->info, info);
         else {
diff --git a/arch/sparc/kernel/sigutil.h b/arch/sparc/kernel/sigutil.h

new file mode 100644 (file)

index 0000000..d223aa4
--- /dev/null
+++ b/arch/sparc/kernel/sigutil.h
@@ -0,0 +1,9 @@
+#ifndef _SIGUTIL_H
+#define _SIGUTIL_H
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu);
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu);
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin);
+int restore_rwin_state(__siginfo_rwin_t __user *rp);
+
+#endif /* _SIGUTIL_H */
diff --git a/arch/sparc/kernel/sigutil_32.c b/arch/sparc/kernel/sigutil_32.c

new file mode 100644 (file)

index 0000000..35c7897
--- /dev/null
+++ b/arch/sparc/kernel/sigutil_32.c
@@ -0,0 +1,120 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+
+#include <asm/sigcontext.h>
+#include <asm/fpumacro.h>
+#include <asm/ptrace.h>
+
+#include "sigutil.h"
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+       int err = 0;
+#ifdef CONFIG_SMP
+       if (test_tsk_thread_flag(current, TIF_USEDFPU)) {
+               put_psr(get_psr() | PSR_EF);
+               fpsave(&current->thread.float_regs[0], &current->thread.fsr,
+                      &current->thread.fpqueue[0], &current->thread.fpqdepth);
+               regs->psr &= ~(PSR_EF);
+               clear_tsk_thread_flag(current, TIF_USEDFPU);
+       }
+#else
+       if (current == last_task_used_math) {
+               put_psr(get_psr() | PSR_EF);
+               fpsave(&current->thread.float_regs[0], &current->thread.fsr,
+                      &current->thread.fpqueue[0], &current->thread.fpqdepth);
+               last_task_used_math = NULL;
+               regs->psr &= ~(PSR_EF);
+       }
+#endif
+       err |= __copy_to_user(&fpu->si_float_regs[0],
+                             &current->thread.float_regs[0],
+                             (sizeof(unsigned long) * 32));
+       err |= __put_user(current->thread.fsr, &fpu->si_fsr);
+       err |= __put_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
+       if (current->thread.fpqdepth != 0)
+               err |= __copy_to_user(&fpu->si_fpqueue[0],
+                                     &current->thread.fpqueue[0],
+                                     ((sizeof(unsigned long) +
+                                     (sizeof(unsigned long *)))*16));
+       clear_used_math();
+       return err;
+}
+
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+       int err;
+#ifdef CONFIG_SMP
+       if (test_tsk_thread_flag(current, TIF_USEDFPU))
+               regs->psr &= ~PSR_EF;
+#else
+       if (current == last_task_used_math) {
+               last_task_used_math = NULL;
+               regs->psr &= ~PSR_EF;
+       }
+#endif
+       set_used_math();
+       clear_tsk_thread_flag(current, TIF_USEDFPU);
+
+       if (!access_ok(VERIFY_READ, fpu, sizeof(*fpu)))
+               return -EFAULT;
+
+       err = __copy_from_user(&current->thread.float_regs[0], &fpu->si_float_regs[0],
+                              (sizeof(unsigned long) * 32));
+       err |= __get_user(current->thread.fsr, &fpu->si_fsr);
+       err |= __get_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
+       if (current->thread.fpqdepth != 0)
+               err |= __copy_from_user(&current->thread.fpqueue[0],
+                                       &fpu->si_fpqueue[0],
+                                       ((sizeof(unsigned long) +
+                                       (sizeof(unsigned long *)))*16));
+       return err;
+}
+
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin)
+{
+       int i, err = __put_user(wsaved, &rwin->wsaved);
+
+       for (i = 0; i < wsaved; i++) {
+               struct reg_window32 *rp;
+               unsigned long fp;
+
+               rp = &current_thread_info()->reg_window[i];
+               fp = current_thread_info()->rwbuf_stkptrs[i];
+               err |= copy_to_user(&rwin->reg_window[i], rp,
+                                   sizeof(struct reg_window32));
+               err |= __put_user(fp, &rwin->rwbuf_stkptrs[i]);
+       }
+       return err;
+}
+
+int restore_rwin_state(__siginfo_rwin_t __user *rp)
+{
+       struct thread_info *t = current_thread_info();
+       int i, wsaved, err;
+
+       __get_user(wsaved, &rp->wsaved);
+       if (wsaved > NSWINS)
+               return -EFAULT;
+
+       err = 0;
+       for (i = 0; i < wsaved; i++) {
+               err |= copy_from_user(&t->reg_window[i],
+                                     &rp->reg_window[i],
+                                     sizeof(struct reg_window32));
+               err |= __get_user(t->rwbuf_stkptrs[i],
+                                 &rp->rwbuf_stkptrs[i]);
+       }
+       if (err)
+               return err;
+
+       t->w_saved = wsaved;
+       synchronize_user_stack();
+       if (t->w_saved)
+               return -EFAULT;
+       return 0;
+
+}
diff --git a/arch/sparc/kernel/sigutil_64.c b/arch/sparc/kernel/sigutil_64.c

new file mode 100644 (file)

index 0000000..e7dc508
--- /dev/null
+++ b/arch/sparc/kernel/sigutil_64.c
@@ -0,0 +1,93 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+
+#include <asm/sigcontext.h>
+#include <asm/fpumacro.h>
+#include <asm/ptrace.h>
+
+#include "sigutil.h"
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+       unsigned long *fpregs = current_thread_info()->fpregs;
+       unsigned long fprs;
+       int err = 0;
+       
+       fprs = current_thread_info()->fpsaved[0];
+       if (fprs & FPRS_DL)
+               err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
+                                   (sizeof(unsigned int) * 32));
+       if (fprs & FPRS_DU)
+               err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
+                                   (sizeof(unsigned int) * 32));
+       err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
+       err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
+       err |= __put_user(fprs, &fpu->si_fprs);
+
+       return err;
+}
+
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+       unsigned long *fpregs = current_thread_info()->fpregs;
+       unsigned long fprs;
+       int err;
+
+       err = __get_user(fprs, &fpu->si_fprs);
+       fprs_write(0);
+       regs->tstate &= ~TSTATE_PEF;
+       if (fprs & FPRS_DL)
+               err |= copy_from_user(fpregs, &fpu->si_float_regs[0],
+                              (sizeof(unsigned int) * 32));
+       if (fprs & FPRS_DU)
+               err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32],
+                              (sizeof(unsigned int) * 32));
+       err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
+       err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
+       current_thread_info()->fpsaved[0] |= fprs;
+       return err;
+}
+
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin)
+{
+       int i, err = __put_user(wsaved, &rwin->wsaved);
+
+       for (i = 0; i < wsaved; i++) {
+               struct reg_window *rp = &current_thread_info()->reg_window[i];
+               unsigned long fp = current_thread_info()->rwbuf_stkptrs[i];
+
+               err |= copy_to_user(&rwin->reg_window[i], rp,
+                                   sizeof(struct reg_window));
+               err |= __put_user(fp, &rwin->rwbuf_stkptrs[i]);
+       }
+       return err;
+}
+
+int restore_rwin_state(__siginfo_rwin_t __user *rp)
+{
+       struct thread_info *t = current_thread_info();
+       int i, wsaved, err;
+
+       __get_user(wsaved, &rp->wsaved);
+       if (wsaved > NSWINS)
+               return -EFAULT;
+
+       err = 0;
+       for (i = 0; i < wsaved; i++) {
+               err |= copy_from_user(&t->reg_window[i],
+                                     &rp->reg_window[i],
+                                     sizeof(struct reg_window));
+               err |= __get_user(t->rwbuf_stkptrs[i],
+                                 &rp->rwbuf_stkptrs[i]);
+       }
+       if (err)
+               return err;
+
+       set_thread_wsaved(wsaved);
+       synchronize_user_stack();
+       if (get_thread_wsaved())
+               return -EFAULT;
+       return 0;
+}
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S

index 44e5faf1ad5f47dbee83ff3b00e77f479d907d0d..d97f3eb72e064d70cf1a6dbb9cbd8d8cac4c2d40 100644 (file)
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -81,7 +81,6 @@ SIGN2(sys32_fadvise64, compat_sys_fadvise64, %o0, %o4)
  SIGN2(sys32_fadvise64_64, compat_sys_fadvise64_64, %o0, %o5)
  SIGN2(sys32_bdflush, sys_bdflush, %o0, %o1)
  SIGN1(sys32_mlockall, sys_mlockall, %o0)
-SIGN1(sys32_nfsservctl, compat_sys_nfsservctl, %o0)
  SIGN1(sys32_clock_nanosleep, compat_sys_clock_nanosleep, %o1)
  SIGN1(sys32_timer_settime, compat_sys_timer_settime, %o1)
  SIGN1(sys32_io_submit, compat_sys_io_submit, %o1)
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S

index 6e492d59f6b1d7fa2d9f7974527ad56b5d7cc967..09d8ec454450bcfcbfa3ef7e041305c5fe1a7501 100644 (file)
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -67,7 +67,7 @@ sys_call_table:
  /*235*/        .long sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
  /*240*/        .long sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
  /*245*/        .long sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep
-/*250*/        .long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
+/*250*/        .long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_ni_syscall
  /*255*/        .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
  /*260*/        .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
  /*265*/        .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S

index f566518483b5bcda9998d635adabc18a18354373..edbec45d46884c9e1d33ab92be65119f0137369d 100644 (file)
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -68,7 +68,7 @@ sys_call_table32:
         .word compat_sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys32_mlockall
  /*240*/        .word sys_munlockall, sys32_sched_setparam, sys32_sched_getparam, sys32_sched_setscheduler, sys32_sched_getscheduler
         .word sys_sched_yield, sys32_sched_get_priority_max, sys32_sched_get_priority_min, sys32_sched_rr_get_interval, compat_sys_nanosleep
-/*250*/        .word sys_mremap, compat_sys_sysctl, sys32_getsid, sys_fdatasync, sys32_nfsservctl
+/*250*/        .word sys_mremap, compat_sys_sysctl, sys32_getsid, sys_fdatasync, sys_nis_syscall
         .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
  /*260*/        .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
         .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
@@ -145,7 +145,7 @@ sys_call_table:
         .word sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
  /*240*/        .word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
         .word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep
-/*250*/        .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl
+/*250*/        .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
         .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
  /*260*/        .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
         .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
diff --git a/arch/um/Kconfig.x86 b/arch/um/Kconfig.x86

index d31ecf346b4eced920a6e648b50e3eb2c08efb29..21bebe63df6660d08e0bd50ac6b8cb2486b76355 100644 (file)
--- a/arch/um/Kconfig.x86
+++ b/arch/um/Kconfig.x86
@@ -10,6 +10,10 @@ config CMPXCHG_LOCAL
         bool
         default n
  
+config CMPXCHG_DOUBLE
+       bool
+       default n
+
  source "arch/x86/Kconfig.cpu"
  
  endmenu
diff --git a/arch/um/Makefile b/arch/um/Makefile

index fab8121d2b32a2059ea24ac2a70b5da3c221a888..c0f712cc7c5fb1c0a216df717c70daeac0dab138 100644 (file)
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -41,7 +41,7 @@ KBUILD_CPPFLAGS += -I$(srctree)/$(ARCH_DIR)/sys-$(SUBARCH)
  KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \
         $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap      \
         -Din6addr_loopback=kernel_in6addr_loopback \
-       -Din6addr_any=kernel_in6addr_any
+       -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr
  
  KBUILD_AFLAGS += $(ARCH_INCLUDE)
  
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c

index d51c404239a8f0d0e7ec5c7c010d4eff8ee43036..364c8a15c4c33a6113a2ff36f526a88089a062e5 100644 (file)
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -399,8 +399,8 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data)
   * is done under a spinlock.  Checking whether the device is in use is
   * line->tty->count > 1, also under the spinlock.
   *
- * tty->count serves to decide whether the device should be enabled or
- * disabled on the host.  If it's equal to 1, then we are doing the
+ * line->count serves to decide whether the device should be enabled or
+ * disabled on the host.  If it's equal to 0, then we are doing the
   * first open or last close.  Otherwise, open and close just return.
   */
  
@@ -414,16 +414,16 @@ int line_open(struct line *lines, struct tty_struct *tty)
                 goto out_unlock;
  
         err = 0;
-       if (tty->count > 1)
+       if (line->count++)
                 goto out_unlock;
  
-       spin_unlock(&line->count_lock);
-
+       BUG_ON(tty->driver_data);
         tty->driver_data = line;
         line->tty = tty;
  
+       spin_unlock(&line->count_lock);
         err = enable_chan(line);
-       if (err)
+       if (err) /* line_close() will be called by our caller */
                 return err;
  
         INIT_DELAYED_WORK(&line->task, line_timer_cb);
@@ -436,7 +436,7 @@ int line_open(struct line *lines, struct tty_struct *tty)
         chan_window_size(&line->chan_list, &tty->winsize.ws_row,
                          &tty->winsize.ws_col);
  
-       return err;
+       return 0;
  
  out_unlock:
         spin_unlock(&line->count_lock);
@@ -460,17 +460,16 @@ void line_close(struct tty_struct *tty, struct file * filp)
         flush_buffer(line);
  
         spin_lock(&line->count_lock);
-       if (!line->valid)
-               goto out_unlock;
+       BUG_ON(!line->valid);
  
-       if (tty->count > 1)
+       if (--line->count)
                 goto out_unlock;
  
-       spin_unlock(&line->count_lock);
-
         line->tty = NULL;
         tty->driver_data = NULL;
  
+       spin_unlock(&line->count_lock);
+
         if (line->sigio) {
                 unregister_winch(tty);
                 line->sigio = 0;
@@ -498,7 +497,7 @@ static int setup_one_line(struct line *lines, int n, char *init, int init_prio,
  
         spin_lock(&line->count_lock);
  
-       if (line->tty != NULL) {
+       if (line->count) {
                 *error_out = "Device is already open";
                 goto out;
         }
@@ -722,41 +721,53 @@ struct winch {
         int pid;
         struct tty_struct *tty;
         unsigned long stack;
+       struct work_struct work;
  };
  
-static void free_winch(struct winch *winch, int free_irq_ok)
+static void __free_winch(struct work_struct *work)
  {
-       if (free_irq_ok)
-               free_irq(WINCH_IRQ, winch);
-
-       list_del(&winch->list);
+       struct winch *winch = container_of(work, struct winch, work);
+       free_irq(WINCH_IRQ, winch);
  
         if (winch->pid != -1)
                 os_kill_process(winch->pid, 1);
-       if (winch->fd != -1)
-               os_close_file(winch->fd);
         if (winch->stack != 0)
                 free_stack(winch->stack, 0);
         kfree(winch);
  }
  
+static void free_winch(struct winch *winch)
+{
+       int fd = winch->fd;
+       winch->fd = -1;
+       if (fd != -1)
+               os_close_file(fd);
+       list_del(&winch->list);
+       __free_winch(&winch->work);
+}
+
  static irqreturn_t winch_interrupt(int irq, void *data)
  {
         struct winch *winch = data;
         struct tty_struct *tty;
         struct line *line;
+       int fd = winch->fd;
         int err;
         char c;
  
-       if (winch->fd != -1) {
-               err = generic_read(winch->fd, &c, NULL);
+       if (fd != -1) {
+               err = generic_read(fd, &c, NULL);
                 if (err < 0) {
                         if (err != -EAGAIN) {
+                               winch->fd = -1;
+                               list_del(&winch->list);
+                               os_close_file(fd);
                                 printk(KERN_ERR "winch_interrupt : "
                                        "read failed, errno = %d\n", -err);
                                 printk(KERN_ERR "fd %d is losing SIGWINCH "
                                        "support\n", winch->tty_fd);
-                               free_winch(winch, 0);
+                               INIT_WORK(&winch->work, __free_winch);
+                               schedule_work(&winch->work);
                                 return IRQ_HANDLED;
                         }
                         goto out;
@@ -828,7 +839,7 @@ static void unregister_winch(struct tty_struct *tty)
         list_for_each_safe(ele, next, &winch_handlers) {
                 winch = list_entry(ele, struct winch, list);
                 if (winch->tty == tty) {
-                       free_winch(winch, 1);
+                       free_winch(winch);
                         break;
                 }
         }
@@ -844,7 +855,7 @@ static void winch_cleanup(void)
  
         list_for_each_safe(ele, next, &winch_handlers) {
                 winch = list_entry(ele, struct winch, list);
-               free_winch(winch, 1);
+               free_winch(winch);
         }
  
         spin_unlock(&winch_handler_lock);
diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c

index 8ac7146c237f69ae9052397767c23140055512f3..2e1de57286045ab9cf606250059a4d094b486ab2 100644 (file)
--- a/arch/um/drivers/xterm.c
+++ b/arch/um/drivers/xterm.c
@@ -123,6 +123,7 @@ static int xterm_open(int input, int output, int primary, void *d,
                 err = -errno;
                 printk(UM_KERN_ERR "xterm_open : unlink failed, errno = %d\n",
                        errno);
+               close(fd);
                 return err;
         }
         close(fd);
diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h

index ae084ad1a3a0a9aeab8d609de05563a4b5d7fc33..1a7d2757fe0524aeb7878801d0d1c49079755772 100644 (file)
--- a/arch/um/include/asm/ptrace-generic.h
+++ b/arch/um/include/asm/ptrace-generic.h
@@ -42,10 +42,6 @@ extern long subarch_ptrace(struct task_struct *child, long request,
         unsigned long addr, unsigned long data);
  extern unsigned long getreg(struct task_struct *child, int regno);
  extern int putreg(struct task_struct *child, int regno, unsigned long value);
-extern int get_fpregs(struct user_i387_struct __user *buf,
-                     struct task_struct *child);
-extern int set_fpregs(struct user_i387_struct __user *buf,
-                     struct task_struct *child);
  
  extern int arch_copy_tls(struct task_struct *new);
  extern void clear_flushed_tls(struct task_struct *task);
diff --git a/arch/um/include/shared/line.h b/arch/um/include/shared/line.h

index 72f4f25af2478e5c4d14d0a3072a45d10f21a211..63df3ca02ac2fbc5c367afaeeaec26010f8b55c0 100644 (file)
--- a/arch/um/include/shared/line.h
+++ b/arch/um/include/shared/line.h
@@ -33,6 +33,7 @@ struct line_driver {
  struct line {
         struct tty_struct *tty;
         spinlock_t count_lock;
+       unsigned long count;
         int valid;
  
         char *init_str;
diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h

index b0b4589e0ebce804c3a6189da1835620069410cf..f1e0aa56c52a859fb4302791ad2f267d7b0a171c 100644 (file)
--- a/arch/um/include/shared/registers.h
+++ b/arch/um/include/shared/registers.h
@@ -16,7 +16,7 @@ extern int restore_fpx_registers(int pid, unsigned long *fp_regs);
  extern int save_registers(int pid, struct uml_pt_regs *regs);
  extern int restore_registers(int pid, struct uml_pt_regs *regs);
  extern int init_registers(int pid);
-extern void get_safe_registers(unsigned long *regs);
+extern void get_safe_registers(unsigned long *regs, unsigned long *fp_regs);
  extern unsigned long get_thread_reg(int reg, jmp_buf *buf);
  extern int get_fp_registers(int pid, unsigned long *regs);
  extern int put_fp_registers(int pid, unsigned long *regs);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c

index fab4371184f6e49affeba13f3fc0965ec7f29727..21c1ae7c3d7579fbbd17c66a0029805bebbd6916 100644 (file)
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -202,7 +202,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
                 arch_copy_thread(&current->thread.arch, &p->thread.arch);
         }
         else {
-               get_safe_registers(p->thread.regs.regs.gp);
+               get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp);
                 p->thread.request.u.thread = current->thread.request.u.thread;
                 handler = new_thread_handler;
         }
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c

index 701b672c11225aaf6ffcdd9fe9485bccacbd4726..c9da32b0c707015c19db39b49785cd8b0610a35a 100644 (file)
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -50,23 +50,11 @@ long arch_ptrace(struct task_struct *child, long request,
         void __user *vp = p;
  
         switch (request) {
-       /* read word at location addr. */
-       case PTRACE_PEEKTEXT:
-       case PTRACE_PEEKDATA:
-               ret = generic_ptrace_peekdata(child, addr, data);
-               break;
-
         /* read the word at location addr in the USER area. */
         case PTRACE_PEEKUSR:
                 ret = peek_user(child, addr, data);
                 break;
  
-       /* write the word at location addr. */
-       case PTRACE_POKETEXT:
-       case PTRACE_POKEDATA:
-               ret = generic_ptrace_pokedata(child, addr, data);
-               break;
-
         /* write the word at location addr in the USER area */
         case PTRACE_POKEUSR:
                 ret = poke_user(child, addr, data);
@@ -106,16 +94,6 @@ long arch_ptrace(struct task_struct *child, long request,
                 ret = 0;
                 break;
         }
-#endif
-#ifdef PTRACE_GETFPREGS
-       case PTRACE_GETFPREGS: /* Get the child FPU state. */
-               ret = get_fpregs(vp, child);
-               break;
-#endif
-#ifdef PTRACE_SETFPREGS
-       case PTRACE_SETFPREGS: /* Set the child FPU state. */
-               ret = set_fpregs(vp, child);
-               break;
  #endif
         case PTRACE_GET_THREAD_AREA:
                 ret = ptrace_get_thread_area(child, addr, vp);
@@ -153,12 +131,6 @@ long arch_ptrace(struct task_struct *child, long request,
                 ret = -EIO;
                 break;
         }
-#endif
-#ifdef PTRACE_ARCH_PRCTL
-       case PTRACE_ARCH_PRCTL:
-               /* XXX Calls ptrace on the host - needs some SMP thinking */
-               ret = arch_prctl(child, data, (void __user *) addr);
-               break;
  #endif
         default:
                 ret = ptrace_request(child, request, addr, data);
diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c

index 830fe6a1518ae71dfa4deef46727b6fd367fbb18..b866b9e3bef9ddf212ae4e0f74d489ab66f88827 100644 (file)
--- a/arch/um/os-Linux/registers.c
+++ b/arch/um/os-Linux/registers.c
@@ -8,6 +8,8 @@
  #include <string.h>
  #include <sys/ptrace.h>
  #include "sysdep/ptrace.h"
+#include "sysdep/ptrace_user.h"
+#include "registers.h"
  
  int save_registers(int pid, struct uml_pt_regs *regs)
  {
@@ -32,6 +34,7 @@ int restore_registers(int pid, struct uml_pt_regs *regs)
  /* This is set once at boot time and not changed thereafter */
  
  static unsigned long exec_regs[MAX_REG_NR];
+static unsigned long exec_fp_regs[FP_SIZE];
  
  int init_registers(int pid)
  {
@@ -42,10 +45,14 @@ int init_registers(int pid)
                 return -errno;
  
         arch_init_registers(pid);
+       get_fp_registers(pid, exec_fp_regs);
         return 0;
  }
  
-void get_safe_registers(unsigned long *regs)
+void get_safe_registers(unsigned long *regs, unsigned long *fp_regs)
  {
         memcpy(regs, exec_regs, sizeof(exec_regs));
+
+       if (fp_regs)
+               memcpy(fp_regs, exec_fp_regs, sizeof(exec_fp_regs));
  }
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c

index d261f170d120862010cbea1facfe6d4bcc8ab69d..e771398be5f3fe97cf54fee51e5408d63ccaeea8 100644 (file)
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -39,7 +39,7 @@ static unsigned long syscall_regs[MAX_REG_NR];
  
  static int __init init_syscall_regs(void)
  {
-       get_safe_registers(syscall_regs);
+       get_safe_registers(syscall_regs, NULL);
         syscall_regs[REGS_IP_INDEX] = STUB_CODE +
                 ((unsigned long) &batch_syscall_stub -
                  (unsigned long) &__syscall_stub_start);
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c

index d6e0a2234b869267e7bfcdf15170716086a55154..dee0e8cf8ad0c32e2a0f6315751fd96f3a0caf5a 100644 (file)
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -373,6 +373,9 @@ void userspace(struct uml_pt_regs *regs)
                 if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp))
                         fatal_sigsegv();
  
+               if (put_fp_registers(pid, regs->fp))
+                       fatal_sigsegv();
+
                 /* Now we set local_using_sysemu to be used for one loop */
                 local_using_sysemu = get_using_sysemu();
  
@@ -399,6 +402,12 @@ void userspace(struct uml_pt_regs *regs)
                         fatal_sigsegv();
                 }
  
+               if (get_fp_registers(pid, regs->fp)) {
+                       printk(UM_KERN_ERR "userspace -  get_fp_registers failed, "
+                              "errno = %d\n", errno);
+                       fatal_sigsegv();
+               }
+
                 UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
  
                 if (WIFSTOPPED(status)) {
@@ -457,10 +466,11 @@ void userspace(struct uml_pt_regs *regs)
  }
  
  static unsigned long thread_regs[MAX_REG_NR];
+static unsigned long thread_fp_regs[FP_SIZE];
  
  static int __init init_thread_regs(void)
  {
-       get_safe_registers(thread_regs);
+       get_safe_registers(thread_regs, thread_fp_regs);
         /* Set parent's instruction pointer to start of clone-stub */
         thread_regs[REGS_IP_INDEX] = STUB_CODE +
                                 (unsigned long) stub_clone_handler -
@@ -503,6 +513,13 @@ int copy_context_skas0(unsigned long new_stack, int pid)
                 return err;
         }
  
+       err = put_fp_registers(pid, thread_fp_regs);
+       if (err < 0) {
+               printk(UM_KERN_ERR "copy_context_skas0 : put_fp_registers "
+                      "failed, pid = %d, err = %d\n", pid, err);
+               return err;
+       }
+
         /* set a well known return code for detection of child write failure */
         child_data->err = 12345678;
  
diff --git a/arch/um/sys-i386/asm/ptrace.h b/arch/um/sys-i386/asm/ptrace.h

index 0273e4d09af7b65e47c5dab30936f5d1acd69643..5d2a59112537b17fff28cb627747f5d33f48abb0 100644 (file)
--- a/arch/um/sys-i386/asm/ptrace.h
+++ b/arch/um/sys-i386/asm/ptrace.h
@@ -42,11 +42,6 @@
   */
  struct user_desc;
  
-extern int get_fpxregs(struct user_fxsr_struct __user *buf,
-                      struct task_struct *child);
-extern int set_fpxregs(struct user_fxsr_struct __user *buf,
-                      struct task_struct *tsk);
-
  extern int ptrace_get_thread_area(struct task_struct *child, int idx,
                                    struct user_desc __user *user_desc);
  
diff --git a/arch/um/sys-i386/ptrace.c b/arch/um/sys-i386/ptrace.c

index d23b2d3ea3841cff23de5ed43edbcc2b1fe68375..3375c271785157e392ad18147c90d1a61eb74f1f 100644 (file)
--- a/arch/um/sys-i386/ptrace.c
+++ b/arch/um/sys-i386/ptrace.c
@@ -145,7 +145,7 @@ int peek_user(struct task_struct *child, long addr, long data)
         return put_user(tmp, (unsigned long __user *) data);
  }
  
-int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
+static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
  {
         int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
         struct user_i387_struct fpregs;
@@ -161,7 +161,7 @@ int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
         return n;
  }
  
-int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
+static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
  {
         int n, cpu = ((struct thread_info *) child->stack)->cpu;
         struct user_i387_struct fpregs;
@@ -174,7 +174,7 @@ int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
                                     (unsigned long *) &fpregs);
  }
  
-int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
+static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
  {
         int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
         struct user_fxsr_struct fpregs;
@@ -190,7 +190,7 @@ int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
         return n;
  }
  
-int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
+static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
  {
         int n, cpu = ((struct thread_info *) child->stack)->cpu;
         struct user_fxsr_struct fpregs;
@@ -206,5 +206,23 @@ int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
  long subarch_ptrace(struct task_struct *child, long request,
                     unsigned long addr, unsigned long data)
  {
-       return -EIO;
+       int ret = -EIO;
+       void __user *datap = (void __user *) data;
+       switch (request) {
+       case PTRACE_GETFPREGS: /* Get the child FPU state. */
+               ret = get_fpregs(datap, child);
+               break;
+       case PTRACE_SETFPREGS: /* Set the child FPU state. */
+               ret = set_fpregs(datap, child);
+               break;
+       case PTRACE_GETFPXREGS: /* Get the child FPU state. */
+               ret = get_fpxregs(datap, child);
+               break;
+       case PTRACE_SETFPXREGS: /* Set the child FPU state. */
+               ret = set_fpxregs(datap, child);
+               break;
+       default:
+               ret = -EIO;
+       }
+       return ret;
  }
diff --git a/arch/um/sys-i386/shared/sysdep/ptrace.h b/arch/um/sys-i386/shared/sysdep/ptrace.h

index d50e62e070707ce6ca88b9e8f450ac5eec61d98e..c398a5076111792a6618d40b4b87fd75c3981e6c 100644 (file)
--- a/arch/um/sys-i386/shared/sysdep/ptrace.h
+++ b/arch/um/sys-i386/shared/sysdep/ptrace.h
@@ -53,6 +53,7 @@ extern int sysemu_supported;
  
  struct uml_pt_regs {
         unsigned long gp[MAX_REG_NR];
+       unsigned long fp[HOST_FPX_SIZE];
         struct faultinfo faultinfo;
         long syscall;
         int is_user;
diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/um/sys-x86_64/ptrace.c

index f43613643cdb71bf3100065830471965cc246541..4005506834fddb42002f59a5f46f69bdef0ed6df 100644 (file)
--- a/arch/um/sys-x86_64/ptrace.c
+++ b/arch/um/sys-x86_64/ptrace.c
@@ -145,7 +145,7 @@ int is_syscall(unsigned long addr)
         return instr == 0x050f;
  }
  
-int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
+static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
  {
         int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
         long fpregs[HOST_FP_SIZE];
@@ -162,7 +162,7 @@ int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
         return n;
  }
  
-int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
+static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
  {
         int n, cpu = ((struct thread_info *) child->stack)->cpu;
         long fpregs[HOST_FP_SIZE];
@@ -182,12 +182,16 @@ long subarch_ptrace(struct task_struct *child, long request,
         void __user *datap = (void __user *) data;
  
         switch (request) {
-       case PTRACE_GETFPXREGS: /* Get the child FPU state. */
+       case PTRACE_GETFPREGS: /* Get the child FPU state. */
                 ret = get_fpregs(datap, child);
                 break;
-       case PTRACE_SETFPXREGS: /* Set the child FPU state. */
+       case PTRACE_SETFPREGS: /* Set the child FPU state. */
                 ret = set_fpregs(datap, child);
                 break;
+       case PTRACE_ARCH_PRCTL:
+               /* XXX Calls ptrace on the host - needs some SMP thinking */
+               ret = arch_prctl(child, data, (void __user *) addr);
+               break;
         }
  
         return ret;
diff --git a/arch/um/sys-x86_64/shared/sysdep/ptrace.h b/arch/um/sys-x86_64/shared/sysdep/ptrace.h

index fdba5457947a0cc7f16cfd34d30a63bb05120e0d..8ee8f8e12af167b805fab8c86242c2fd8cb35ef9 100644 (file)
--- a/arch/um/sys-x86_64/shared/sysdep/ptrace.h
+++ b/arch/um/sys-x86_64/shared/sysdep/ptrace.h
@@ -85,6 +85,7 @@
  
  struct uml_pt_regs {
         unsigned long gp[MAX_REG_NR];
+       unsigned long fp[HOST_FP_SIZE];
         struct faultinfo faultinfo;
         long syscall;
         int is_user;
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S

index a0e866d233eeb833ef9740e1247962d298cc5971..54edb207ff3a0cb181811219cdc46500c8ba66d5 100644 (file)
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -672,7 +672,7 @@ ia32_sys_call_table:
         .quad sys32_vm86_warning        /* vm86 */ 
         .quad quiet_ni_syscall  /* query_module */
         .quad sys_poll
-       .quad compat_sys_nfsservctl
+       .quad quiet_ni_syscall /* old nfsservctl */
         .quad sys_setresgid16   /* 170 */
         .quad sys_getresgid16
         .quad sys_prctl
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h

index 4554cc6fb96afec6625814ba2fbd983eb0f7f00e..091508b533b46810603bb3dfa86a207d24e1e9c0 100644 (file)
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -16,7 +16,6 @@
  #endif
  
  .macro altinstruction_entry orig alt feature orig_len alt_len
-       .align 8
         .long \orig - .
         .long \alt - .
         .word \feature
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h

index 23fb6d79f2094356a5f471d321d883eb5a9c68cf..37ad100a2210e842212fdcffd895370895a87512 100644 (file)
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -48,9 +48,6 @@ struct alt_instr {
         u16 cpuid;              /* cpuid bit set for replacement */
         u8  instrlen;           /* length of original instruction */
         u8  replacementlen;     /* length of new instruction, <= instrlen */
-#ifdef CONFIG_X86_64
-       u32 pad2;
-#endif
  };
  
  extern void alternative_instructions(void);
@@ -83,7 +80,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
                                                                         \
        "661:\n\t" oldinstr "\n662:\n"                                   \
        ".section .altinstructions,\"a\"\n"                              \
-      _ASM_ALIGN "\n"                                                  \
        "         .long 661b - .\n"                      /* label           */   \
        "         .long 663f - .\n"                      /* new instruction */   \
        "         .word " __stringify(feature) "\n"      /* feature bit     */   \
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h

index 4258aac99a6e8b6493164929c3c75173cb2941c6..88b23a43f34037ac840e1e44acf6a29cb7dc5861 100644 (file)
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -332,7 +332,6 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
                 asm goto("1: jmp %l[t_no]\n"
                          "2:\n"
                          ".section .altinstructions,\"a\"\n"
-                        _ASM_ALIGN "\n"
                          " .long 1b - .\n"
                          " .long 0\n"           /* no replacement */
                          " .word %P0\n"         /* feature bit */
@@ -350,7 +349,6 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
                 asm volatile("1: movb $0,%0\n"
                              "2:\n"
                              ".section .altinstructions,\"a\"\n"
-                            _ASM_ALIGN "\n"
                              " .long 1b - .\n"
                              " .long 3f - .\n"
                              " .word %P1\n"             /* feature bit */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h

index 7b439d9aea2a5c30a22f1f3b8ba17db7f4011a39..41935fadfdfcca18be2ca724b34e1c45d3116ac1 100644 (file)
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -27,8 +27,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
  
         desc->base2             = (info->base_addr & 0xff000000) >> 24;
         /*
-        * Don't allow setting of the lm bit. It is useless anyway
-        * because 64bit system calls require __USER_CS:
+        * Don't allow setting of the lm bit. It would confuse
+        * user_64bit_mode and would get overridden by sysret anyway.
          */
         desc->l                 = 0;
  }
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h

index f9a320984a105e2c7a82bfd61d4ab76aadee8141..7e50f06393aae8869eb5d5e0ab7cf00de1a911d9 100644 (file)
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -17,7 +17,6 @@
   *  Vectors   0 ...  31 : system traps and exceptions - hardcoded events
   *  Vectors  32 ... 127 : device interrupts
   *  Vector  128         : legacy int80 syscall interface
- *  Vector  204         : legacy x86_64 vsyscall emulation
   *  Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
   *  Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
   *
@@ -51,9 +50,6 @@
  #ifdef CONFIG_X86_32
  # define SYSCALL_VECTOR                        0x80
  #endif
-#ifdef CONFIG_X86_64
-# define VSYSCALL_EMU_VECTOR           0xcc
-#endif
  
  /*
   * Vectors 0x30-0x3f are used for ISA interrupts.
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h

index 2c76521631112153c3c40e96fcf4275c1c50abe9..8e8b9a4987ee0225d0a7e38504436f53f4c81d30 100644 (file)
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -41,6 +41,7 @@
  
  #include <asm/desc_defs.h>
  #include <asm/kmap_types.h>
+#include <asm/pgtable_types.h>
  
  struct page;
  struct thread_struct;
@@ -63,6 +64,11 @@ struct paravirt_callee_save {
  struct pv_info {
         unsigned int kernel_rpl;
         int shared_kernel_pmd;
+
+#ifdef CONFIG_X86_64
+       u16 extra_user_64bit_cs;  /* __USER_CS if none */
+#endif
+
         int paravirt_enabled;
         const char *name;
  };
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h

index 94e7618fcac8d6fb8bd61c1b356b522187b1218a..35664547125b40f663d09c3e556f7699e630586b 100644 (file)
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -131,6 +131,9 @@ struct pt_regs {
  #ifdef __KERNEL__
  
  #include <linux/init.h>
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt_types.h>
+#endif
  
  struct cpuinfo_x86;
  struct task_struct;
@@ -187,6 +190,22 @@ static inline int v8086_mode(struct pt_regs *regs)
  #endif
  }
  
+#ifdef CONFIG_X86_64
+static inline bool user_64bit_mode(struct pt_regs *regs)
+{
+#ifndef CONFIG_PARAVIRT
+       /*
+        * On non-paravirt systems, this is the only long mode CPL 3
+        * selector.  We do not allow long mode selectors in the LDT.
+        */
+       return regs->cs == __USER_CS;
+#else
+       /* Headers are too twisted for this to go in paravirt.h. */
+       return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
+#endif
+}
+#endif
+
  /*
   * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
   * when it traps.  The previous stack will be directly underneath the saved
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h

index a518c0a4504465e6ac46068ba6d34a68d66a6fd3..c59cc97fe6c1478225863d0d814bece995652f5a 100644 (file)
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -44,7 +44,7 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
                 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
  #elif defined(__x86_64__)
         __asm__ (
-               "mul %[mul_frac] ; shrd $32, %[hi], %[lo]"
+               "mulq %[mul_frac] ; shrd $32, %[hi], %[lo]"
                 : [lo]"=a"(product),
                   [hi]"=d"(tmp)
                 : "0"(delta),
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h

index 2bae0a513b40ebbcfd23a621c82a0365a4cb5062..0012d0902c5f1c38dca380413b1f79becb9ad204 100644 (file)
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -40,7 +40,6 @@ asmlinkage void alignment_check(void);
  asmlinkage void machine_check(void);
  #endif /* CONFIG_X86_MCE */
  asmlinkage void simd_coprocessor_error(void);
-asmlinkage void emulate_vsyscall(void);
  
  dotraplinkage void do_divide_error(struct pt_regs *, long);
  dotraplinkage void do_debug(struct pt_regs *, long);
@@ -67,7 +66,6 @@ dotraplinkage void do_alignment_check(struct pt_regs *, long);
  dotraplinkage void do_machine_check(struct pt_regs *, long);
  #endif
  dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
-dotraplinkage void do_emulate_vsyscall(struct pt_regs *, long);
  #ifdef CONFIG_X86_32
  dotraplinkage void do_iret_error(struct pt_regs *, long);
  #endif
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h

index 705bf139288c1f8014b3b9d9389d01f606740153..2010405734442f296432c047e2bbedf70b9614a5 100644 (file)
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -414,7 +414,7 @@ __SYSCALL(__NR_query_module, sys_ni_syscall)
  __SYSCALL(__NR_quotactl, sys_quotactl)
  
  #define __NR_nfsservctl                                180
-__SYSCALL(__NR_nfsservctl, sys_nfsservctl)
+__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
  
  /* reserved for LiS/STREAMS */
  #define __NR_getpmsg                           181
@@ -681,6 +681,8 @@ __SYSCALL(__NR_syncfs, sys_syncfs)
  __SYSCALL(__NR_sendmmsg, sys_sendmmsg)
  #define __NR_setns                             308
  __SYSCALL(__NR_setns, sys_setns)
+#define __NR_getcpu                            309
+__SYSCALL(__NR_getcpu, sys_getcpu)
  
  #ifndef __NO_STUBS
  #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h

index 60107072c28b71d646507a455e82c0657af24c9f..eaea1d31f753092cd19ff6cf126f6c7c0f7cedb7 100644 (file)
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -27,6 +27,12 @@ extern struct timezone sys_tz;
  
  extern void map_vsyscall(void);
  
+/*
+ * Called on instruction fetch fault in vsyscall page.
+ * Returns true if handled.
+ */
+extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+
  #endif /* __KERNEL__ */
  
  #endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h

index 64a619d47d341739ff34b5236e6bf85db0a79b06..7ff4669580cfc8bd0db462d3d0c07cf94cfddca0 100644 (file)
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -39,7 +39,7 @@ typedef struct xpaddr {
      ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
  
  extern unsigned long *machine_to_phys_mapping;
-extern unsigned int   machine_to_phys_order;
+extern unsigned long  machine_to_phys_nr;
  
  extern unsigned long get_phys_to_machine(unsigned long pfn);
  extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
@@ -87,7 +87,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
         if (xen_feature(XENFEAT_auto_translated_physmap))
                 return mfn;
  
-       if (unlikely((mfn >> machine_to_phys_order) != 0)) {
+       if (unlikely(mfn >= machine_to_phys_nr)) {
                 pfn = ~0;
                 goto try_override;
         }
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile

index 04105574c8e9b9bc306b275469252febebdb6b45..82f2912155a5194f06d0b23c21bc9c9c090406a2 100644 (file)
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -17,19 +17,6 @@ CFLAGS_REMOVE_ftrace.o = -pg
  CFLAGS_REMOVE_early_printk.o = -pg
  endif
  
-#
-# vsyscalls (which work on the user stack) should have
-# no stack-protector checks:
-#
-nostackp := $(call cc-option, -fno-stack-protector)
-CFLAGS_vsyscall_64.o   := $(PROFILING) -g0 $(nostackp)
-CFLAGS_hpet.o          := $(nostackp)
-CFLAGS_paravirt.o      := $(nostackp)
-GCOV_PROFILE_vsyscall_64.o     := n
-GCOV_PROFILE_hpet.o            := n
-GCOV_PROFILE_tsc.o             := n
-GCOV_PROFILE_paravirt.o                := n
-
  obj-y                  := process_$(BITS).o signal.o entry_$(BITS).o
  obj-y                  += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
  obj-y                  += time.o ioport.o ldt.o dumpstack.o
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c

index adc66c3a1fef2417be8741d334d5f8460c23ff10..34b18594e72467212f0e430c329e9fa523f3aa09 100644 (file)
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -207,7 +207,6 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
             ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
             APIC_DM_INIT;
         uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
-       mdelay(10);
  
         val = (1UL << UVH_IPI_INT_SEND_SHFT) |
             (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c

index 08119a37e53c1f11b044ce5c865c7ad9339c4fd0..6b96110bb0c33078bdc05de2b727fc5bcb03eb71 100644 (file)
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -149,7 +149,6 @@ struct set_mtrr_data {
   */
  static int mtrr_rendezvous_handler(void *info)
  {
-#ifdef CONFIG_SMP
         struct set_mtrr_data *data = info;
  
         /*
@@ -171,7 +170,6 @@ static int mtrr_rendezvous_handler(void *info)
         } else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
                 mtrr_if->set_all();
         }
-#endif
         return 0;
  }
  
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c

index 4ee3abf20ed6118a45e0e6068962f4c181eeecaf..cfa62ec090ece1dfb48420c5641cdb52b58feffa 100644 (file)
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1900,6 +1900,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
  
         perf_callchain_store(entry, regs->ip);
  
+       if (!current->mm)
+               return;
+
         if (perf_callchain_user32(regs, entry))
                 return;
  
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c

index 45fbb8f7f549e1cd447228731df4c8d952c4b6e3..f88af2c2a561f2f5e680ba6eabe545a217ae4316 100644 (file)
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1590,6 +1590,7 @@ static __init int intel_pmu_init(void)
                 break;
  
         case 42: /* SandyBridge */
+       case 45: /* SandyBridge, "Romely-EP" */
                 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
  
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S

index 5c1a91974918d1b6104c9068ed4eef41ff6e30ab..f3f6f5344001ee47b17eb0ff029179751dfd5171 100644 (file)
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -54,6 +54,7 @@
  #include <asm/ftrace.h>
  #include <asm/irq_vectors.h>
  #include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
  
  /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
  #include <linux/elf-em.h>
@@ -873,12 +874,7 @@ ENTRY(simd_coprocessor_error)
  661:   pushl_cfi $do_general_protection
  662:
  .section .altinstructions,"a"
-       .balign 4
-       .long 661b
-       .long 663f
-       .word X86_FEATURE_XMM
-       .byte 662b-661b
-       .byte 664f-663f
+       altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
  .previous
  .section .altinstr_replacement,"ax"
  663:   pushl $do_simd_coprocessor_error
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S

index e13329d800c8549746c0958c23276c2929a38382..6419bb05ecd56e653033a1752410625b36440000 100644 (file)
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1111,7 +1111,6 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
  zeroentry coprocessor_error do_coprocessor_error
  errorentry alignment_check do_alignment_check
  zeroentry simd_coprocessor_error do_simd_coprocessor_error
-zeroentry emulate_vsyscall do_emulate_vsyscall
  
  
         /* Reload gs selector with exception handling */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c

index 613a7931ecc180182ff3c2b7470b045c6889e664..d90272e6bc40bc75f34dfbcf158ffce3769203ab 100644 (file)
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -307,6 +307,10 @@ struct pv_info pv_info = {
         .paravirt_enabled = 0,
         .kernel_rpl = 0,
         .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
+
+#ifdef CONFIG_X86_64
+       .extra_user_64bit_cs = __USER_CS,
+#endif
  };
  
  struct pv_init_ops pv_init_ops = {
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c

index 7977f0cfe339076a713ae773bbb221d9cdf406e2..c346d116148866758ca5e602684eb91fcb77f219 100644 (file)
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -74,7 +74,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
  
  #ifdef CONFIG_X86_64
                 case 0x40 ... 0x4f:
-                       if (regs->cs != __USER_CS)
+                       if (!user_64bit_mode(regs))
                                 /* 32-bit mode: register increment */
                                 return 0;
                         /* 64-bit mode: REX prefix */
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S

index fbb0a045a1a23bc9bdc2f9dd23c6c9673e2e13f7..bc19be332bc99544ccbe426975b8b0cdb136f293 100644 (file)
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -168,7 +168,7 @@ ENTRY(sys_call_table)
         .long ptregs_vm86
         .long sys_ni_syscall    /* Old sys_query_module */
         .long sys_poll
-       .long sys_nfsservctl
+       .long sys_ni_syscall    /* Old nfsservctl */
         .long sys_setresgid16   /* 170 */
         .long sys_getresgid16
         .long sys_prctl
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c

index 9682ec50180c1b732996773bfa4feafa18c6c116..6913369c234c93db814848b264464bc1452572e3 100644 (file)
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -872,12 +872,6 @@ void __init trap_init(void)
         set_bit(SYSCALL_VECTOR, used_vectors);
  #endif
  
-#ifdef CONFIG_X86_64
-       BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors));
-       set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall);
-       set_bit(VSYSCALL_EMU_VECTOR, used_vectors);
-#endif
-
         /*
          * Should be a barrier for any external CPU state:
          */
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S

index 4aa9c54a9b76e3becf1c9e88fc5b547408e46450..0f703f10901a96d6b2d24e9f93559d62bcc2f63a 100644 (file)
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -71,7 +71,6 @@ PHDRS {
         text PT_LOAD FLAGS(5);          /* R_E */
         data PT_LOAD FLAGS(6);          /* RW_ */
  #ifdef CONFIG_X86_64
-       user PT_LOAD FLAGS(5);          /* R_E */
  #ifdef CONFIG_SMP
         percpu PT_LOAD FLAGS(6);        /* RW_ */
  #endif
@@ -154,44 +153,16 @@ SECTIONS
  
  #ifdef CONFIG_X86_64
  
-#define VSYSCALL_ADDR (-10*1024*1024)
-
-#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
-#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
-
-#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
-#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
-
-       . = ALIGN(4096);
-       __vsyscall_0 = .;
-
-       . = VSYSCALL_ADDR;
-       .vsyscall : AT(VLOAD(.vsyscall)) {
-               *(.vsyscall_0)
-
-               . = 1024;
-               *(.vsyscall_1)
-
-               . = 2048;
-               *(.vsyscall_2)
-
-               . = 4096;  /* Pad the whole page. */
-       } :user =0xcc
-       . = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE);
-
-#undef VSYSCALL_ADDR
-#undef VLOAD_OFFSET
-#undef VLOAD
-#undef VVIRT_OFFSET
-#undef VVIRT
-
+       . = ALIGN(PAGE_SIZE);
         __vvar_page = .;
  
         .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) {
+               /* work around gold bug 13023 */
+               __vvar_beginning_hack = .;
  
-             /* Place all vvars at the offsets in asm/vvar.h. */
-#define EMIT_VVAR(name, offset)                \
-               . = offset;             \
+               /* Place all vvars at the offsets in asm/vvar.h. */
+#define EMIT_VVAR(name, offset)                        \
+               . = __vvar_beginning_hack + offset;     \
                 *(.vvar_ ## name)
  #define __VVAR_KERNEL_LDS
  #include <asm/vvar.h>
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c

index dda7dff9cef7e624be6239ca463789e09eedd200..18ae83dd1cd7379ad79d0f9755fd6a0f8f369995 100644 (file)
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -18,9 +18,6 @@
   *  use the vDSO.
   */
  
-/* Disable profiling for userspace code: */
-#define DISABLE_BRANCH_PROFILING
-
  #include <linux/time.h>
  #include <linux/init.h>
  #include <linux/kernel.h>
@@ -50,12 +47,36 @@
  #include <asm/vgtod.h>
  #include <asm/traps.h>
  
+#define CREATE_TRACE_POINTS
+#include "vsyscall_trace.h"
+
  DEFINE_VVAR(int, vgetcpu_mode);
  DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
  {
         .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
  };
  
+static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
+
+static int __init vsyscall_setup(char *str)
+{
+       if (str) {
+               if (!strcmp("emulate", str))
+                       vsyscall_mode = EMULATE;
+               else if (!strcmp("native", str))
+                       vsyscall_mode = NATIVE;
+               else if (!strcmp("none", str))
+                       vsyscall_mode = NONE;
+               else
+                       return -EINVAL;
+
+               return 0;
+       }
+
+       return -EINVAL;
+}
+early_param("vsyscall", vsyscall_setup);
+
  void update_vsyscall_tz(void)
  {
         unsigned long flags;
@@ -100,7 +121,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
  
         printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
                level, tsk->comm, task_pid_nr(tsk),
-              message, regs->ip - 2, regs->cs,
+              message, regs->ip, regs->cs,
                regs->sp, regs->ax, regs->si, regs->di);
  }
  
@@ -118,46 +139,39 @@ static int addr_to_vsyscall_nr(unsigned long addr)
         return nr;
  }
  
-void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
+bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
  {
         struct task_struct *tsk;
         unsigned long caller;
         int vsyscall_nr;
         long ret;
  
-       local_irq_enable();
-
         /*
-        * Real 64-bit user mode code has cs == __USER_CS.  Anything else
-        * is bogus.
+        * No point in checking CS -- the only way to get here is a user mode
+        * trap to a high address, which means that we're in 64-bit user code.
          */
-       if (regs->cs != __USER_CS) {
-               /*
-                * If we trapped from kernel mode, we might as well OOPS now
-                * instead of returning to some random address and OOPSing
-                * then.
-                */
-               BUG_ON(!user_mode(regs));
  
-               /* Compat mode and non-compat 32-bit CS should both segfault. */
-               warn_bad_vsyscall(KERN_WARNING, regs,
-                                 "illegal int 0xcc from 32-bit mode");
-               goto sigsegv;
+       WARN_ON_ONCE(address != regs->ip);
+
+       if (vsyscall_mode == NONE) {
+               warn_bad_vsyscall(KERN_INFO, regs,
+                                 "vsyscall attempted with vsyscall=none");
+               return false;
         }
  
-       /*
-        * x86-ism here: regs->ip points to the instruction after the int 0xcc,
-        * and int 0xcc is two bytes long.
-        */
-       vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2);
+       vsyscall_nr = addr_to_vsyscall_nr(address);
+
+       trace_emulate_vsyscall(vsyscall_nr);
+
         if (vsyscall_nr < 0) {
                 warn_bad_vsyscall(KERN_WARNING, regs,
-                                 "illegal int 0xcc (exploit attempt?)");
+                                 "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
                 goto sigsegv;
         }
  
         if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
-               warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)");
+               warn_bad_vsyscall(KERN_WARNING, regs,
+                                 "vsyscall with bad stack (exploit attempt?)");
                 goto sigsegv;
         }
  
@@ -202,13 +216,11 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
         regs->ip = caller;
         regs->sp += 8;
  
-       local_irq_disable();
-       return;
+       return true;
  
  sigsegv:
-       regs->ip -= 2;  /* The faulting instruction should be the int 0xcc. */
         force_sig(SIGSEGV, current);
-       local_irq_disable();
+       return true;
  }
  
  /*
@@ -256,15 +268,21 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
  
  void __init map_vsyscall(void)
  {
-       extern char __vsyscall_0;
-       unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
+       extern char __vsyscall_page;
+       unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
         extern char __vvar_page;
         unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
  
-       /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
-       __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
+       __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall,
+                    vsyscall_mode == NATIVE
+                    ? PAGE_KERNEL_VSYSCALL
+                    : PAGE_KERNEL_VVAR);
+       BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) !=
+                    (unsigned long)VSYSCALL_START);
+
         __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR);
-       BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS);
+       BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) !=
+                    (unsigned long)VVAR_ADDRESS);
  }
  
  static int __init vsyscall_init(void)
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S

index ffa845eae5ca5f407410e342a4e67c6f98e4a2cb..c9596a9af15985c7f668bd18fc3f8547a8bcec31 100644 (file)
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ b/arch/x86/kernel/vsyscall_emu_64.S
@@ -7,21 +7,31 @@
   */
  
  #include <linux/linkage.h>
+
  #include <asm/irq_vectors.h>
+#include <asm/page_types.h>
+#include <asm/unistd_64.h>
+
+__PAGE_ALIGNED_DATA
+       .globl __vsyscall_page
+       .balign PAGE_SIZE, 0xcc
+       .type __vsyscall_page, @object
+__vsyscall_page:
+
+       mov $__NR_gettimeofday, %rax
+       syscall
+       ret
  
-/* The unused parts of the page are filled with 0xcc by the linker script. */
+       .balign 1024, 0xcc
+       mov $__NR_time, %rax
+       syscall
+       ret
  
-.section .vsyscall_0, "a"
-ENTRY(vsyscall_0)
-       int $VSYSCALL_EMU_VECTOR
-END(vsyscall_0)
+       .balign 1024, 0xcc
+       mov $__NR_getcpu, %rax
+       syscall
+       ret
  
-.section .vsyscall_1, "a"
-ENTRY(vsyscall_1)
-       int $VSYSCALL_EMU_VECTOR
-END(vsyscall_1)
+       .balign 4096, 0xcc
  
-.section .vsyscall_2, "a"
-ENTRY(vsyscall_2)
-       int $VSYSCALL_EMU_VECTOR
-END(vsyscall_2)
+       .size __vsyscall_page, 4096
diff --git a/arch/x86/kernel/vsyscall_trace.h b/arch/x86/kernel/vsyscall_trace.h

new file mode 100644 (file)

index 0000000..a8b2ede
--- /dev/null
+++ b/arch/x86/kernel/vsyscall_trace.h
@@ -0,0 +1,29 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vsyscall
+
+#if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __VSYSCALL_TRACE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(emulate_vsyscall,
+
+           TP_PROTO(int nr),
+
+           TP_ARGS(nr),
+
+           TP_STRUCT__entry(__field(int, nr)),
+
+           TP_fast_assign(
+                          __entry->nr = nr;
+                          ),
+
+           TP_printk("nr = %d", __entry->nr)
+);
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/x86/kernel
+#define TRACE_INCLUDE_FILE vsyscall_trace
+#include <trace/define_trace.h>
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig

index 988724b236b645466f337677fb2b2c950a252dc1..ff5790d8e990f0c382fa43c5ff2d3b5d7ed46c48 100644 (file)
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -22,6 +22,8 @@ config KVM
         depends on HAVE_KVM
         # for device assignment:
         depends on PCI
+       # for TASKSTATS/TASK_DELAY_ACCT:
+       depends on NET
         select PREEMPT_NOTIFIERS
         select MMU_NOTIFIER
         select ANON_INODES
@@ -31,6 +33,7 @@ config KVM
         select KVM_ASYNC_PF
         select USER_RETURN_NOTIFIER
         select KVM_MMIO
+       select TASKSTATS
         select TASK_DELAY_ACCT
         ---help---
           Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c

index 4d09df054e391822aa7a5e634a4ff2f4f8afabc0..0d17c8c50acd54334b6c92335dad6e4c954ca18f 100644 (file)
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -17,6 +17,7 @@
  #include <asm/traps.h>                 /* dotraplinkage, ...           */
  #include <asm/pgalloc.h>               /* pgd_*(), ...                 */
  #include <asm/kmemcheck.h>             /* kmemcheck_*(), ...           */
+#include <asm/vsyscall.h>
  
  /*
   * Page fault error code bits:
@@ -105,7 +106,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr,
                  * but for now it's good enough to assume that long
                  * mode only uses well known segments or kernel.
                  */
-               return (!user_mode(regs)) || (regs->cs == __USER_CS);
+               return (!user_mode(regs) || user_64bit_mode(regs));
  #endif
         case 0x60:
                 /* 0x64 thru 0x67 are valid prefixes in all modes. */
@@ -720,6 +721,18 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
                 if (is_errata100(regs, address))
                         return;
  
+#ifdef CONFIG_X86_64
+               /*
+                * Instruction fetch faults in the vsyscall page might need
+                * emulation.
+                */
+               if (unlikely((error_code & PF_INSTR) &&
+                            ((address & ~0xfff) == VSYSCALL_START))) {
+                       if (emulate_vsyscall(regs, address))
+                               return;
+               }
+#endif
+
                 if (unlikely(show_unhandled_signals))
                         show_signal_msg(regs, error_code, address, tsk);
  
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c

index ae3cb23cd89b86cdecbc6e202fb7e2d912079cf5..039d91315bc56bfcf6b8693c5ea3890ba7de4363 100644 (file)
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -360,6 +360,20 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
                 }
         }
  
+       /* After the PCI-E bus has been walked and all devices discovered,
+        * configure any settings of the fabric that might be necessary.
+        */
+       if (bus) {
+               struct pci_bus *child;
+               list_for_each_entry(child, &bus->children, node) {
+                       struct pci_dev *self = child->self;
+                       if (!self)
+                               continue;
+
+                       pcie_bus_configure_settings(child, self->pcie_mpss);
+               }
+       }
+
         if (!bus)
                 kfree(sd);
  
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c

index 7000e74b30877bea018f46946f8d99b1275e7624..58425adc22c6ae156b01b916fe6703428b6391f9 100644 (file)
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -689,7 +689,9 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
                         irq_attr.trigger = 1;
                         irq_attr.polarity = 1;
                         io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
-               }
+               } else
+                       pentry->irq = 0; /* No irq */
+
                 switch (pentry->type) {
                 case SFI_DEV_TYPE_IPC:
                         /* ID as IRQ is a hack that will go away */
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c

index 8b9940e78e2fa6da6751f115f4d8b0ce87f5b53a..7cce722667b83dd06b506d06b771a572c0843598 100644 (file)
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -161,13 +161,13 @@ restart:
         if (inbuf && inlen) {
                 /* write data to EC */
                 for (i = 0; i < inlen; i++) {
+                       pr_devel("olpc-ec:  sending cmd arg 0x%x\n", inbuf[i]);
+                       outb(inbuf[i], 0x68);
                         if (wait_on_ibf(0x6c, 0)) {
                                 printk(KERN_ERR "olpc-ec:  timeout waiting for"
                                                 " EC accept data!\n");
                                 goto err;
                         }
-                       pr_devel("olpc-ec:  sending cmd arg 0x%x\n", inbuf[i]);
-                       outb(inbuf[i], 0x68);
                 }
         }
         if (outbuf && outlen) {
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S

index 1b979c12ba85b0d17443e1037580667649cbee1a..01f5e3b4613cb88212dee66e191711ae8bd814c6 100644 (file)
--- a/arch/x86/vdso/vdso.S
+++ b/arch/x86/vdso/vdso.S
@@ -9,6 +9,7 @@ __PAGE_ALIGNED_DATA
  vdso_start:
         .incbin "arch/x86/vdso/vdso.so"
  vdso_end:
+       .align PAGE_SIZE /* extra data here leaks to userspace. */
  
  .previous
  
diff --git a/arch/x86/vdso/vdso32/sysenter.S b/arch/x86/vdso/vdso32/sysenter.S

index e2800affa754d66d8ac3533f25834f0412ec9aee..e354bceee0e0490d29940736b9716f5b2be32d62 100644 (file)
--- a/arch/x86/vdso/vdso32/sysenter.S
+++ b/arch/x86/vdso/vdso32/sysenter.S
@@ -43,7 +43,7 @@ __kernel_vsyscall:
         .space 7,0x90
  
         /* 14: System call restart point is here! (SYSENTER_RETURN-2) */
-       jmp .Lenter_kernel
+       int $0x80
         /* 16: System call normal return point is here! */
  VDSO32_SYSENTER_RETURN:        /* Symbol used by sysenter.c via vdso32-syms.h */
         pop %ebp
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile

index 3326204e251f3108650944ef1d558c98745c1935..add2c2d729cef9a0bc804a0cafd6477f142c4df3 100644 (file)
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -15,7 +15,7 @@ obj-y         := enlighten.o setup.o multicalls.o mmu.o irq.o \
                         grant-table.o suspend.o platform-pci-unplug.o \
                         p2m.o
  
-obj-$(CONFIG_FTRACE) += trace.o
+obj-$(CONFIG_EVENT_TRACING) += trace.o
  
  obj-$(CONFIG_SMP)              += smp.o
  obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c

index 974a528458a04d3257234ceb70991b7f01eb3748..2d69617950f7c94a8e14c7e5fe7ad8730aa26eeb 100644 (file)
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -77,8 +77,8 @@ EXPORT_SYMBOL_GPL(xen_domain_type);
  
  unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
  EXPORT_SYMBOL(machine_to_phys_mapping);
-unsigned int   machine_to_phys_order;
-EXPORT_SYMBOL(machine_to_phys_order);
+unsigned long  machine_to_phys_nr;
+EXPORT_SYMBOL(machine_to_phys_nr);
  
  struct start_info *xen_start_info;
  EXPORT_SYMBOL_GPL(xen_start_info);
@@ -951,6 +951,10 @@ static const struct pv_info xen_info __initconst = {
         .paravirt_enabled = 1,
         .shared_kernel_pmd = 0,
  
+#ifdef CONFIG_X86_64
+       .extra_user_64bit_cs = FLAT_USER_CS64,
+#endif
+
         .name = "Xen",
  };
  
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c

index f987bde77c490666fa27e728b509ce9d8d0ab1b6..3dd53f997b11e768b72f11819c25d04ff39ff639 100644 (file)
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1713,15 +1713,17 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
  void __init xen_setup_machphys_mapping(void)
  {
         struct xen_machphys_mapping mapping;
-       unsigned long machine_to_phys_nr_ents;
  
         if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
                 machine_to_phys_mapping = (unsigned long *)mapping.v_start;
-               machine_to_phys_nr_ents = mapping.max_mfn + 1;
+               machine_to_phys_nr = mapping.max_mfn + 1;
         } else {
-               machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+               machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
         }
-       machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
+#ifdef CONFIG_X86_32
+       WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
+               < machine_to_phys_mapping);
+#endif
  }
  
  #ifdef CONFIG_X86_64
@@ -1916,6 +1918,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
  # endif
  #else
         case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+       case VVAR_PAGE:
  #endif
         case FIX_TEXT_POKE0:
         case FIX_TEXT_POKE1:
@@ -1956,7 +1959,8 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
  #ifdef CONFIG_X86_64
         /* Replicate changes to map the vsyscall page into the user
            pagetable vsyscall mapping. */
-       if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+       if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) ||
+           idx == VVAR_PAGE) {
                 unsigned long vaddr = __fix_to_virt(idx);
                 set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
         }
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c

index df118a825f395cbb4e79b9282594caa77aa6a4ca..46d6d21dbdbec60800e38f996c7a89ebad2f1ae2 100644 (file)
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -184,6 +184,19 @@ static unsigned long __init xen_set_identity(const struct e820entry *list,
                                         PFN_UP(start_pci), PFN_DOWN(last));
         return identity;
  }
+
+static unsigned long __init xen_get_max_pages(void)
+{
+       unsigned long max_pages = MAX_DOMAIN_PAGES;
+       domid_t domid = DOMID_SELF;
+       int ret;
+
+       ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
+       if (ret > 0)
+               max_pages = ret;
+       return min(max_pages, MAX_DOMAIN_PAGES);
+}
+
  /**
   * machine_specific_memory_setup - Hook for machine specific memory setup.
   **/
@@ -292,6 +305,14 @@ char * __init xen_memory_setup(void)
  
         sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
  
+       extra_limit = xen_get_max_pages();
+       if (max_pfn + extra_pages > extra_limit) {
+               if (extra_limit > max_pfn)
+                       extra_pages = extra_limit - max_pfn;
+               else
+                       extra_pages = 0;
+       }
+
         extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
  
         /*
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c

index b4533a86d7e410668e24033cae32d8382bf4111d..041d4fe9dfe4b858773bc30ba1ea05ddab501637 100644 (file)
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -32,6 +32,7 @@
  #include <xen/page.h>
  #include <xen/events.h>
  
+#include <xen/hvc-console.h>
  #include "xen-ops.h"
  #include "mmu.h"
  
@@ -207,6 +208,15 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
         unsigned cpu;
         unsigned int i;
  
+       if (skip_ioapic_setup) {
+               char *m = (max_cpus == 0) ?
+                       "The nosmp parameter is incompatible with Xen; " \
+                       "use Xen dom0_max_vcpus=1 parameter" :
+                       "The noapic parameter is incompatible with Xen";
+
+               xen_raw_printk(m);
+               panic(m);
+       }
         xen_init_lock_cpu(0);
  
         smp_store_cpu_info(0);
@@ -521,10 +531,7 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
         native_smp_prepare_cpus(max_cpus);
         WARN_ON(xen_smp_intr_init(0));
  
-       if (!xen_have_vector_callback)
-               return;
         xen_init_lock_cpu(0);
-       xen_init_spinlocks();
  }
  
  static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
@@ -546,6 +553,8 @@ static void xen_hvm_cpu_die(unsigned int cpu)
  
  void __init xen_hvm_smp_init(void)
  {
+       if (!xen_have_vector_callback)
+               return;
         smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
         smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
         smp_ops.cpu_up = xen_hvm_cpu_up;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c

index 5158c505bef9772400d263c3e895178820f5b305..163b4679556e300615095cc995fa0eac6ea7cf45 100644 (file)
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -168,9 +168,10 @@ cycle_t xen_clocksource_read(void)
          struct pvclock_vcpu_time_info *src;
         cycle_t ret;
  
-       src = &get_cpu_var(xen_vcpu)->time;
+       preempt_disable_notrace();
+       src = &__get_cpu_var(xen_vcpu)->time;
         ret = pvclock_clocksource_read(src);
-       put_cpu_var(xen_vcpu);
+       preempt_enable_notrace();
         return ret;
  }
  
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S

index 22a2093b58623cca3472a03c3950cad4395a884d..b040b0e518caf0a0fe382999b36c2c04e7344860 100644 (file)
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -113,11 +113,13 @@ xen_iret_start_crit:
  
         /*
          * If there's something pending, mask events again so we can
-        * jump back into xen_hypervisor_callback
+        * jump back into xen_hypervisor_callback. Otherwise do not
+        * touch XEN_vcpu_info_mask.
          */
-       sete XEN_vcpu_info_mask(%eax)
+       jne 1f
+       movb $1, XEN_vcpu_info_mask(%eax)
  
-       popl %eax
+1:     popl %eax
  
         /*
          * From this point on the registers are restored and the stack
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h

index a6f934f37f1abe463432eb43644dbce871280690..798ee6d285a10d9b350e11f1a53c212f8f72c803 100644 (file)
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -455,7 +455,7 @@ __SYSCALL(203, sys_reboot, 3)
  #define __NR_quotactl                          204
  __SYSCALL(204, sys_quotactl, 4)
  #define __NR_nfsservctl                        205
-__SYSCALL(205, sys_nfsservctl, 3)
+__SYSCALL(205, sys_ni_syscall, 0)
  #define __NR__sysctl                           206
  __SYSCALL(206, sys_sysctl, 1)
  #define __NR_bdflush                           207
diff --git a/block/Kconfig b/block/Kconfig

index 60be1e0455daddb636998a03eb6d47e60d1eae81..e97934eececacbbf4e62d96da3887b624bffeedd 100644 (file)
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -65,6 +65,16 @@ config BLK_DEV_BSG
  
           If unsure, say Y.
  
+config BLK_DEV_BSGLIB
+       bool "Block layer SG support v4 helper lib"
+       default n
+       select BLK_DEV_BSG
+       help
+         Subsystems will normally enable this if needed. Users will not
+         normally need to manually enable this.
+
+         If unsure, say N.
+
  config BLK_DEV_INTEGRITY
         bool "Block layer data integrity support"
         ---help---
diff --git a/block/Makefile b/block/Makefile

index 0fec4b3fab511bc065261121f279e6f64038c35c..514c6e4f427a80f269bef08473cc048e5125b968 100644 (file)
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
                         blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
  
  obj-$(CONFIG_BLK_DEV_BSG)      += bsg.o
+obj-$(CONFIG_BLK_DEV_BSGLIB)   += bsg-lib.o
  obj-$(CONFIG_BLK_CGROUP)       += blk-cgroup.o
  obj-$(CONFIG_BLK_DEV_THROTTLING)       += blk-throttle.o
  obj-$(CONFIG_IOSCHED_NOOP)     += noop-iosched.o
diff --git a/block/blk-core.c b/block/blk-core.c

index b627558c461fa7a1d9eaf6ea447db093852c8274..90e1ffdeb415914ffaad20b31f54c8e22d05ce79 100644 (file)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1702,6 +1702,7 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits);
  int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
  {
         unsigned long flags;
+       int where = ELEVATOR_INSERT_BACK;
  
         if (blk_rq_check_limits(q, rq))
                 return -EIO;
@@ -1718,7 +1719,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
          */
         BUG_ON(blk_queued_rq(rq));
  
-       add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
+       if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
+               where = ELEVATOR_INSERT_FLUSH;
+
+       add_acct_request(q, rq, where);
         spin_unlock_irqrestore(q->queue_lock, flags);
  
         return 0;
@@ -2275,7 +2279,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
   *     %false - we are done with this request
   *     %true  - still buffers pending for this request
   **/
-static bool __blk_end_bidi_request(struct request *rq, int error,
+bool __blk_end_bidi_request(struct request *rq, int error,
                                    unsigned int nr_bytes, unsigned int bidi_bytes)
  {
         if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
diff --git a/block/blk-flush.c b/block/blk-flush.c

index bb21e4c36f70ca437162356edd87d2ce3a265a46..491eb30a242db435b55a7ba962e4acce5af81a15 100644 (file)
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -95,11 +95,12 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
  {
         unsigned int policy = 0;
  
+       if (blk_rq_sectors(rq))
+               policy |= REQ_FSEQ_DATA;
+
         if (fflags & REQ_FLUSH) {
                 if (rq->cmd_flags & REQ_FLUSH)
                         policy |= REQ_FSEQ_PREFLUSH;
-               if (blk_rq_sectors(rq))
-                       policy |= REQ_FSEQ_DATA;
                 if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
                         policy |= REQ_FSEQ_POSTFLUSH;
         }
@@ -122,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq)
  
         /* make @rq a normal request */
         rq->cmd_flags &= ~REQ_FLUSH_SEQ;
-       rq->end_io = NULL;
+       rq->end_io = rq->flush.saved_end_io;
  }
  
  /**
@@ -300,9 +301,6 @@ void blk_insert_flush(struct request *rq)
         unsigned int fflags = q->flush_flags;   /* may change, cache */
         unsigned int policy = blk_flush_policy(fflags, rq);
  
-       BUG_ON(rq->end_io);
-       BUG_ON(!rq->bio || rq->bio != rq->biotail);
-
         /*
          * @policy now records what operations need to be done.  Adjust
          * REQ_FLUSH and FUA for the driver.
@@ -311,6 +309,19 @@ void blk_insert_flush(struct request *rq)
         if (!(fflags & REQ_FUA))
                 rq->cmd_flags &= ~REQ_FUA;
  
+       /*
+        * An empty flush handed down from a stacking driver may
+        * translate into nothing if the underlying device does not
+        * advertise a write-back cache.  In this case, simply
+        * complete the request.
+        */
+       if (!policy) {
+               __blk_end_bidi_request(rq, 0, 0, 0);
+               return;
+       }
+
+       BUG_ON(!rq->bio || rq->bio != rq->biotail);
+
         /*
          * If there's data but flush is not necessary, the request can be
          * processed directly without going through flush machinery.  Queue
@@ -319,6 +330,7 @@ void blk_insert_flush(struct request *rq)
         if ((policy & REQ_FSEQ_DATA) &&
             !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
                 list_add_tail(&rq->queuelist, &q->queue_head);
+               blk_run_queue_async(q);
                 return;
         }
  
@@ -329,6 +341,7 @@ void blk_insert_flush(struct request *rq)
         memset(&rq->flush, 0, sizeof(rq->flush));
         INIT_LIST_HEAD(&rq->flush.list);
         rq->cmd_flags |= REQ_FLUSH_SEQ;
+       rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
         rq->end_io = flush_data_end_io;
  
         blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
diff --git a/block/blk-softirq.c b/block/blk-softirq.c

index 475fab809a80cce9eb5573fbb392597d8dd9b838..58340d0cb23a82d40edf95b77874a5781d49bcdb 100644 (file)
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -124,6 +124,14 @@ void __blk_complete_request(struct request *req)
         } else
                 ccpu = cpu;
  
+       /*
+        * If current CPU and requested CPU are in the same group, running
+        * softirq in current CPU. One might concern this is just like
+        * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
+        * running in interrupt handler, and currently I/O controller doesn't
+        * support multiple interrupts, so current CPU is unique actually. This
+        * avoids IPI sending from current CPU to the first CPU of a group.
+        */
         if (ccpu == cpu || ccpu == group_cpu) {
                 struct list_head *list;
  do_local:
diff --git a/block/blk-throttle.c b/block/blk-throttle.c

index f6a794120505668084929afdacd0b5e17f8fca8d..a19f58c6fc3a5b5012aabb9f4891c0bbe19ce476 100644 (file)
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -746,7 +746,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg,
  static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
  {
         bool rw = bio_data_dir(bio);
-       bool sync = bio->bi_rw & REQ_SYNC;
+       bool sync = rw_is_sync(bio->bi_rw);
  
         /* Charge the bio to the group */
         tg->bytes_disp[rw] += bio->bi_size;
@@ -1150,7 +1150,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
  
                 if (tg_no_rule_group(tg, rw)) {
                         blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
-                                       rw, bio->bi_rw & REQ_SYNC);
+                                       rw, rw_is_sync(bio->bi_rw));
                         rcu_read_unlock();
                         return 0;
                 }
diff --git a/block/blk.h b/block/blk.h

index d6586287adc9ccf44e0adc556b552c7b1404ca4b..20b900a377c9d8ba85ab8ba45f10c4deb64652f4 100644 (file)
--- a/block/blk.h
+++ b/block/blk.h
@@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
                       struct bio *bio);
  void blk_dequeue_request(struct request *rq);
  void __blk_queue_free_tags(struct request_queue *q);
+bool __blk_end_bidi_request(struct request *rq, int error,
+                           unsigned int nr_bytes, unsigned int bidi_bytes);
  
  void blk_rq_timed_out_timer(unsigned long data);
  void blk_delete_timer(struct request *);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c

new file mode 100644 (file)

index 0000000..6690e6e
--- /dev/null
+++ b/block/bsg-lib.c
@@ -0,0 +1,298 @@
+/*
+ *  BSG helper library
+ *
+ *  Copyright (C) 2008   James Smart, Emulex Corporation
+ *  Copyright (C) 2011   Red Hat, Inc.  All rights reserved.
+ *  Copyright (C) 2011   Mike Christie
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/bsg-lib.h>
+#include <linux/module.h>
+#include <scsi/scsi_cmnd.h>
+
+/**
+ * bsg_destroy_job - routine to teardown/delete a bsg job
+ * @job: bsg_job that is to be torn down
+ */
+static void bsg_destroy_job(struct bsg_job *job)
+{
+       put_device(job->dev);   /* release reference for the request */
+
+       kfree(job->request_payload.sg_list);
+       kfree(job->reply_payload.sg_list);
+       kfree(job);
+}
+
+/**
+ * bsg_job_done - completion routine for bsg requests
+ * @job: bsg_job that is complete
+ * @result: job reply result
+ * @reply_payload_rcv_len: length of payload recvd
+ *
+ * The LLD should call this when the bsg job has completed.
+ */
+void bsg_job_done(struct bsg_job *job, int result,
+                 unsigned int reply_payload_rcv_len)
+{
+       struct request *req = job->req;
+       struct request *rsp = req->next_rq;
+       int err;
+
+       err = job->req->errors = result;
+       if (err < 0)
+               /* we're only returning the result field in the reply */
+               job->req->sense_len = sizeof(u32);
+       else
+               job->req->sense_len = job->reply_len;
+       /* we assume all request payload was transferred, residual == 0 */
+       req->resid_len = 0;
+
+       if (rsp) {
+               WARN_ON(reply_payload_rcv_len > rsp->resid_len);
+
+               /* set reply (bidi) residual */
+               rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len);
+       }
+       blk_complete_request(req);
+}
+EXPORT_SYMBOL_GPL(bsg_job_done);
+
+/**
+ * bsg_softirq_done - softirq done routine for destroying the bsg requests
+ * @rq: BSG request that holds the job to be destroyed
+ */
+static void bsg_softirq_done(struct request *rq)
+{
+       struct bsg_job *job = rq->special;
+
+       blk_end_request_all(rq, rq->errors);
+       bsg_destroy_job(job);
+}
+
+static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
+{
+       size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments);
+
+       BUG_ON(!req->nr_phys_segments);
+
+       buf->sg_list = kzalloc(sz, GFP_KERNEL);
+       if (!buf->sg_list)
+               return -ENOMEM;
+       sg_init_table(buf->sg_list, req->nr_phys_segments);
+       buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
+       buf->payload_len = blk_rq_bytes(req);
+       return 0;
+}
+
+/**
+ * bsg_create_job - create the bsg_job structure for the bsg request
+ * @dev: device that is being sent the bsg request
+ * @req: BSG request that needs a job structure
+ */
+static int bsg_create_job(struct device *dev, struct request *req)
+{
+       struct request *rsp = req->next_rq;
+       struct request_queue *q = req->q;
+       struct bsg_job *job;
+       int ret;
+
+       BUG_ON(req->special);
+
+       job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
+       if (!job)
+               return -ENOMEM;
+
+       req->special = job;
+       job->req = req;
+       if (q->bsg_job_size)
+               job->dd_data = (void *)&job[1];
+       job->request = req->cmd;
+       job->request_len = req->cmd_len;
+       job->reply = req->sense;
+       job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
+                                                * allocated */
+       if (req->bio) {
+               ret = bsg_map_buffer(&job->request_payload, req);
+               if (ret)
+                       goto failjob_rls_job;
+       }
+       if (rsp && rsp->bio) {
+               ret = bsg_map_buffer(&job->reply_payload, rsp);
+               if (ret)
+                       goto failjob_rls_rqst_payload;
+       }
+       job->dev = dev;
+       /* take a reference for the request */
+       get_device(job->dev);
+       return 0;
+
+failjob_rls_rqst_payload:
+       kfree(job->request_payload.sg_list);
+failjob_rls_job:
+       kfree(job);
+       return -ENOMEM;
+}
+
+/*
+ * bsg_goose_queue - restart queue in case it was stopped
+ * @q: request q to be restarted
+ */
+void bsg_goose_queue(struct request_queue *q)
+{
+       if (!q)
+               return;
+
+       blk_run_queue_async(q);
+}
+EXPORT_SYMBOL_GPL(bsg_goose_queue);
+
+/**
+ * bsg_request_fn - generic handler for bsg requests
+ * @q: request queue to manage
+ *
+ * On error the create_bsg_job function should return a -Exyz error value
+ * that will be set to the req->errors.
+ *
+ * Drivers/subsys should pass this to the queue init function.
+ */
+void bsg_request_fn(struct request_queue *q)
+{
+       struct device *dev = q->queuedata;
+       struct request *req;
+       struct bsg_job *job;
+       int ret;
+
+       if (!get_device(dev))
+               return;
+
+       while (1) {
+               req = blk_fetch_request(q);
+               if (!req)
+                       break;
+               spin_unlock_irq(q->queue_lock);
+
+               ret = bsg_create_job(dev, req);
+               if (ret) {
+                       req->errors = ret;
+                       blk_end_request_all(req, ret);
+                       spin_lock_irq(q->queue_lock);
+                       continue;
+               }
+
+               job = req->special;
+               ret = q->bsg_job_fn(job);
+               spin_lock_irq(q->queue_lock);
+               if (ret)
+                       break;
+       }
+
+       spin_unlock_irq(q->queue_lock);
+       put_device(dev);
+       spin_lock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL_GPL(bsg_request_fn);
+
+/**
+ * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
+ * @dev: device to attach bsg device to
+ * @q: request queue setup by caller
+ * @name: device to give bsg device
+ * @job_fn: bsg job handler
+ * @dd_job_size: size of LLD data needed for each job
+ *
+ * The caller should have setup the reuqest queue with bsg_request_fn
+ * as the request_fn.
+ */
+int bsg_setup_queue(struct device *dev, struct request_queue *q,
+                   char *name, bsg_job_fn *job_fn, int dd_job_size)
+{
+       int ret;
+
+       q->queuedata = dev;
+       q->bsg_job_size = dd_job_size;
+       q->bsg_job_fn = job_fn;
+       queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+       blk_queue_softirq_done(q, bsg_softirq_done);
+       blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
+
+       ret = bsg_register_queue(q, dev, name, NULL);
+       if (ret) {
+               printk(KERN_ERR "%s: bsg interface failed to "
+                      "initialize - register queue\n", dev->kobj.name);
+               return ret;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(bsg_setup_queue);
+
+/**
+ * bsg_remove_queue - Deletes the bsg dev from the q
+ * @q: the request_queue that is to be torn down.
+ *
+ * Notes:
+ *   Before unregistering the queue empty any requests that are blocked
+ */
+void bsg_remove_queue(struct request_queue *q)
+{
+       struct request *req; /* block request */
+       int counts; /* totals for request_list count and starved */
+
+       if (!q)
+               return;
+
+       /* Stop taking in new requests */
+       spin_lock_irq(q->queue_lock);
+       blk_stop_queue(q);
+
+       /* drain all requests in the queue */
+       while (1) {
+               /* need the lock to fetch a request
+                * this may fetch the same reqeust as the previous pass
+                */
+               req = blk_fetch_request(q);
+               /* save requests in use and starved */
+               counts = q->rq.count[0] + q->rq.count[1] +
+                        q->rq.starved[0] + q->rq.starved[1];
+               spin_unlock_irq(q->queue_lock);
+               /* any requests still outstanding? */
+               if (counts == 0)
+                       break;
+
+               /* This may be the same req as the previous iteration,
+                * always send the blk_end_request_all after a prefetch.
+                * It is not okay to not end the request because the
+                * prefetch started the request.
+                */
+               if (req) {
+                       /* return -ENXIO to indicate that this queue is
+                        * going away
+                        */
+                       req->errors = -ENXIO;
+                       blk_end_request_all(req, -ENXIO);
+               }
+
+               msleep(200); /* allow bsg to possibly finish */
+               spin_lock_irq(q->queue_lock);
+       }
+       bsg_unregister_queue(q);
+}
+EXPORT_SYMBOL_GPL(bsg_remove_queue);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c

index 1f96ad6254f1eea18478b80f3a58de4173c9f8ef..a33bd4377c615e789509431c5f8a7583939149a4 100644 (file)
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -130,6 +130,8 @@ struct cfq_queue {
         unsigned long slice_end;
         long slice_resid;
  
+       /* pending metadata requests */
+       int meta_pending;
         /* number of requests that are on the dispatch list or inside driver */
         int dispatched;
  
@@ -682,6 +684,9 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
         if (rq_is_sync(rq1) != rq_is_sync(rq2))
                 return rq_is_sync(rq1) ? rq1 : rq2;
  
+       if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
+               return rq1->cmd_flags & REQ_META ? rq1 : rq2;
+
         s1 = blk_rq_pos(rq1);
         s2 = blk_rq_pos(rq2);
  
@@ -1209,6 +1214,9 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
  
         hlist_del_init(&cfqg->cfqd_node);
  
+       BUG_ON(cfqd->nr_blkcg_linked_grps <= 0);
+       cfqd->nr_blkcg_linked_grps--;
+
         /*
          * Put the reference taken at the time of creation so that when all
          * queues are gone, group can be destroyed.
@@ -1604,6 +1612,10 @@ static void cfq_remove_request(struct request *rq)
         cfqq->cfqd->rq_queued--;
         cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
                                         rq_data_dir(rq), rq_is_sync(rq));
+       if (rq->cmd_flags & REQ_META) {
+               WARN_ON(!cfqq->meta_pending);
+               cfqq->meta_pending--;
+       }
  }
  
  static int cfq_merge(struct request_queue *q, struct request **req,
@@ -3356,6 +3368,13 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
             RB_EMPTY_ROOT(&cfqq->sort_list))
                 return true;
  
+       /*
+        * So both queues are sync. Let the new request get disk time if
+        * it's a metadata request and the current queue is doing regular IO.
+        */
+       if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
+               return true;
+
         /*
          * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
          */
@@ -3420,6 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
         struct cfq_io_context *cic = RQ_CIC(rq);
  
         cfqd->rq_queued++;
+       if (rq->cmd_flags & REQ_META)
+               cfqq->meta_pending++;
  
         cfq_update_io_thinktime(cfqd, cfqq, cic);
         cfq_update_io_seektime(cfqd, cfqq, rq);
diff --git a/block/genhd.c b/block/genhd.c

index 5cb51c55f6d8d57309888aa2810b89f44d876301..e2f67902dd024ed2f07abdfc190c73558d2ddb7b 100644 (file)
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1146,17 +1146,17 @@ static int diskstats_show(struct seq_file *seqf, void *v)
                 cpu = part_stat_lock();
                 part_round_stats(cpu, hd);
                 part_stat_unlock();
-               seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
-                          "%u %lu %lu %llu %u %u %u %u\n",
+               seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
+                          "%u %lu %lu %lu %u %u %u %u\n",
                            MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
                            disk_name(gp, hd->partno, buf),
                            part_stat_read(hd, ios[READ]),
                            part_stat_read(hd, merges[READ]),
-                          (unsigned long long)part_stat_read(hd, sectors[READ]),
+                          part_stat_read(hd, sectors[READ]),
                            jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
                            part_stat_read(hd, ios[WRITE]),
                            part_stat_read(hd, merges[WRITE]),
-                          (unsigned long long)part_stat_read(hd, sectors[WRITE]),
+                          part_stat_read(hd, sectors[WRITE]),
                            jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
                            part_in_flight(hd),
                            jiffies_to_msecs(part_stat_read(hd, io_ticks)),
diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h

index bc533dde16c47785d2c0321a173039eeede4e69b..f895a244ca7ea187588356475e6ca4d700804b52 100644 (file)
--- a/drivers/acpi/acpica/acconfig.h
+++ b/drivers/acpi/acpica/acconfig.h
@@ -121,7 +121,7 @@
  
  /* Maximum sleep allowed via Sleep() operator */
  
-#define ACPI_MAX_SLEEP                  20000  /* Two seconds */
+#define ACPI_MAX_SLEEP                  2000   /* Two seconds */
  
  /******************************************************************************
   *
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig

index c34aa51af4eed85a99bc9aad7720e4dd582f47a8..e3f47872ec222949d13f239fdaf9e00eb5985984 100644 (file)
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -13,6 +13,7 @@ config ACPI_APEI_GHES
         bool "APEI Generic Hardware Error Source"
         depends on ACPI_APEI && X86
         select ACPI_HED
+       select IRQ_WORK
         select LLIST
         select GENERIC_ALLOCATOR
         help
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c

index 8041248fce9ba245a6eeeedf4adb4dd7a2b5ee20..61540360d5ce815f67e67250874ad357f3e328e6 100644 (file)
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -618,7 +618,7 @@ int apei_osc_setup(void)
         };
  
         capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE;
-       capbuf[OSC_SUPPORT_TYPE] = 0;
+       capbuf[OSC_SUPPORT_TYPE] = 1;
         capbuf[OSC_CONTROL_TYPE] = 0;
  
         if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig

index ca3e6be44a0473a9f532ee6ed6af23ac3cab1cde..5987e0ba8c2de7b4548325211d98798d4abc7307 100644 (file)
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -468,6 +468,15 @@ config PATA_ICSIDE
           interface card.  This is not required for ICS partition support.
           If you are unsure, say N to this.
  
+config PATA_IMX
+       tristate "PATA support for Freescale iMX"
+       depends on ARCH_MXC
+       help
+         This option enables support for the PATA host available on Freescale
+          iMX SoCs.
+
+         If unsure, say N.
+
  config PATA_IT8213
         tristate "IT8213 PATA support (Experimental)"
         depends on PCI && EXPERIMENTAL
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile

index 8ac64e1aa051c0043a1c4cc24ff49efdcfd74635..9550d691fd19bc92b2d7a53f4beca3bc85350cb8 100644 (file)
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_PATA_HPT37X)     += pata_hpt37x.o
  obj-$(CONFIG_PATA_HPT3X2N)     += pata_hpt3x2n.o
  obj-$(CONFIG_PATA_HPT3X3)      += pata_hpt3x3.o
  obj-$(CONFIG_PATA_ICSIDE)      += pata_icside.o
+obj-$(CONFIG_PATA_IMX)         += pata_imx.o
  obj-$(CONFIG_PATA_IT8213)      += pata_it8213.o
  obj-$(CONFIG_PATA_IT821X)      += pata_it821x.o
  obj-$(CONFIG_PATA_JMICRON)     += pata_jmicron.o
diff --git a/drivers/ata/pata_imx.c b/drivers/ata/pata_imx.c

new file mode 100644 (file)

index 0000000..ca9d9ca
--- /dev/null
+++ b/drivers/ata/pata_imx.c
@@ -0,0 +1,253 @@
+/*
+ * Freescale iMX PATA driver
+ *
+ * Copyright (C) 2011 Arnaud Patard <arnaud.patard@rtp-net.org>
+ *
+ * Based on pata_platform - Copyright (C) 2006 - 2007  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * TODO:
+ * - dmaengine support
+ * - check if timing stuff needed
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <scsi/scsi_host.h>
+#include <linux/ata.h>
+#include <linux/libata.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+
+#define DRV_NAME "pata_imx"
+
+#define PATA_IMX_ATA_CONTROL           0x24
+#define PATA_IMX_ATA_CTRL_FIFO_RST_B   (1<<7)
+#define PATA_IMX_ATA_CTRL_ATA_RST_B    (1<<6)
+#define PATA_IMX_ATA_CTRL_IORDY_EN     (1<<0)
+#define PATA_IMX_ATA_INT_EN            0x2C
+#define PATA_IMX_ATA_INTR_ATA_INTRQ2   (1<<3)
+#define PATA_IMX_DRIVE_DATA            0xA0
+#define PATA_IMX_DRIVE_CONTROL         0xD8
+
+struct pata_imx_priv {
+       struct clk *clk;
+       /* timings/interrupt/control regs */
+       u8 *host_regs;
+       u32 ata_ctl;
+};
+
+static int pata_imx_set_mode(struct ata_link *link, struct ata_device **unused)
+{
+       struct ata_device *dev;
+       struct ata_port *ap = link->ap;
+       struct pata_imx_priv *priv = ap->host->private_data;
+       u32 val;
+
+       ata_for_each_dev(dev, link, ENABLED) {
+               dev->pio_mode = dev->xfer_mode = XFER_PIO_0;
+               dev->xfer_shift = ATA_SHIFT_PIO;
+               dev->flags |= ATA_DFLAG_PIO;
+
+               val = __raw_readl(priv->host_regs + PATA_IMX_ATA_CONTROL);
+               if (ata_pio_need_iordy(dev))
+                       val |= PATA_IMX_ATA_CTRL_IORDY_EN;
+               else
+                       val &= ~PATA_IMX_ATA_CTRL_IORDY_EN;
+               __raw_writel(val, priv->host_regs + PATA_IMX_ATA_CONTROL);
+
+               ata_dev_printk(dev, KERN_INFO, "configured for PIO\n");
+       }
+       return 0;
+}
+
+static struct scsi_host_template pata_imx_sht = {
+       ATA_PIO_SHT(DRV_NAME),
+};
+
+static struct ata_port_operations pata_imx_port_ops = {
+       .inherits               = &ata_sff_port_ops,
+       .sff_data_xfer          = ata_sff_data_xfer_noirq,
+       .cable_detect           = ata_cable_unknown,
+       .set_mode               = pata_imx_set_mode,
+};
+
+static void pata_imx_setup_port(struct ata_ioports *ioaddr)
+{
+       /* Fixup the port shift for platforms that need it */
+       ioaddr->data_addr       = ioaddr->cmd_addr + (ATA_REG_DATA    << 2);
+       ioaddr->error_addr      = ioaddr->cmd_addr + (ATA_REG_ERR     << 2);
+       ioaddr->feature_addr    = ioaddr->cmd_addr + (ATA_REG_FEATURE << 2);
+       ioaddr->nsect_addr      = ioaddr->cmd_addr + (ATA_REG_NSECT   << 2);
+       ioaddr->lbal_addr       = ioaddr->cmd_addr + (ATA_REG_LBAL    << 2);
+       ioaddr->lbam_addr       = ioaddr->cmd_addr + (ATA_REG_LBAM    << 2);
+       ioaddr->lbah_addr       = ioaddr->cmd_addr + (ATA_REG_LBAH    << 2);
+       ioaddr->device_addr     = ioaddr->cmd_addr + (ATA_REG_DEVICE  << 2);
+       ioaddr->status_addr     = ioaddr->cmd_addr + (ATA_REG_STATUS  << 2);
+       ioaddr->command_addr    = ioaddr->cmd_addr + (ATA_REG_CMD     << 2);
+}
+
+static int __devinit pata_imx_probe(struct platform_device *pdev)
+{
+       struct ata_host *host;
+       struct ata_port *ap;
+       struct pata_imx_priv *priv;
+       int irq = 0;
+       struct resource *io_res;
+
+       io_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (io_res == NULL)
+               return -EINVAL;
+
+       irq = platform_get_irq(pdev, 0);
+       if (irq <= 0)
+               return -EINVAL;
+
+       priv = devm_kzalloc(&pdev->dev,
+                               sizeof(struct pata_imx_priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->clk = clk_get(&pdev->dev, NULL);
+       if (IS_ERR(priv->clk)) {
+               dev_err(&pdev->dev, "Failed to get clock\n");
+               return PTR_ERR(priv->clk);
+       }
+
+       clk_enable(priv->clk);
+
+       host = ata_host_alloc(&pdev->dev, 1);
+       if (!host)
+               goto free_priv;
+
+       host->private_data = priv;
+       ap = host->ports[0];
+
+       ap->ops = &pata_imx_port_ops;
+       ap->pio_mask = ATA_PIO0;
+       ap->flags |= ATA_FLAG_SLAVE_POSS;
+
+       priv->host_regs = devm_ioremap(&pdev->dev, io_res->start,
+               resource_size(io_res));
+       if (!priv->host_regs) {
+               dev_err(&pdev->dev, "failed to map IO/CTL base\n");
+               goto free_priv;
+       }
+
+       ap->ioaddr.cmd_addr = priv->host_regs + PATA_IMX_DRIVE_DATA;
+       ap->ioaddr.ctl_addr = priv->host_regs + PATA_IMX_DRIVE_CONTROL;
+
+       ap->ioaddr.altstatus_addr = ap->ioaddr.ctl_addr;
+
+       pata_imx_setup_port(&ap->ioaddr);
+
+       ata_port_desc(ap, "cmd 0x%llx ctl 0x%llx",
+               (unsigned long long)io_res->start + PATA_IMX_DRIVE_DATA,
+               (unsigned long long)io_res->start + PATA_IMX_DRIVE_CONTROL);
+
+       /* deassert resets */
+       __raw_writel(PATA_IMX_ATA_CTRL_FIFO_RST_B |
+                       PATA_IMX_ATA_CTRL_ATA_RST_B,
+                       priv->host_regs + PATA_IMX_ATA_CONTROL);
+       /* enable interrupts */
+       __raw_writel(PATA_IMX_ATA_INTR_ATA_INTRQ2,
+                       priv->host_regs + PATA_IMX_ATA_INT_EN);
+
+       /* activate */
+       return ata_host_activate(host, irq, ata_sff_interrupt, 0,
+                               &pata_imx_sht);
+
+free_priv:
+       clk_disable(priv->clk);
+       clk_put(priv->clk);
+       return -ENOMEM;
+}
+
+static int __devexit pata_imx_remove(struct platform_device *pdev)
+{
+       struct ata_host *host = dev_get_drvdata(&pdev->dev);
+       struct pata_imx_priv *priv = host->private_data;
+
+       ata_host_detach(host);
+
+       __raw_writel(0, priv->host_regs + PATA_IMX_ATA_INT_EN);
+
+       clk_disable(priv->clk);
+       clk_put(priv->clk);
+
+       return 0;
+}
+
+#ifdef CONFIG_PM
+static int pata_imx_suspend(struct device *dev)
+{
+       struct ata_host *host = dev_get_drvdata(dev);
+       struct pata_imx_priv *priv = host->private_data;
+       int ret;
+
+       ret = ata_host_suspend(host, PMSG_SUSPEND);
+       if (!ret) {
+               __raw_writel(0, priv->host_regs + PATA_IMX_ATA_INT_EN);
+               priv->ata_ctl =
+                       __raw_readl(priv->host_regs + PATA_IMX_ATA_CONTROL);
+               clk_disable(priv->clk);
+       }
+
+       return ret;
+}
+
+static int pata_imx_resume(struct device *dev)
+{
+       struct ata_host *host = dev_get_drvdata(dev);
+       struct pata_imx_priv *priv = host->private_data;
+
+       clk_enable(priv->clk);
+
+       __raw_writel(priv->ata_ctl, priv->host_regs + PATA_IMX_ATA_CONTROL);
+
+       __raw_writel(PATA_IMX_ATA_INTR_ATA_INTRQ2,
+                       priv->host_regs + PATA_IMX_ATA_INT_EN);
+
+       ata_host_resume(host);
+
+       return 0;
+}
+
+static const struct dev_pm_ops pata_imx_pm_ops = {
+       .suspend        = pata_imx_suspend,
+       .resume         = pata_imx_resume,
+};
+#endif
+
+static struct platform_driver pata_imx_driver = {
+       .probe          = pata_imx_probe,
+       .remove         = __devexit_p(pata_imx_remove),
+       .driver = {
+               .name           = DRV_NAME,
+               .owner          = THIS_MODULE,
+#ifdef CONFIG_PM
+               .pm             = &pata_imx_pm_ops,
+#endif
+       },
+};
+
+static int __init pata_imx_init(void)
+{
+       return platform_driver_register(&pata_imx_driver);
+}
+
+static void __exit pata_imx_exit(void)
+{
+       platform_driver_unregister(&pata_imx_driver);
+}
+module_init(pata_imx_init);
+module_exit(pata_imx_exit);
+
+MODULE_AUTHOR("Arnaud Patard <arnaud.patard@rtp-net.org>");
+MODULE_DESCRIPTION("low-level driver for iMX PATA");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c

index 65e4be6be22061c882c9763661b319c0725f3d5d..8e9f5048a10a9dab8b2705a579d401d972fe036a 100644 (file)
--- a/drivers/ata/pata_via.c
+++ b/drivers/ata/pata_via.c
@@ -124,6 +124,17 @@ static const struct via_isa_bridge {
         { NULL }
  };
  
+static const struct dmi_system_id no_atapi_dma_dmi_table[] = {
+       {
+               .ident = "AVERATEC 3200",
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "AVERATEC"),
+                       DMI_MATCH(DMI_BOARD_NAME, "3200"),
+               },
+       },
+       { }
+};
+
  struct via_port {
         u8 cached_device;
  };
@@ -355,6 +366,13 @@ static unsigned long via_mode_filter(struct ata_device *dev, unsigned long mask)
                         mask &= ~ ATA_MASK_UDMA;
                 }
         }
+
+       if (dev->class == ATA_DEV_ATAPI &&
+           dmi_check_system(no_atapi_dma_dmi_table)) {
+               ata_dev_warn(dev, "controller locks up on ATAPI DMA, forcing PIO\n");
+               mask &= ATA_MASK_PIO;
+       }
+
         return mask;
  }
  
diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c

index 0a9a774a7e1e9f54c9b2188b251b2a5105c800ea..5c4237452f5072a97a82eca00e95d55c15ce2542 100644 (file)
--- a/drivers/ata/sata_dwc_460ex.c
+++ b/drivers/ata/sata_dwc_460ex.c
@@ -1329,7 +1329,7 @@ static int sata_dwc_port_start(struct ata_port *ap)
                         dev_err(ap->dev, "%s: dma_alloc_coherent failed\n",
                                  __func__);
                         err = -ENOMEM;
-                       goto CLEANUP;
+                       goto CLEANUP_ALLOC;
                 }
         }
  
@@ -1349,15 +1349,13 @@ static int sata_dwc_port_start(struct ata_port *ap)
         /* Clear any error bits before libata starts issuing commands */
         clear_serror();
         ap->private_data = hsdevp;
+       dev_dbg(ap->dev, "%s: done\n", __func__);
+       return 0;
  
+CLEANUP_ALLOC:
+       kfree(hsdevp);
  CLEANUP:
-       if (err) {
-               sata_dwc_port_stop(ap);
-               dev_dbg(ap->dev, "%s: fail\n", __func__);
-       } else {
-               dev_dbg(ap->dev, "%s: done\n", __func__);
-       }
-
+       dev_dbg(ap->dev, "%s: fail. ap->id = %d\n", __func__, ap->print_id);
         return err;
  }
  
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c

index 98c1d780f552505aac129cf2133b98576161e6ca..9dfb40b8c2c9f7727cf879d38ace79369f948c6e 100644 (file)
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -438,7 +438,7 @@ static void sil_host_intr(struct ata_port *ap, u32 bmdma2)
         u8 status;
  
         if (unlikely(bmdma2 & SIL_DMA_SATA_IRQ)) {
-               u32 serror;
+               u32 serror = 0xffffffff;
  
                 /* SIEN doesn't mask SATA IRQs on some 3112s.  Those
                  * controllers continue to assert IRQ as long as
diff --git a/drivers/base/devres.c b/drivers/base/devres.c

index cf7a0c78805278e64f195921b3991c577691da66..65cd74832450507b9f7b1949b6ca85c648109ff1 100644 (file)
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -397,6 +397,7 @@ static int remove_nodes(struct device *dev,
  
  static int release_nodes(struct device *dev, struct list_head *first,
                          struct list_head *end, unsigned long flags)
+       __releases(&dev->devres_lock)
  {
         LIST_HEAD(todo);
         int cnt;
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c

index 33e1bed68fddf771ae9b12a716114215b59b5f68..a4760e095ff51b36a58871cdf9329bf4573f6c1d 100644 (file)
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -376,7 +376,7 @@ int devtmpfs_mount(const char *mntdir)
         return err;
  }
  
-static __initdata DECLARE_COMPLETION(setup_done);
+static DECLARE_COMPLETION(setup_done);
  
  static int handle(const char *name, mode_t mode, struct device *dev)
  {
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c

index bbb03e6f7255b715e894080e66a7ccfae34b2c24..06ed6b4e7df5ecc0d236cd73ee2690933f8e8ee7 100644 (file)
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -521,11 +521,6 @@ static int _request_firmware(const struct firmware **firmware_p,
         if (!firmware_p)
                 return -EINVAL;
  
-       if (WARN_ON(usermodehelper_is_disabled())) {
-               dev_err(device, "firmware: %s will not be loaded\n", name);
-               return -EBUSY;
-       }
-
         *firmware_p = firmware = kzalloc(sizeof(*firmware), GFP_KERNEL);
         if (!firmware) {
                 dev_err(device, "%s: kmalloc(struct firmware) failed\n",
@@ -539,6 +534,12 @@ static int _request_firmware(const struct firmware **firmware_p,
                 return 0;
         }
  
+       if (WARN_ON(usermodehelper_is_disabled())) {
+               dev_err(device, "firmware: %s will not be loaded\n", name);
+               retval = -EBUSY;
+               goto out;
+       }
+
         if (uevent)
                 dev_dbg(device, "firmware: requesting %s\n", name);
  
diff --git a/drivers/base/platform.c b/drivers/base/platform.c

index 0cad9c7f6bb50f68bdb0a371e44d51530a02ec06..99a5272d7c2fde1d3b148fc0fd2150fc9ffb8121 100644 (file)
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -33,7 +33,7 @@ EXPORT_SYMBOL_GPL(platform_bus);
  
  /**
   * arch_setup_pdev_archdata - Allow manipulation of archdata before its used
- * @dev: platform device
+ * @pdev: platform device
   *
   * This is called before platform_device_add() such that any pdev_archdata may
   * be setup before the platform_notifier is called.  So if a user needs to
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c

index a846b2f95cfbc5bfbf253f5955da6cb8cd07804f..2c18d584066d561cc08f35f864a41d2c9471b890 100644 (file)
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c
@@ -19,7 +19,7 @@
  
  struct pm_clk_data {
         struct list_head clock_list;
-       struct mutex lock;
+       spinlock_t lock;
  };
  
  enum pce_status {
@@ -73,9 +73,9 @@ int pm_clk_add(struct device *dev, const char *con_id)
                 }
         }
  
-       mutex_lock(&pcd->lock);
+       spin_lock_irq(&pcd->lock);
         list_add_tail(&ce->node, &pcd->clock_list);
-       mutex_unlock(&pcd->lock);
+       spin_unlock_irq(&pcd->lock);
         return 0;
  }
  
@@ -83,8 +83,8 @@ int pm_clk_add(struct device *dev, const char *con_id)
   * __pm_clk_remove - Destroy PM clock entry.
   * @ce: PM clock entry to destroy.
   *
- * This routine must be called under the mutex protecting the PM list of clocks
- * corresponding the the @ce's device.
+ * This routine must be called under the spinlock protecting the PM list of
+ * clocks corresponding the the @ce's device.
   */
  static void __pm_clk_remove(struct pm_clock_entry *ce)
  {
@@ -123,7 +123,7 @@ void pm_clk_remove(struct device *dev, const char *con_id)
         if (!pcd)
                 return;
  
-       mutex_lock(&pcd->lock);
+       spin_lock_irq(&pcd->lock);
  
         list_for_each_entry(ce, &pcd->clock_list, node) {
                 if (!con_id && !ce->con_id) {
@@ -137,7 +137,7 @@ void pm_clk_remove(struct device *dev, const char *con_id)
                 }
         }
  
-       mutex_unlock(&pcd->lock);
+       spin_unlock_irq(&pcd->lock);
  }
  
  /**
@@ -158,7 +158,7 @@ int pm_clk_init(struct device *dev)
         }
  
         INIT_LIST_HEAD(&pcd->clock_list);
-       mutex_init(&pcd->lock);
+       spin_lock_init(&pcd->lock);
         dev->power.subsys_data = pcd;
         return 0;
  }
@@ -181,12 +181,12 @@ void pm_clk_destroy(struct device *dev)
  
         dev->power.subsys_data = NULL;
  
-       mutex_lock(&pcd->lock);
+       spin_lock_irq(&pcd->lock);
  
         list_for_each_entry_safe_reverse(ce, c, &pcd->clock_list, node)
                 __pm_clk_remove(ce);
  
-       mutex_unlock(&pcd->lock);
+       spin_unlock_irq(&pcd->lock);
  
         kfree(pcd);
  }
@@ -220,13 +220,14 @@ int pm_clk_suspend(struct device *dev)
  {
         struct pm_clk_data *pcd = __to_pcd(dev);
         struct pm_clock_entry *ce;
+       unsigned long flags;
  
         dev_dbg(dev, "%s()\n", __func__);
  
         if (!pcd)
                 return 0;
  
-       mutex_lock(&pcd->lock);
+       spin_lock_irqsave(&pcd->lock, flags);
  
         list_for_each_entry_reverse(ce, &pcd->clock_list, node) {
                 if (ce->status == PCE_STATUS_NONE)
@@ -238,7 +239,7 @@ int pm_clk_suspend(struct device *dev)
                 }
         }
  
-       mutex_unlock(&pcd->lock);
+       spin_unlock_irqrestore(&pcd->lock, flags);
  
         return 0;
  }
@@ -251,13 +252,14 @@ int pm_clk_resume(struct device *dev)
  {
         struct pm_clk_data *pcd = __to_pcd(dev);
         struct pm_clock_entry *ce;
+       unsigned long flags;
  
         dev_dbg(dev, "%s()\n", __func__);
  
         if (!pcd)
                 return 0;
  
-       mutex_lock(&pcd->lock);
+       spin_lock_irqsave(&pcd->lock, flags);
  
         list_for_each_entry(ce, &pcd->clock_list, node) {
                 if (ce->status == PCE_STATUS_NONE)
@@ -269,7 +271,7 @@ int pm_clk_resume(struct device *dev)
                 }
         }
  
-       mutex_unlock(&pcd->lock);
+       spin_unlock_irqrestore(&pcd->lock, flags);
  
         return 0;
  }
@@ -344,6 +346,7 @@ int pm_clk_suspend(struct device *dev)
  {
         struct pm_clk_data *pcd = __to_pcd(dev);
         struct pm_clock_entry *ce;
+       unsigned long flags;
  
         dev_dbg(dev, "%s()\n", __func__);
  
@@ -351,12 +354,12 @@ int pm_clk_suspend(struct device *dev)
         if (!pcd || !dev->driver)
                 return 0;
  
-       mutex_lock(&pcd->lock);
+       spin_lock_irqsave(&pcd->lock, flags);
  
         list_for_each_entry_reverse(ce, &pcd->clock_list, node)
                 clk_disable(ce->clk);
  
-       mutex_unlock(&pcd->lock);
+       spin_unlock_irqrestore(&pcd->lock, flags);
  
         return 0;
  }
@@ -369,6 +372,7 @@ int pm_clk_resume(struct device *dev)
  {
         struct pm_clk_data *pcd = __to_pcd(dev);
         struct pm_clock_entry *ce;
+       unsigned long flags;
  
         dev_dbg(dev, "%s()\n", __func__);
  
@@ -376,12 +380,12 @@ int pm_clk_resume(struct device *dev)
         if (!pcd || !dev->driver)
                 return 0;
  
-       mutex_lock(&pcd->lock);
+       spin_lock_irqsave(&pcd->lock, flags);
  
         list_for_each_entry(ce, &pcd->clock_list, node)
                 clk_enable(ce->clk);
  
-       mutex_unlock(&pcd->lock);
+       spin_unlock_irqrestore(&pcd->lock, flags);
  
         return 0;
  }
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c

index e18566a0fedd4c9f7492eab168ffed2bc8d36932..1c374579407c14316b35e8b9d7faff71077b18f6 100644 (file)
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -460,6 +460,21 @@ static int pm_genpd_runtime_resume(struct device *dev)
         return 0;
  }
  
+/**
+ * pm_genpd_poweroff_unused - Power off all PM domains with no devices in use.
+ */
+void pm_genpd_poweroff_unused(void)
+{
+       struct generic_pm_domain *genpd;
+
+       mutex_lock(&gpd_list_lock);
+
+       list_for_each_entry(genpd, &gpd_list, gpd_list_node)
+               genpd_queue_power_off_work(genpd);
+
+       mutex_unlock(&gpd_list_lock);
+}
+
  #else
  
  static inline void genpd_power_off_work_fn(struct work_struct *work) {}
@@ -1255,18 +1270,3 @@ void pm_genpd_init(struct generic_pm_domain *genpd,
         list_add(&genpd->gpd_list_node, &gpd_list);
         mutex_unlock(&gpd_list_lock);
  }
-
-/**
- * pm_genpd_poweroff_unused - Power off all PM domains with no devices in use.
- */
-void pm_genpd_poweroff_unused(void)
-{
-       struct generic_pm_domain *genpd;
-
-       mutex_lock(&gpd_list_lock);
-
-       list_for_each_entry(genpd, &gpd_list, gpd_list_node)
-               genpd_queue_power_off_work(genpd);
-
-       mutex_unlock(&gpd_list_lock);
-}
diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c

index c2231ff06cbcfebfe4a5ef6c0aee1ffa6ecc28fb..c4f7a45cd2c3386a95776081eab5350bb0034017 100644 (file)
--- a/drivers/base/regmap/regmap-i2c.c
+++ b/drivers/base/regmap/regmap-i2c.c
@@ -113,3 +113,4 @@ struct regmap *regmap_init_i2c(struct i2c_client *i2c,
  }
  EXPORT_SYMBOL_GPL(regmap_init_i2c);
  
+MODULE_LICENSE("GPL");
diff --git a/drivers/base/regmap/regmap-spi.c b/drivers/base/regmap/regmap-spi.c

index 4deba0621bc7f035b66ccc1ffc6821e580591f43..f8396945d6ed6339741ab0bd6db8dca9a985a587 100644 (file)
--- a/drivers/base/regmap/regmap-spi.c
+++ b/drivers/base/regmap/regmap-spi.c
@@ -13,6 +13,7 @@
  #include <linux/regmap.h>
  #include <linux/spi/spi.h>
  #include <linux/init.h>
+#include <linux/module.h>
  
  static int regmap_spi_write(struct device *dev, const void *data, size_t count)
  {
@@ -70,3 +71,5 @@ struct regmap *regmap_init_spi(struct spi_device *spi,
         return regmap_init(&spi->dev, &regmap_spi, config);
  }
  EXPORT_SYMBOL_GPL(regmap_init_spi);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c

index cf3565cae93d951ee7c77ddd7775e5aa878521b7..20663f8dae45a3674e6ce04c84a54c5e07361151 100644 (file)
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -168,13 +168,11 @@ struct regmap *regmap_init(struct device *dev,
         map->work_buf = kmalloc(map->format.buf_size, GFP_KERNEL);
         if (map->work_buf == NULL) {
                 ret = -ENOMEM;
-               goto err_bus;
+               goto err_map;
         }
  
         return map;
  
-err_bus:
-       module_put(map->bus->owner);
  err_map:
         kfree(map);
  err:
@@ -188,7 +186,6 @@ EXPORT_SYMBOL_GPL(regmap_init);
  void regmap_exit(struct regmap *map)
  {
         kfree(map->work_buf);
-       module_put(map->bus->owner);
         kfree(map);
  }
  EXPORT_SYMBOL_GPL(regmap_exit);
@@ -317,7 +314,7 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
                 u8[0] |= map->bus->read_flag_mask;
  
         ret = map->bus->read(map->dev, map->work_buf, map->format.reg_bytes,
-                            val, map->format.val_bytes);
+                            val, val_len);
         if (ret != 0)
                 return ret;
  
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c

index 873e2e4ac55f01795a3b174b103ce0960c484bc2..73b7b1a18fab6466db1ca9aa3f53a6bd4a420ca5 100644 (file)
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -15,6 +15,7 @@ MODULE_LICENSE("GPL");
  static int bcma_bus_match(struct device *dev, struct device_driver *drv);
  static int bcma_device_probe(struct device *dev);
  static int bcma_device_remove(struct device *dev);
+static int bcma_device_uevent(struct device *dev, struct kobj_uevent_env *env);
  
  static ssize_t manuf_show(struct device *dev, struct device_attribute *attr, char *buf)
  {
@@ -49,6 +50,7 @@ static struct bus_type bcma_bus_type = {
         .match          = bcma_bus_match,
         .probe          = bcma_device_probe,
         .remove         = bcma_device_remove,
+       .uevent         = bcma_device_uevent,
         .dev_attrs      = bcma_device_attrs,
  };
  
@@ -227,6 +229,16 @@ static int bcma_device_remove(struct device *dev)
         return 0;
  }
  
+static int bcma_device_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+       struct bcma_device *core = container_of(dev, struct bcma_device, dev);
+
+       return add_uevent_var(env,
+                             "MODALIAS=bcma:m%04Xid%04Xrev%02Xcl%02X",
+                             core->id.manuf, core->id.id,
+                             core->id.rev, core->id.class);
+}
+
  static int __init bcma_modinit(void)
  {
         int err;
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig

index 717d6e4e18d3d08086fd2d5365574adbd613aef8..6f07ec1c2f58d2dabd766463165e32728910faf6 100644 (file)
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -256,6 +256,21 @@ config BLK_DEV_LOOP
  
           Most users will answer N here.
  
+config BLK_DEV_LOOP_MIN_COUNT
+       int "Number of loop devices to pre-create at init time"
+       depends on BLK_DEV_LOOP
+       default 8
+       help
+         Static number of loop devices to be unconditionally pre-created
+         at init time.
+
+         This default value can be overwritten on the kernel command
+         line or with module-parameter loop.max_loop.
+
+         The historic default is 8. If a late 2011 version of losetup(8)
+         is used, it can be set to 0, since needed loop devices can be
+         dynamically allocated with the /dev/loop-control interface.
+
  config BLK_DEV_CRYPTOLOOP
         tristate "Cryptoloop Support"
         select CRYPTO
@@ -471,7 +486,7 @@ config XEN_BLKDEV_FRONTEND
           in another domain which drives the actual block device.
  
  config XEN_BLKDEV_BACKEND
-       tristate "Block-device backend driver"
+       tristate "Xen block-device backend driver"
         depends on XEN_BACKEND
         help
           The block-device backend driver allows the kernel to export its
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c

index 515bcd948a43d7ee04650a06044e7bea6bc4742c..0feab261e295b4c7ec75826066593f41bd44fc96 100644 (file)
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1829,10 +1829,10 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
  
         /* silently ignore cpu mask on UP kernel */
         if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) {
-               err = __bitmap_parse(sc.cpu_mask, 32, 0,
+               err = bitmap_parse(sc.cpu_mask, 32,
                                 cpumask_bits(new_cpu_mask), nr_cpu_ids);
                 if (err) {
-                       dev_warn(DEV, "__bitmap_parse() failed with %d\n", err);
+                       dev_warn(DEV, "bitmap_parse() failed with %d\n", err);
                         retcode = ERR_CPU_MASK_PARSE;
                         goto fail;
                 }
diff --git a/drivers/block/loop.c b/drivers/block/loop.c

index 76c8da78212bffec71f5dc7af025a84e4028d456..4720c7ade0aed0dfd4fccce47ec7e1b39402587a 100644 (file)
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -75,11 +75,11 @@
  #include <linux/kthread.h>
  #include <linux/splice.h>
  #include <linux/sysfs.h>
-
+#include <linux/miscdevice.h>
  #include <asm/uaccess.h>
  
-static LIST_HEAD(loop_devices);
-static DEFINE_MUTEX(loop_devices_mutex);
+static DEFINE_IDR(loop_index_idr);
+static DEFINE_MUTEX(loop_index_mutex);
  
  static int max_part;
  static int part_shift;
@@ -722,17 +722,10 @@ static inline int is_loop_device(struct file *file)
  static ssize_t loop_attr_show(struct device *dev, char *page,
                               ssize_t (*callback)(struct loop_device *, char *))
  {
-       struct loop_device *l, *lo = NULL;
-
-       mutex_lock(&loop_devices_mutex);
-       list_for_each_entry(l, &loop_devices, lo_list)
-               if (disk_to_dev(l->lo_disk) == dev) {
-                       lo = l;
-                       break;
-               }
-       mutex_unlock(&loop_devices_mutex);
+       struct gendisk *disk = dev_to_disk(dev);
+       struct loop_device *lo = disk->private_data;
  
-       return lo ? callback(lo, page) : -EIO;
+       return callback(lo, page);
  }
  
  #define LOOP_ATTR_RO(_name)                                            \
@@ -750,10 +743,10 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
         ssize_t ret;
         char *p = NULL;
  
-       mutex_lock(&lo->lo_ctl_mutex);
+       spin_lock_irq(&lo->lo_lock);
         if (lo->lo_backing_file)
                 p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
-       mutex_unlock(&lo->lo_ctl_mutex);
+       spin_unlock_irq(&lo->lo_lock);
  
         if (IS_ERR_OR_NULL(p))
                 ret = PTR_ERR(p);
@@ -1007,7 +1000,9 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
  
         kthread_stop(lo->lo_thread);
  
+       spin_lock_irq(&lo->lo_lock);
         lo->lo_backing_file = NULL;
+       spin_unlock_irq(&lo->lo_lock);
  
         loop_release_xfer(lo);
         lo->transfer = NULL;
@@ -1485,13 +1480,22 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
  
  static int lo_open(struct block_device *bdev, fmode_t mode)
  {
-       struct loop_device *lo = bdev->bd_disk->private_data;
+       struct loop_device *lo;
+       int err = 0;
+
+       mutex_lock(&loop_index_mutex);
+       lo = bdev->bd_disk->private_data;
+       if (!lo) {
+               err = -ENXIO;
+               goto out;
+       }
  
         mutex_lock(&lo->lo_ctl_mutex);
         lo->lo_refcnt++;
         mutex_unlock(&lo->lo_ctl_mutex);
-
-       return 0;
+out:
+       mutex_unlock(&loop_index_mutex);
+       return err;
  }
  
  static int lo_release(struct gendisk *disk, fmode_t mode)
@@ -1557,40 +1561,71 @@ int loop_register_transfer(struct loop_func_table *funcs)
         return 0;
  }
  
+static int unregister_transfer_cb(int id, void *ptr, void *data)
+{
+       struct loop_device *lo = ptr;
+       struct loop_func_table *xfer = data;
+
+       mutex_lock(&lo->lo_ctl_mutex);
+       if (lo->lo_encryption == xfer)
+               loop_release_xfer(lo);
+       mutex_unlock(&lo->lo_ctl_mutex);
+       return 0;
+}
+
  int loop_unregister_transfer(int number)
  {
         unsigned int n = number;
-       struct loop_device *lo;
         struct loop_func_table *xfer;
  
         if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
                 return -EINVAL;
  
         xfer_funcs[n] = NULL;
-
-       list_for_each_entry(lo, &loop_devices, lo_list) {
-               mutex_lock(&lo->lo_ctl_mutex);
-
-               if (lo->lo_encryption == xfer)
-                       loop_release_xfer(lo);
-
-               mutex_unlock(&lo->lo_ctl_mutex);
-       }
-
+       idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
         return 0;
  }
  
  EXPORT_SYMBOL(loop_register_transfer);
  EXPORT_SYMBOL(loop_unregister_transfer);
  
-static struct loop_device *loop_alloc(int i)
+static int loop_add(struct loop_device **l, int i)
  {
         struct loop_device *lo;
         struct gendisk *disk;
+       int err;
  
         lo = kzalloc(sizeof(*lo), GFP_KERNEL);
-       if (!lo)
+       if (!lo) {
+               err = -ENOMEM;
                 goto out;
+       }
+
+       err = idr_pre_get(&loop_index_idr, GFP_KERNEL);
+       if (err < 0)
+               goto out_free_dev;
+
+       if (i >= 0) {
+               int m;
+
+               /* create specific i in the index */
+               err = idr_get_new_above(&loop_index_idr, lo, i, &m);
+               if (err >= 0 && i != m) {
+                       idr_remove(&loop_index_idr, m);
+                       err = -EEXIST;
+               }
+       } else if (i == -1) {
+               int m;
+
+               /* get next free nr */
+               err = idr_get_new(&loop_index_idr, lo, &m);
+               if (err >= 0)
+                       i = m;
+       } else {
+               err = -EINVAL;
+       }
+       if (err < 0)
+               goto out_free_dev;
  
         lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
         if (!lo->lo_queue)
@@ -1611,81 +1646,158 @@ static struct loop_device *loop_alloc(int i)
         disk->private_data      = lo;
         disk->queue             = lo->lo_queue;
         sprintf(disk->disk_name, "loop%d", i);
-       return lo;
+       add_disk(disk);
+       *l = lo;
+       return lo->lo_number;
  
  out_free_queue:
         blk_cleanup_queue(lo->lo_queue);
  out_free_dev:
         kfree(lo);
  out:
-       return NULL;
+       return err;
  }
  
-static void loop_free(struct loop_device *lo)
+static void loop_remove(struct loop_device *lo)
  {
+       del_gendisk(lo->lo_disk);
         blk_cleanup_queue(lo->lo_queue);
         put_disk(lo->lo_disk);
-       list_del(&lo->lo_list);
         kfree(lo);
  }
  
-static struct loop_device *loop_init_one(int i)
+static int find_free_cb(int id, void *ptr, void *data)
+{
+       struct loop_device *lo = ptr;
+       struct loop_device **l = data;
+
+       if (lo->lo_state == Lo_unbound) {
+               *l = lo;
+               return 1;
+       }
+       return 0;
+}
+
+static int loop_lookup(struct loop_device **l, int i)
  {
         struct loop_device *lo;
+       int ret = -ENODEV;
  
-       list_for_each_entry(lo, &loop_devices, lo_list) {
-               if (lo->lo_number == i)
-                       return lo;
+       if (i < 0) {
+               int err;
+
+               err = idr_for_each(&loop_index_idr, &find_free_cb, &lo);
+               if (err == 1) {
+                       *l = lo;
+                       ret = lo->lo_number;
+               }
+               goto out;
         }
  
-       lo = loop_alloc(i);
+       /* lookup and return a specific i */
+       lo = idr_find(&loop_index_idr, i);
         if (lo) {
-               add_disk(lo->lo_disk);
-               list_add_tail(&lo->lo_list, &loop_devices);
+               *l = lo;
+               ret = lo->lo_number;
         }
-       return lo;
-}
-
-static void loop_del_one(struct loop_device *lo)
-{
-       del_gendisk(lo->lo_disk);
-       loop_free(lo);
+out:
+       return ret;
  }
  
  static struct kobject *loop_probe(dev_t dev, int *part, void *data)
  {
         struct loop_device *lo;
         struct kobject *kobj;
+       int err;
  
-       mutex_lock(&loop_devices_mutex);
-       lo = loop_init_one(MINOR(dev) >> part_shift);
-       kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM);
-       mutex_unlock(&loop_devices_mutex);
+       mutex_lock(&loop_index_mutex);
+       err = loop_lookup(&lo, MINOR(dev) >> part_shift);
+       if (err < 0)
+               err = loop_add(&lo, MINOR(dev) >> part_shift);
+       if (err < 0)
+               kobj = ERR_PTR(err);
+       else
+               kobj = get_disk(lo->lo_disk);
+       mutex_unlock(&loop_index_mutex);
  
         *part = 0;
         return kobj;
  }
  
+static long loop_control_ioctl(struct file *file, unsigned int cmd,
+                              unsigned long parm)
+{
+       struct loop_device *lo;
+       int ret = -ENOSYS;
+
+       mutex_lock(&loop_index_mutex);
+       switch (cmd) {
+       case LOOP_CTL_ADD:
+               ret = loop_lookup(&lo, parm);
+               if (ret >= 0) {
+                       ret = -EEXIST;
+                       break;
+               }
+               ret = loop_add(&lo, parm);
+               break;
+       case LOOP_CTL_REMOVE:
+               ret = loop_lookup(&lo, parm);
+               if (ret < 0)
+                       break;
+               mutex_lock(&lo->lo_ctl_mutex);
+               if (lo->lo_state != Lo_unbound) {
+                       ret = -EBUSY;
+                       mutex_unlock(&lo->lo_ctl_mutex);
+                       break;
+               }
+               if (lo->lo_refcnt > 0) {
+                       ret = -EBUSY;
+                       mutex_unlock(&lo->lo_ctl_mutex);
+                       break;
+               }
+               lo->lo_disk->private_data = NULL;
+               mutex_unlock(&lo->lo_ctl_mutex);
+               idr_remove(&loop_index_idr, lo->lo_number);
+               loop_remove(lo);
+               break;
+       case LOOP_CTL_GET_FREE:
+               ret = loop_lookup(&lo, -1);
+               if (ret >= 0)
+                       break;
+               ret = loop_add(&lo, -1);
+       }
+       mutex_unlock(&loop_index_mutex);
+
+       return ret;
+}
+
+static const struct file_operations loop_ctl_fops = {
+       .open           = nonseekable_open,
+       .unlocked_ioctl = loop_control_ioctl,
+       .compat_ioctl   = loop_control_ioctl,
+       .owner          = THIS_MODULE,
+       .llseek         = noop_llseek,
+};
+
+static struct miscdevice loop_misc = {
+       .minor          = LOOP_CTRL_MINOR,
+       .name           = "loop-control",
+       .fops           = &loop_ctl_fops,
+};
+
+MODULE_ALIAS_MISCDEV(LOOP_CTRL_MINOR);
+MODULE_ALIAS("devname:loop-control");
+
  static int __init loop_init(void)
  {
         int i, nr;
         unsigned long range;
-       struct loop_device *lo, *next;
+       struct loop_device *lo;
+       int err;
  
-       /*
-        * loop module now has a feature to instantiate underlying device
-        * structure on-demand, provided that there is an access dev node.
-        * However, this will not work well with user space tool that doesn't
-        * know about such "feature".  In order to not break any existing
-        * tool, we do the following:
-        *
-        * (1) if max_loop is specified, create that many upfront, and this
-        *     also becomes a hard limit.
-        * (2) if max_loop is not specified, create 8 loop device on module
-        *     load, user can further extend loop device by create dev node
-        *     themselves and have kernel automatically instantiate actual
-        *     device on-demand.
-        */
+       err = misc_register(&loop_misc);
+       if (err < 0)
+               return err;
  
         part_shift = 0;
         if (max_part > 0) {
@@ -1708,57 +1820,60 @@ static int __init loop_init(void)
         if (max_loop > 1UL << (MINORBITS - part_shift))
                 return -EINVAL;
  
+       /*
+        * If max_loop is specified, create that many devices upfront.
+        * This also becomes a hard limit. If max_loop is not specified,
+        * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module
+        * init time. Loop devices can be requested on-demand with the
+        * /dev/loop-control interface, or be instantiated by accessing
+        * a 'dead' device node.
+        */
         if (max_loop) {
                 nr = max_loop;
                 range = max_loop << part_shift;
         } else {
-               nr = 8;
+               nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT;
                 range = 1UL << MINORBITS;
         }
  
         if (register_blkdev(LOOP_MAJOR, "loop"))
                 return -EIO;
  
-       for (i = 0; i < nr; i++) {
-               lo = loop_alloc(i);
-               if (!lo)
-                       goto Enomem;
-               list_add_tail(&lo->lo_list, &loop_devices);
-       }
-
-       /* point of no return */
-
-       list_for_each_entry(lo, &loop_devices, lo_list)
-               add_disk(lo->lo_disk);
-
         blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
                                   THIS_MODULE, loop_probe, NULL, NULL);
  
+       /* pre-create number of devices given by config or max_loop */
+       mutex_lock(&loop_index_mutex);
+       for (i = 0; i < nr; i++)
+               loop_add(&lo, i);
+       mutex_unlock(&loop_index_mutex);
+
         printk(KERN_INFO "loop: module loaded\n");
         return 0;
+}
  
-Enomem:
-       printk(KERN_INFO "loop: out of memory\n");
-
-       list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
-               loop_free(lo);
+static int loop_exit_cb(int id, void *ptr, void *data)
+{
+       struct loop_device *lo = ptr;
  
-       unregister_blkdev(LOOP_MAJOR, "loop");
-       return -ENOMEM;
+       loop_remove(lo);
+       return 0;
  }
  
  static void __exit loop_exit(void)
  {
         unsigned long range;
-       struct loop_device *lo, *next;
  
         range = max_loop ? max_loop << part_shift : 1UL << MINORBITS;
  
-       list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
-               loop_del_one(lo);
+       idr_for_each(&loop_index_idr, &loop_exit_cb, NULL);
+       idr_remove_all(&loop_index_idr);
+       idr_destroy(&loop_index_idr);
  
         blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
         unregister_blkdev(LOOP_MAJOR, "loop");
+
+       misc_deregister(&loop_misc);
  }
  
  module_init(loop_init);
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c

index 773bfa7927775396f80c004b4d6551c485efe4c6..ae3e167e17adc3f18bc4b14fe3b1635d299bfb3e 100644 (file)
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -1184,6 +1184,7 @@ static struct of_device_id swim3_match[] =
         {
         .compatible     = "swim3"
         },
+       { /* end of list */ }
  };
  
  static struct macio_driver swim3_driver =
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c

index b536a9cef917516d23448e589a7955ad984e6dd3..9ea8c2576c70e768f22ad8ea2cc423337611fb4f 100644 (file)
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -123,8 +123,8 @@ static DEFINE_SPINLOCK(minor_lock);
  #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
  #define EMULATED_HD_DISK_MINOR_OFFSET (0)
  #define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
-#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16))
-#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4)
+#define EMULATED_SD_DISK_MINOR_OFFSET (0)
+#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256)
  
  #define DEV_NAME       "xvd"   /* name in /dev */
  
@@ -529,7 +529,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
                 minor = BLKIF_MINOR_EXT(info->vdevice);
                 nr_parts = PARTS_PER_EXT_DISK;
                 offset = minor / nr_parts;
-               if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4)
+               if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4)
                         printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
                                         "emulated IDE disks,\n\t choose an xvd device name"
                                         "from xvde on\n", info->vdevice);
diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c

index a5854735bb2e7882124ad1dc2c1b9c9296c93820..db7cb8111fbe58a89f21ee6ccea50467356bd38a 100644 (file)
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -63,6 +63,7 @@ static struct usb_device_id ath3k_table[] = {
         /* Atheros AR3011 with sflash firmware*/
         { USB_DEVICE(0x0CF3, 0x3002) },
         { USB_DEVICE(0x13d3, 0x3304) },
+       { USB_DEVICE(0x0930, 0x0215) },
  
         /* Atheros AR9285 Malbec with sflash firmware */
         { USB_DEVICE(0x03F0, 0x311D) },
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c

index 91d13a9e8c657f9b838c56ecbae6608965342bc0..3ef476070bafcfa88505ffc9ed72c8eb3f301092 100644 (file)
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -106,6 +106,7 @@ static struct usb_device_id blacklist_table[] = {
         /* Atheros 3011 with sflash firmware */
         { USB_DEVICE(0x0cf3, 0x3002), .driver_info = BTUSB_IGNORE },
         { USB_DEVICE(0x13d3, 0x3304), .driver_info = BTUSB_IGNORE },
+       { USB_DEVICE(0x0930, 0x0215), .driver_info = BTUSB_IGNORE },
  
         /* Atheros AR9285 Malbec with sflash firmware */
         { USB_DEVICE(0x03f0, 0x311d), .driver_info = BTUSB_IGNORE },
@@ -256,7 +257,9 @@ static void btusb_intr_complete(struct urb *urb)
  
         err = usb_submit_urb(urb, GFP_ATOMIC);
         if (err < 0) {
-               if (err != -EPERM)
+               /* -EPERM: urb is being killed;
+                * -ENODEV: device got disconnected */
+               if (err != -EPERM && err != -ENODEV)
                         BT_ERR("%s urb %p failed to resubmit (%d)",
                                                 hdev->name, urb, -err);
                 usb_unanchor_urb(urb);
@@ -341,7 +344,9 @@ static void btusb_bulk_complete(struct urb *urb)
  
         err = usb_submit_urb(urb, GFP_ATOMIC);
         if (err < 0) {
-               if (err != -EPERM)
+               /* -EPERM: urb is being killed;
+                * -ENODEV: device got disconnected */
+               if (err != -EPERM && err != -ENODEV)
                         BT_ERR("%s urb %p failed to resubmit (%d)",
                                                 hdev->name, urb, -err);
                 usb_unanchor_urb(urb);
@@ -431,7 +436,9 @@ static void btusb_isoc_complete(struct urb *urb)
  
         err = usb_submit_urb(urb, GFP_ATOMIC);
         if (err < 0) {
-               if (err != -EPERM)
+               /* -EPERM: urb is being killed;
+                * -ENODEV: device got disconnected */
+               if (err != -EPERM && err != -ENODEV)
                         BT_ERR("%s urb %p failed to resubmit (%d)",
                                                 hdev->name, urb, -err);
                 usb_unanchor_urb(urb);
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c

index 75fb965b8f72b1e9fa2a8001f7fb16fb12388d5c..f997c27d79e299579975badce89975aefc9f941a 100644 (file)
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -1929,11 +1929,17 @@ static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s,
                 goto out;
  
         s->manufact.len = buf[0] << 8 | buf[1];
-       if (s->manufact.len < 0 || s->manufact.len > 2048) {
+       if (s->manufact.len < 0) {
                 cdinfo(CD_WARNING, "Received invalid manufacture info length"
                                    " (%d)\n", s->manufact.len);
                 ret = -EIO;
         } else {
+               if (s->manufact.len > 2048) {
+                       cdinfo(CD_WARNING, "Received invalid manufacture info "
+                                       "length (%d): truncating to 2048\n",
+                                       s->manufact.len);
+                       s->manufact.len = 2048;
+               }
                 memcpy(s->manufact.value, &buf[4], s->manufact.len);
         }
  
diff --git a/drivers/char/msm_smd_pkt.c b/drivers/char/msm_smd_pkt.c

index b6f8a65c9960dd9cb5892f27e93db398fe3fd90d..8eca55deb3a35c4a0a6ffc41130e3422d22bd926 100644 (file)
--- a/drivers/char/msm_smd_pkt.c
+++ b/drivers/char/msm_smd_pkt.c
@@ -379,9 +379,8 @@ static int __init smd_pkt_init(void)
         for (i = 0; i < NUM_SMD_PKT_PORTS; ++i) {
                 smd_pkt_devp[i] = kzalloc(sizeof(struct smd_pkt_dev),
                                           GFP_KERNEL);
-               if (IS_ERR(smd_pkt_devp[i])) {
-                       r = PTR_ERR(smd_pkt_devp[i]);
-                       pr_err("kmalloc() failed %d\n", r);
+               if (!smd_pkt_devp[i]) {
+                       pr_err("kmalloc() failed\n");
                         goto clean_cdevs;
                 }
  
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c

index dc7c033ef587142ce080e33a2b2711a94ece1d89..32a77becc098534c2b0b77a585d48f4974538c65 100644 (file)
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -26,6 +26,7 @@
  #include <linux/clk.h>
  #include <linux/irq.h>
  #include <linux/err.h>
+#include <linux/delay.h>
  #include <linux/clocksource.h>
  #include <linux/clockchips.h>
  #include <linux/sh_timer.h>
@@ -150,13 +151,13 @@ static void sh_cmt_start_stop_ch(struct sh_cmt_priv *p, int start)
  
  static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate)
  {
-       int ret;
+       int k, ret;
  
         /* enable clock */
         ret = clk_enable(p->clk);
         if (ret) {
                 dev_err(&p->pdev->dev, "cannot enable clock\n");
-               return ret;
+               goto err0;
         }
  
         /* make sure channel is disabled */
@@ -174,9 +175,38 @@ static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate)
         sh_cmt_write(p, CMCOR, 0xffffffff);
         sh_cmt_write(p, CMCNT, 0);
  
+       /*
+        * According to the sh73a0 user's manual, as CMCNT can be operated
+        * only by the RCLK (Pseudo 32 KHz), there's one restriction on
+        * modifying CMCNT register; two RCLK cycles are necessary before
+        * this register is either read or any modification of the value
+        * it holds is reflected in the LSI's actual operation.
+        *
+        * While at it, we're supposed to clear out the CMCNT as of this
+        * moment, so make sure it's processed properly here.  This will
+        * take RCLKx2 at maximum.
+        */
+       for (k = 0; k < 100; k++) {
+               if (!sh_cmt_read(p, CMCNT))
+                       break;
+               udelay(1);
+       }
+
+       if (sh_cmt_read(p, CMCNT)) {
+               dev_err(&p->pdev->dev, "cannot clear CMCNT\n");
+               ret = -ETIMEDOUT;
+               goto err1;
+       }
+
         /* enable channel */
         sh_cmt_start_stop_ch(p, 1);
         return 0;
+ err1:
+       /* stop clock */
+       clk_disable(p->clk);
+
+ err0:
+       return ret;
  }
  
  static void sh_cmt_disable(struct sh_cmt_priv *p)
diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c

index 7b0603eb01297a7088c24d0ab1101e5e9625713b..cdc02ac8f41a7ff5d3f121e2b0927fd8bf99ae7a 100644 (file)
--- a/drivers/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -261,6 +261,9 @@ static int pcc_get_offset(int cpu)
         pr = per_cpu(processors, cpu);
         pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu);
  
+       if (!pr)
+               return -ENODEV;
+
         status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer);
         if (ACPI_FAILURE(status))
                 return -ENODEV;
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c

index 196a7378d33238ec0a08c1ac3672d428fff370cd..be21e3f138a88a6317da6e40318d3dc79d9bd5e1 100644 (file)
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -80,6 +80,7 @@
  #include <linux/interrupt.h>
  #include <linux/slab.h>
  #include <linux/delay.h>
+#include <linux/dma-mapping.h>
  #include <linux/dmapool.h>
  #include <linux/dmaengine.h>
  #include <linux/amba/bus.h>
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c

index cd3a7c726bf87bef330f882981afc2fc42be964e..467e4dcb20a012cadfcc10bc616b25dbe11d373a 100644 (file)
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -174,8 +174,10 @@ struct d40_base;
   * @tasklet: Tasklet that gets scheduled from interrupt context to complete a
   * transfer and call client callback.
   * @client: Cliented owned descriptor list.
+ * @pending_queue: Submitted jobs, to be issued by issue_pending()
   * @active: Active descriptor.
   * @queue: Queued jobs.
+ * @prepare_queue: Prepared jobs.
   * @dma_cfg: The client configuration of this dma channel.
   * @configured: whether the dma_cfg configuration is valid
   * @base: Pointer to the device instance struct.
@@ -203,6 +205,7 @@ struct d40_chan {
         struct list_head                 pending_queue;
         struct list_head                 active;
         struct list_head                 queue;
+       struct list_head                 prepare_queue;
         struct stedma40_chan_cfg         dma_cfg;
         bool                             configured;
         struct d40_base                 *base;
@@ -477,7 +480,6 @@ static struct d40_desc *d40_desc_get(struct d40_chan *d40c)
  
                 list_for_each_entry_safe(d, _d, &d40c->client, node)
                         if (async_tx_test_ack(&d->txd)) {
-                               d40_pool_lli_free(d40c, d);
                                 d40_desc_remove(d);
                                 desc = d;
                                 memset(desc, 0, sizeof(*desc));
@@ -644,8 +646,11 @@ static struct d40_desc *d40_first_active_get(struct d40_chan *d40c)
         return d;
  }
  
+/* remove desc from current queue and add it to the pending_queue */
  static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc)
  {
+       d40_desc_remove(desc);
+       desc->is_in_client_list = false;
         list_add_tail(&desc->node, &d40c->pending_queue);
  }
  
@@ -803,6 +808,7 @@ done:
  static void d40_term_all(struct d40_chan *d40c)
  {
         struct d40_desc *d40d;
+       struct d40_desc *_d;
  
         /* Release active descriptors */
         while ((d40d = d40_first_active_get(d40c))) {
@@ -822,6 +828,21 @@ static void d40_term_all(struct d40_chan *d40c)
                 d40_desc_free(d40c, d40d);
         }
  
+       /* Release client owned descriptors */
+       if (!list_empty(&d40c->client))
+               list_for_each_entry_safe(d40d, _d, &d40c->client, node) {
+                       d40_desc_remove(d40d);
+                       d40_desc_free(d40c, d40d);
+               }
+
+       /* Release descriptors in prepare queue */
+       if (!list_empty(&d40c->prepare_queue))
+               list_for_each_entry_safe(d40d, _d,
+                                        &d40c->prepare_queue, node) {
+                       d40_desc_remove(d40d);
+                       d40_desc_free(d40c, d40d);
+               }
+
         d40c->pending_tx = 0;
         d40c->busy = false;
  }
@@ -1208,7 +1229,6 @@ static void dma_tasklet(unsigned long data)
  
         if (!d40d->cyclic) {
                 if (async_tx_test_ack(&d40d->txd)) {
-                       d40_pool_lli_free(d40c, d40d);
                         d40_desc_remove(d40d);
                         d40_desc_free(d40c, d40d);
                 } else {
@@ -1595,21 +1615,10 @@ static int d40_free_dma(struct d40_chan *d40c)
         u32 event;
         struct d40_phy_res *phy = d40c->phy_chan;
         bool is_src;
-       struct d40_desc *d;
-       struct d40_desc *_d;
-
  
         /* Terminate all queued and active transfers */
         d40_term_all(d40c);
  
-       /* Release client owned descriptors */
-       if (!list_empty(&d40c->client))
-               list_for_each_entry_safe(d, _d, &d40c->client, node) {
-                       d40_pool_lli_free(d40c, d);
-                       d40_desc_remove(d);
-                       d40_desc_free(d40c, d);
-               }
-
         if (phy == NULL) {
                 chan_err(d40c, "phy == null\n");
                 return -EINVAL;
@@ -1911,6 +1920,12 @@ d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src,
                 goto err;
         }
  
+       /*
+        * add descriptor to the prepare queue in order to be able
+        * to free them later in terminate_all
+        */
+       list_add_tail(&desc->node, &chan->prepare_queue);
+
         spin_unlock_irqrestore(&chan->lock, flags);
  
         return &desc->txd;
@@ -2400,6 +2415,7 @@ static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma,
                 INIT_LIST_HEAD(&d40c->queue);
                 INIT_LIST_HEAD(&d40c->pending_queue);
                 INIT_LIST_HEAD(&d40c->client);
+               INIT_LIST_HEAD(&d40c->prepare_queue);
  
                 tasklet_init(&d40c->tasklet, dma_tasklet,
                              (unsigned long) d40c);
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c

index 04f1e7ce02b196868d42253243d79d1044d9b7cc..f6cf448d69b4f468160e19fc29fab0ea473f04b8 100644 (file)
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -1670,7 +1670,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
         char *type, *optype, *err, *msg;
         unsigned long error = m->status & 0x1ff0000l;
         u32 optypenum = (m->status >> 4) & 0x07;
-       u32 core_err_cnt = (m->status >> 38) && 0x7fff;
+       u32 core_err_cnt = (m->status >> 38) & 0x7fff;
         u32 dimm = (m->misc >> 16) & 0x3;
         u32 channel = (m->misc >> 18) & 0x3;
         u32 syndrome = m->misc >> 32;
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c

index e6ad3bb6c1a6b5efad00df79995f733be9a9c1f7..4799393247c8ada8a626722e0bea57c0ae99409e 100644 (file)
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -216,15 +216,33 @@ struct inbound_phy_packet_event {
         struct fw_cdev_event_phy_packet phy_packet;
  };
  
-static inline void __user *u64_to_uptr(__u64 value)
+#ifdef CONFIG_COMPAT
+static void __user *u64_to_uptr(u64 value)
+{
+       if (is_compat_task())
+               return compat_ptr(value);
+       else
+               return (void __user *)(unsigned long)value;
+}
+
+static u64 uptr_to_u64(void __user *ptr)
+{
+       if (is_compat_task())
+               return ptr_to_compat(ptr);
+       else
+               return (u64)(unsigned long)ptr;
+}
+#else
+static inline void __user *u64_to_uptr(u64 value)
  {
         return (void __user *)(unsigned long)value;
  }
  
-static inline __u64 uptr_to_u64(void __user *ptr)
+static inline u64 uptr_to_u64(void __user *ptr)
  {
-       return (__u64)(unsigned long)ptr;
+       return (u64)(unsigned long)ptr;
  }
+#endif /* CONFIG_COMPAT */
  
  static int fw_device_op_open(struct inode *inode, struct file *file)
  {
diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c

index 8ba7f7928f1f1dcd4fcedd33305277064bc8999b..f3b890da1e874b832b6dbab7dd919d94567955cf 100644 (file)
--- a/drivers/firewire/core-device.c
+++ b/drivers/firewire/core-device.c
@@ -455,15 +455,20 @@ static struct device_attribute fw_device_attributes[] = {
  static int read_rom(struct fw_device *device,
                     int generation, int index, u32 *data)
  {
-       int rcode;
+       u64 offset = (CSR_REGISTER_BASE | CSR_CONFIG_ROM) + index * 4;
+       int i, rcode;
  
         /* device->node_id, accessed below, must not be older than generation */
         smp_rmb();
  
-       rcode = fw_run_transaction(device->card, TCODE_READ_QUADLET_REQUEST,
-                       device->node_id, generation, device->max_speed,
-                       (CSR_REGISTER_BASE | CSR_CONFIG_ROM) + index * 4,
-                       data, 4);
+       for (i = 10; i < 100; i += 10) {
+               rcode = fw_run_transaction(device->card,
+                               TCODE_READ_QUADLET_REQUEST, device->node_id,
+                               generation, device->max_speed, offset, data, 4);
+               if (rcode != RCODE_BUSY)
+                       break;
+               msleep(i);
+       }
         be32_to_cpus(data);
  
         return rcode;
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c

index bcf792fac442576181f00596a018cf22a0dd67aa..fd7170a9ad2cc4c21d0bbcf6099f9db8d2ce4884 100644 (file)
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -290,6 +290,9 @@ static const struct {
         {PCI_VENDOR_ID_NEC, PCI_ANY_ID, PCI_ANY_ID,
                 QUIRK_CYCLE_TIMER},
  
+       {PCI_VENDOR_ID_O2, PCI_ANY_ID, PCI_ANY_ID,
+               QUIRK_NO_MSI},
+
         {PCI_VENDOR_ID_RICOH, PCI_ANY_ID, PCI_ANY_ID,
                 QUIRK_CYCLE_TIMER},
  
@@ -2179,8 +2182,13 @@ static int ohci_enable(struct fw_card *card,
                         ohci_driver_name, ohci)) {
                 fw_error("Failed to allocate interrupt %d.\n", dev->irq);
                 pci_disable_msi(dev);
-               dma_free_coherent(ohci->card.device, CONFIG_ROM_SIZE,
-                                 ohci->config_rom, ohci->config_rom_bus);
+
+               if (config_rom) {
+                       dma_free_coherent(ohci->card.device, CONFIG_ROM_SIZE,
+                                         ohci->next_config_rom,
+                                         ohci->next_config_rom_bus);
+                       ohci->next_config_rom = NULL;
+               }
                 return -EIO;
         }
  
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c

index 41841a3e3f99c9acd4c4fc72c972626d8dffdcee..17cef864506a7b6778b109dd0d0b570514eab6eb 100644 (file)
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -1198,6 +1198,10 @@ static int sbp2_remove(struct device *dev)
  {
         struct fw_unit *unit = fw_unit(dev);
         struct sbp2_target *tgt = dev_get_drvdata(&unit->device);
+       struct sbp2_logical_unit *lu;
+
+       list_for_each_entry(lu, &tgt->lu_list, link)
+               cancel_delayed_work_sync(&lu->work);
  
         sbp2_target_put(tgt);
         return 0;
diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c

index 68810fd1a59d057da2625e57efd55313523eeadd..aa83de9db1b91ce380ed86ca9e6f4a421491095a 100644 (file)
--- a/drivers/firmware/google/gsmi.c
+++ b/drivers/firmware/google/gsmi.c
@@ -420,7 +420,7 @@ static efi_status_t gsmi_get_next_variable(unsigned long *name_size,
  
  static efi_status_t gsmi_set_variable(efi_char16_t *name,
                                       efi_guid_t *vendor,
-                                     unsigned long attr,
+                                     u32 attr,
                                       unsigned long data_size,
                                       void *data)
  {
diff --git a/drivers/gpio/gpio-generic.c b/drivers/gpio/gpio-generic.c

index 231714def4d2d9ea9229a94a60b5685f94375866..4e24436b0f82621b81b508a69f88396e363ef609 100644 (file)
--- a/drivers/gpio/gpio-generic.c
+++ b/drivers/gpio/gpio-generic.c
@@ -351,7 +351,7 @@ static int bgpio_setup_direction(struct bgpio_chip *bgc,
         return 0;
  }
  
-int __devexit bgpio_remove(struct bgpio_chip *bgc)
+int bgpio_remove(struct bgpio_chip *bgc)
  {
         int err = gpiochip_remove(&bgc->gc);
  
@@ -361,15 +361,10 @@ int __devexit bgpio_remove(struct bgpio_chip *bgc)
  }
  EXPORT_SYMBOL_GPL(bgpio_remove);
  
-int __devinit bgpio_init(struct bgpio_chip *bgc,
-                        struct device *dev,
-                        unsigned long sz,
-                        void __iomem *dat,
-                        void __iomem *set,
-                        void __iomem *clr,
-                        void __iomem *dirout,
-                        void __iomem *dirin,
-                        bool big_endian)
+int bgpio_init(struct bgpio_chip *bgc, struct device *dev,
+              unsigned long sz, void __iomem *dat, void __iomem *set,
+              void __iomem *clr, void __iomem *dirout, void __iomem *dirin,
+              bool big_endian)
  {
         int ret;
  
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c

index 82db1850666253dc021f8a43e635fe91ebce94df..fe738f05309ba39e6341d0fd79557f153711cb2f 100644 (file)
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -499,6 +499,7 @@ void drm_connector_cleanup(struct drm_connector *connector)
         mutex_lock(&dev->mode_config.mutex);
         drm_mode_object_put(dev, &connector->base);
         list_del(&connector->head);
+       dev->mode_config.num_connector--;
         mutex_unlock(&dev->mode_config.mutex);
  }
  EXPORT_SYMBOL(drm_connector_cleanup);
@@ -529,6 +530,7 @@ void drm_encoder_cleanup(struct drm_encoder *encoder)
         mutex_lock(&dev->mode_config.mutex);
         drm_mode_object_put(dev, &encoder->base);
         list_del(&encoder->head);
+       dev->mode_config.num_encoder--;
         mutex_unlock(&dev->mode_config.mutex);
  }
  EXPORT_SYMBOL(drm_encoder_cleanup);
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c

index 802b61ac31390d92b2affaf14f787b75f8caab2f..f7c6854eb4ddb77943163535a5eb94f78cca6a48 100644 (file)
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -256,7 +256,6 @@ int drm_fb_helper_panic(struct notifier_block *n, unsigned long ununsed,
  {
         printk(KERN_ERR "panic occurred, switching back to text console\n");
         return drm_fb_helper_force_kernel_mode();
-       return 0;
  }
  EXPORT_SYMBOL(drm_fb_helper_panic);
  
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c

index a8ab6263e0d75b357d1643a5e009de6e59494307..3c395a59da3539ac2975ba6fac1e2873f13873bd 100644 (file)
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -499,7 +499,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
         seq_printf(m, "Interrupts received: %d\n",
                    atomic_read(&dev_priv->irq_received));
         for (i = 0; i < I915_NUM_RINGS; i++) {
-               if (IS_GEN6(dev)) {
+               if (IS_GEN6(dev) || IS_GEN7(dev)) {
                         seq_printf(m, "Graphics Interrupt mask (%s):    %08x\n",
                                    dev_priv->ring[i].name,
                                    I915_READ_IMR(&dev_priv->ring[i]));
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h

index feb4f164fd1b35264b6673201d68e0f22494f025..7916bd97d5c131314baa113d08a47d566d2a3fb4 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -36,6 +36,7 @@
  #include <linux/io-mapping.h>
  #include <linux/i2c.h>
  #include <drm/intel-gtt.h>
+#include <linux/backlight.h>
  
  /* General customization:
   */
@@ -690,6 +691,7 @@ typedef struct drm_i915_private {
         int child_dev_num;
         struct child_device_config *child_dev;
         struct drm_connector *int_lvds_connector;
+       struct drm_connector *int_edp_connector;
  
         bool mchbar_need_disable;
  
@@ -723,6 +725,8 @@ typedef struct drm_i915_private {
         /* list of fbdev register on this device */
         struct intel_fbdev *fbdev;
  
+       struct backlight_device *backlight;
+
         struct drm_property *broadcast_rgb_property;
         struct drm_property *force_audio_property;
  
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c

index 02f96fd0d52dbea419cf575184196de2bbe66eb0..9cbb0cd8f46ae8ffc4e60a2fb90dde7867e112a3 100644 (file)
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2058,8 +2058,10 @@ void intel_irq_init(struct drm_device *dev)
                 dev->driver->get_vblank_counter = gm45_get_vblank_counter;
         }
  
-
-       dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp;
+       if (drm_core_check_feature(dev, DRIVER_MODESET))
+               dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp;
+       else
+               dev->driver->get_vblank_timestamp = NULL;
         dev->driver->get_scanout_position = i915_get_crtc_scanoutpos;
  
         if (IS_IVYBRIDGE(dev)) {
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h

index d1331f771e2f2f863677cc69bcc11a06d3af0ae4..542453f7498c83e3b58b390254d815247e209da2 100644 (file)
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -375,6 +375,7 @@
  # define MI_FLUSH_ENABLE                               (1 << 11)
  
  #define GFX_MODE       0x02520
+#define GFX_MODE_GEN7  0x0229c
  #define   GFX_RUN_LIST_ENABLE          (1<<15)
  #define   GFX_TLB_INVALIDATE_ALWAYS    (1<<13)
  #define   GFX_SURFACE_FAULT_ENABLE     (1<<12)
@@ -382,6 +383,9 @@
  #define   GFX_PSMI_GRANULARITY         (1<<10)
  #define   GFX_PPGTT_ENABLE             (1<<9)
  
+#define GFX_MODE_ENABLE(bit) (((bit) << 16) | (bit))
+#define GFX_MODE_DISABLE(bit) (((bit) << 16) | (0))
+
  #define SCPD0          0x0209c /* 915+ only */
  #define IER            0x020a0
  #define IIR            0x020a4
@@ -1318,6 +1322,7 @@
  #define   ADPA_PIPE_SELECT_MASK        (1<<30)
  #define   ADPA_PIPE_A_SELECT   0
  #define   ADPA_PIPE_B_SELECT   (1<<30)
+#define   ADPA_PIPE_SELECT(pipe) ((pipe) << 30)
  #define   ADPA_USE_VGA_HVPOLARITY (1<<15)
  #define   ADPA_SETS_HVPOLARITY 0
  #define   ADPA_VSYNC_CNTL_DISABLE (1<<11)
@@ -1460,6 +1465,7 @@
  /* Selects pipe B for LVDS data.  Must be set on pre-965. */
  #define   LVDS_PIPEB_SELECT            (1 << 30)
  #define   LVDS_PIPE_MASK               (1 << 30)
+#define   LVDS_PIPE(pipe)              ((pipe) << 30)
  /* LVDS dithering flag on 965/g4x platform */
  #define   LVDS_ENABLE_DITHER           (1 << 25)
  /* LVDS sync polarity flags. Set to invert (i.e. negative) */
@@ -1499,9 +1505,6 @@
  #define   LVDS_B0B3_POWER_DOWN         (0 << 2)
  #define   LVDS_B0B3_POWER_UP           (3 << 2)
  
-#define LVDS_PIPE_ENABLED(V, P) \
-       (((V) & (LVDS_PIPE_MASK | LVDS_PORT_EN)) == ((P) << 30 | LVDS_PORT_EN))
-
  /* Video Data Island Packet control */
  #define VIDEO_DIP_DATA         0x61178
  #define VIDEO_DIP_CTL          0x61170
@@ -3256,14 +3259,12 @@
  #define  ADPA_CRT_HOTPLUG_VOLREF_475MV  (1<<17)
  #define  ADPA_CRT_HOTPLUG_FORCE_TRIGGER (1<<16)
  
-#define ADPA_PIPE_ENABLED(V, P) \
-       (((V) & (ADPA_TRANS_SELECT_MASK | ADPA_DAC_ENABLE)) == ((P) << 30 | ADPA_DAC_ENABLE))
-
  /* or SDVOB */
  #define HDMIB   0xe1140
  #define  PORT_ENABLE    (1 << 31)
  #define  TRANSCODER_A   (0)
  #define  TRANSCODER_B   (1 << 30)
+#define  TRANSCODER(pipe)      ((pipe) << 30)
  #define  TRANSCODER_MASK   (1 << 30)
  #define  COLOR_FORMAT_8bpc      (0)
  #define  COLOR_FORMAT_12bpc     (3 << 26)
@@ -3280,9 +3281,6 @@
  #define  HSYNC_ACTIVE_HIGH      (1 << 3)
  #define  PORT_DETECTED          (1 << 2)
  
-#define HDMI_PIPE_ENABLED(V, P) \
-       (((V) & (TRANSCODER_MASK | PORT_ENABLE)) == ((P) << 30 | PORT_ENABLE))
-
  /* PCH SDVOB multiplex with HDMIB */
  #define PCH_SDVOB      HDMIB
  
@@ -3349,6 +3347,7 @@
  #define  PORT_TRANS_B_SEL_CPT  (1<<29)
  #define  PORT_TRANS_C_SEL_CPT  (2<<29)
  #define  PORT_TRANS_SEL_MASK   (3<<29)
+#define  PORT_TRANS_SEL_CPT(pipe)      ((pipe) << 29)
  
  #define TRANS_DP_CTL_A         0xe0300
  #define TRANS_DP_CTL_B         0xe1300
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c

index 87677d60d0df21758b9a70c51e5f23388e21a5e6..f10742359ec9a1f6d6f03ec248cefcd78a85254a 100644 (file)
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -871,7 +871,8 @@ int i915_restore_state(struct drm_device *dev)
         }
         mutex_unlock(&dev->struct_mutex);
  
-       intel_init_clock_gating(dev);
+       if (drm_core_check_feature(dev, DRIVER_MODESET))
+               intel_init_clock_gating(dev);
  
         if (IS_IRONLAKE_M(dev)) {
                 ironlake_enable_drps(dev);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c

index 35364e68a09127528b1d6d6f18bae3e331d78b85..56a8554d9039615b4b7772d474706ce2815a0a78 100644 (file)
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -878,7 +878,7 @@ static void assert_panel_unlocked(struct drm_i915_private *dev_priv,
         int pp_reg, lvds_reg;
         u32 val;
         enum pipe panel_pipe = PIPE_A;
-       bool locked = locked;
+       bool locked = true;
  
         if (HAS_PCH_SPLIT(dev_priv->dev)) {
                 pp_reg = PCH_PP_CONTROL;
@@ -980,8 +980,8 @@ static void assert_transcoder_disabled(struct drm_i915_private *dev_priv,
              pipe_name(pipe));
  }
  
-static bool dp_pipe_enabled(struct drm_i915_private *dev_priv, enum pipe pipe,
-                           int reg, u32 port_sel, u32 val)
+static bool dp_pipe_enabled(struct drm_i915_private *dev_priv,
+                           enum pipe pipe, u32 port_sel, u32 val)
  {
         if ((val & DP_PORT_EN) == 0)
                 return false;
@@ -998,11 +998,58 @@ static bool dp_pipe_enabled(struct drm_i915_private *dev_priv, enum pipe pipe,
         return true;
  }
  
+static bool hdmi_pipe_enabled(struct drm_i915_private *dev_priv,
+                             enum pipe pipe, u32 val)
+{
+       if ((val & PORT_ENABLE) == 0)
+               return false;
+
+       if (HAS_PCH_CPT(dev_priv->dev)) {
+               if ((val & PORT_TRANS_SEL_MASK) != PORT_TRANS_SEL_CPT(pipe))
+                       return false;
+       } else {
+               if ((val & TRANSCODER_MASK) != TRANSCODER(pipe))
+                       return false;
+       }
+       return true;
+}
+
+static bool lvds_pipe_enabled(struct drm_i915_private *dev_priv,
+                             enum pipe pipe, u32 val)
+{
+       if ((val & LVDS_PORT_EN) == 0)
+               return false;
+
+       if (HAS_PCH_CPT(dev_priv->dev)) {
+               if ((val & PORT_TRANS_SEL_MASK) != PORT_TRANS_SEL_CPT(pipe))
+                       return false;
+       } else {
+               if ((val & LVDS_PIPE_MASK) != LVDS_PIPE(pipe))
+                       return false;
+       }
+       return true;
+}
+
+static bool adpa_pipe_enabled(struct drm_i915_private *dev_priv,
+                             enum pipe pipe, u32 val)
+{
+       if ((val & ADPA_DAC_ENABLE) == 0)
+               return false;
+       if (HAS_PCH_CPT(dev_priv->dev)) {
+               if ((val & PORT_TRANS_SEL_MASK) != PORT_TRANS_SEL_CPT(pipe))
+                       return false;
+       } else {
+               if ((val & ADPA_PIPE_SELECT_MASK) != ADPA_PIPE_SELECT(pipe))
+                       return false;
+       }
+       return true;
+}
+
  static void assert_pch_dp_disabled(struct drm_i915_private *dev_priv,
                                    enum pipe pipe, int reg, u32 port_sel)
  {
         u32 val = I915_READ(reg);
-       WARN(dp_pipe_enabled(dev_priv, pipe, reg, port_sel, val),
+       WARN(dp_pipe_enabled(dev_priv, pipe, port_sel, val),
              "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n",
              reg, pipe_name(pipe));
  }
@@ -1011,7 +1058,7 @@ static void assert_pch_hdmi_disabled(struct drm_i915_private *dev_priv,
                                      enum pipe pipe, int reg)
  {
         u32 val = I915_READ(reg);
-       WARN(HDMI_PIPE_ENABLED(val, pipe),
+       WARN(hdmi_pipe_enabled(dev_priv, val, pipe),
              "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n",
              reg, pipe_name(pipe));
  }
@@ -1028,13 +1075,13 @@ static void assert_pch_ports_disabled(struct drm_i915_private *dev_priv,
  
         reg = PCH_ADPA;
         val = I915_READ(reg);
-       WARN(ADPA_PIPE_ENABLED(val, pipe),
+       WARN(adpa_pipe_enabled(dev_priv, val, pipe),
              "PCH VGA enabled on transcoder %c, should be disabled\n",
              pipe_name(pipe));
  
         reg = PCH_LVDS;
         val = I915_READ(reg);
-       WARN(LVDS_PIPE_ENABLED(val, pipe),
+       WARN(lvds_pipe_enabled(dev_priv, val, pipe),
              "PCH LVDS enabled on transcoder %c, should be disabled\n",
              pipe_name(pipe));
  
@@ -1360,7 +1407,7 @@ static void disable_pch_dp(struct drm_i915_private *dev_priv,
                            enum pipe pipe, int reg, u32 port_sel)
  {
         u32 val = I915_READ(reg);
-       if (dp_pipe_enabled(dev_priv, pipe, reg, port_sel, val)) {
+       if (dp_pipe_enabled(dev_priv, pipe, port_sel, val)) {
                 DRM_DEBUG_KMS("Disabling pch dp %x on pipe %d\n", reg, pipe);
                 I915_WRITE(reg, val & ~DP_PORT_EN);
         }
@@ -1370,7 +1417,7 @@ static void disable_pch_hdmi(struct drm_i915_private *dev_priv,
                              enum pipe pipe, int reg)
  {
         u32 val = I915_READ(reg);
-       if (HDMI_PIPE_ENABLED(val, pipe)) {
+       if (hdmi_pipe_enabled(dev_priv, val, pipe)) {
                 DRM_DEBUG_KMS("Disabling pch HDMI %x on pipe %d\n",
                               reg, pipe);
                 I915_WRITE(reg, val & ~PORT_ENABLE);
@@ -1392,12 +1439,13 @@ static void intel_disable_pch_ports(struct drm_i915_private *dev_priv,
  
         reg = PCH_ADPA;
         val = I915_READ(reg);
-       if (ADPA_PIPE_ENABLED(val, pipe))
+       if (adpa_pipe_enabled(dev_priv, val, pipe))
                 I915_WRITE(reg, val & ~ADPA_DAC_ENABLE);
  
         reg = PCH_LVDS;
         val = I915_READ(reg);
-       if (LVDS_PIPE_ENABLED(val, pipe)) {
+       if (lvds_pipe_enabled(dev_priv, val, pipe)) {
+               DRM_DEBUG_KMS("disable lvds on pipe %d val 0x%08x\n", pipe, val);
                 I915_WRITE(reg, val & ~LVDS_PORT_EN);
                 POSTING_READ(reg);
                 udelay(100);
@@ -5049,6 +5097,81 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
         return ret;
  }
  
+static void ironlake_update_pch_refclk(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_mode_config *mode_config = &dev->mode_config;
+       struct drm_crtc *crtc;
+       struct intel_encoder *encoder;
+       struct intel_encoder *has_edp_encoder = NULL;
+       u32 temp;
+       bool has_lvds = false;
+
+       /* We need to take the global config into account */
+       list_for_each_entry(crtc, &mode_config->crtc_list, head) {
+               if (!crtc->enabled)
+                       continue;
+
+               list_for_each_entry(encoder, &mode_config->encoder_list,
+                                   base.head) {
+                       if (encoder->base.crtc != crtc)
+                               continue;
+
+                       switch (encoder->type) {
+                       case INTEL_OUTPUT_LVDS:
+                               has_lvds = true;
+                       case INTEL_OUTPUT_EDP:
+                               has_edp_encoder = encoder;
+                               break;
+                       }
+               }
+       }
+
+       /* Ironlake: try to setup display ref clock before DPLL
+        * enabling. This is only under driver's control after
+        * PCH B stepping, previous chipset stepping should be
+        * ignoring this setting.
+        */
+       temp = I915_READ(PCH_DREF_CONTROL);
+       /* Always enable nonspread source */
+       temp &= ~DREF_NONSPREAD_SOURCE_MASK;
+       temp |= DREF_NONSPREAD_SOURCE_ENABLE;
+       temp &= ~DREF_SSC_SOURCE_MASK;
+       temp |= DREF_SSC_SOURCE_ENABLE;
+       I915_WRITE(PCH_DREF_CONTROL, temp);
+
+       POSTING_READ(PCH_DREF_CONTROL);
+       udelay(200);
+
+       if (has_edp_encoder) {
+               if (intel_panel_use_ssc(dev_priv)) {
+                       temp |= DREF_SSC1_ENABLE;
+                       I915_WRITE(PCH_DREF_CONTROL, temp);
+
+                       POSTING_READ(PCH_DREF_CONTROL);
+                       udelay(200);
+               }
+               temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
+
+               /* Enable CPU source on CPU attached eDP */
+               if (!intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+                       if (intel_panel_use_ssc(dev_priv))
+                               temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
+                       else
+                               temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
+               } else {
+                       /* Enable SSC on PCH eDP if needed */
+                       if (intel_panel_use_ssc(dev_priv)) {
+                               DRM_ERROR("enabling SSC on PCH\n");
+                               temp |= DREF_SUPERSPREAD_SOURCE_ENABLE;
+                       }
+               }
+               I915_WRITE(PCH_DREF_CONTROL, temp);
+               POSTING_READ(PCH_DREF_CONTROL);
+               udelay(200);
+       }
+}
+
  static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
                                   struct drm_display_mode *mode,
                                   struct drm_display_mode *adjusted_mode,
@@ -5244,49 +5367,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
         ironlake_compute_m_n(intel_crtc->bpp, lane, target_clock, link_bw,
                              &m_n);
  
-       /* Ironlake: try to setup display ref clock before DPLL
-        * enabling. This is only under driver's control after
-        * PCH B stepping, previous chipset stepping should be
-        * ignoring this setting.
-        */
-       temp = I915_READ(PCH_DREF_CONTROL);
-       /* Always enable nonspread source */
-       temp &= ~DREF_NONSPREAD_SOURCE_MASK;
-       temp |= DREF_NONSPREAD_SOURCE_ENABLE;
-       temp &= ~DREF_SSC_SOURCE_MASK;
-       temp |= DREF_SSC_SOURCE_ENABLE;
-       I915_WRITE(PCH_DREF_CONTROL, temp);
-
-       POSTING_READ(PCH_DREF_CONTROL);
-       udelay(200);
-
-       if (has_edp_encoder) {
-               if (intel_panel_use_ssc(dev_priv)) {
-                       temp |= DREF_SSC1_ENABLE;
-                       I915_WRITE(PCH_DREF_CONTROL, temp);
-
-                       POSTING_READ(PCH_DREF_CONTROL);
-                       udelay(200);
-               }
-               temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
-
-               /* Enable CPU source on CPU attached eDP */
-               if (!intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
-                       if (intel_panel_use_ssc(dev_priv))
-                               temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
-                       else
-                               temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
-               } else {
-                       /* Enable SSC on PCH eDP if needed */
-                       if (intel_panel_use_ssc(dev_priv)) {
-                               DRM_ERROR("enabling SSC on PCH\n");
-                               temp |= DREF_SUPERSPREAD_SOURCE_ENABLE;
-                       }
-               }
-               I915_WRITE(PCH_DREF_CONTROL, temp);
-               POSTING_READ(PCH_DREF_CONTROL);
-               udelay(200);
-       }
+       ironlake_update_pch_refclk(dev);
  
         fp = clock.n << 16 | clock.m1 << 8 | clock.m2;
         if (has_reduced_clock)
@@ -7157,8 +7238,6 @@ static void intel_setup_outputs(struct drm_device *dev)
                         intel_encoder_clones(dev, encoder->clone_mask);
         }
  
-       intel_panel_setup_backlight(dev);
-
         /* disable all the possible outputs/crtcs before entering KMS mode */
         drm_helper_disable_unused_functions(dev);
  }
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c

index 0feae908bb37f7bc8cbc642ea12f96a6cabf467a..44fef5e1c4906d49547d79fc42296163eea71905 100644 (file)
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1841,6 +1841,11 @@ done:
  static void
  intel_dp_destroy (struct drm_connector *connector)
  {
+       struct drm_device *dev = connector->dev;
+
+       if (intel_dpd_is_edp(dev))
+               intel_panel_destroy_backlight(dev);
+
         drm_sysfs_connector_remove(connector);
         drm_connector_cleanup(connector);
         kfree(connector);
@@ -2072,6 +2077,8 @@ intel_dp_init(struct drm_device *dev, int output_reg)
                                         DRM_MODE_TYPE_PREFERRED;
                         }
                 }
+               dev_priv->int_edp_connector = connector;
+               intel_panel_setup_backlight(dev);
         }
  
         intel_dp_add_properties(intel_dp, connector);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h

index 7b330e76a435afa32ba126d7a7837bb816ceb589..0b2ee9d39980c14df3c22dd37d9328004d9317fe 100644 (file)
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -297,9 +297,10 @@ extern void intel_pch_panel_fitting(struct drm_device *dev,
  extern u32 intel_panel_get_max_backlight(struct drm_device *dev);
  extern u32 intel_panel_get_backlight(struct drm_device *dev);
  extern void intel_panel_set_backlight(struct drm_device *dev, u32 level);
-extern void intel_panel_setup_backlight(struct drm_device *dev);
+extern int intel_panel_setup_backlight(struct drm_device *dev);
  extern void intel_panel_enable_backlight(struct drm_device *dev);
  extern void intel_panel_disable_backlight(struct drm_device *dev);
+extern void intel_panel_destroy_backlight(struct drm_device *dev);
  extern enum drm_connector_status intel_panel_detect(struct drm_device *dev);
  
  extern void intel_crtc_load_lut(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c

index 2e8ddfcba40c8c7d751776fb4d0b6818189b9c7e..31da77f5c051b816f705618a950ea9dfe3d50ee9 100644 (file)
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -72,14 +72,16 @@ static void intel_lvds_enable(struct intel_lvds *intel_lvds)
  {
         struct drm_device *dev = intel_lvds->base.base.dev;
         struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 ctl_reg, lvds_reg;
+       u32 ctl_reg, lvds_reg, stat_reg;
  
         if (HAS_PCH_SPLIT(dev)) {
                 ctl_reg = PCH_PP_CONTROL;
                 lvds_reg = PCH_LVDS;
+               stat_reg = PCH_PP_STATUS;
         } else {
                 ctl_reg = PP_CONTROL;
                 lvds_reg = LVDS;
+               stat_reg = PP_STATUS;
         }
  
         I915_WRITE(lvds_reg, I915_READ(lvds_reg) | LVDS_PORT_EN);
@@ -94,17 +96,16 @@ static void intel_lvds_enable(struct intel_lvds *intel_lvds)
                 DRM_DEBUG_KMS("applying panel-fitter: %x, %x\n",
                               intel_lvds->pfit_control,
                               intel_lvds->pfit_pgm_ratios);
-               if (wait_for((I915_READ(PP_STATUS) & PP_ON) == 0, 1000)) {
-                       DRM_ERROR("timed out waiting for panel to power off\n");
-               } else {
-                       I915_WRITE(PFIT_PGM_RATIOS, intel_lvds->pfit_pgm_ratios);
-                       I915_WRITE(PFIT_CONTROL, intel_lvds->pfit_control);
-                       intel_lvds->pfit_dirty = false;
-               }
+
+               I915_WRITE(PFIT_PGM_RATIOS, intel_lvds->pfit_pgm_ratios);
+               I915_WRITE(PFIT_CONTROL, intel_lvds->pfit_control);
+               intel_lvds->pfit_dirty = false;
         }
  
         I915_WRITE(ctl_reg, I915_READ(ctl_reg) | POWER_TARGET_ON);
         POSTING_READ(lvds_reg);
+       if (wait_for((I915_READ(stat_reg) & PP_ON) != 0, 1000))
+               DRM_ERROR("timed out waiting for panel to power on\n");
  
         intel_panel_enable_backlight(dev);
  }
@@ -113,24 +114,25 @@ static void intel_lvds_disable(struct intel_lvds *intel_lvds)
  {
         struct drm_device *dev = intel_lvds->base.base.dev;
         struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 ctl_reg, lvds_reg;
+       u32 ctl_reg, lvds_reg, stat_reg;
  
         if (HAS_PCH_SPLIT(dev)) {
                 ctl_reg = PCH_PP_CONTROL;
                 lvds_reg = PCH_LVDS;
+               stat_reg = PCH_PP_STATUS;
         } else {
                 ctl_reg = PP_CONTROL;
                 lvds_reg = LVDS;
+               stat_reg = PP_STATUS;
         }
  
         intel_panel_disable_backlight(dev);
  
         I915_WRITE(ctl_reg, I915_READ(ctl_reg) & ~POWER_TARGET_ON);
+       if (wait_for((I915_READ(stat_reg) & PP_ON) == 0, 1000))
+               DRM_ERROR("timed out waiting for panel to power off\n");
  
         if (intel_lvds->pfit_control) {
-               if (wait_for((I915_READ(PP_STATUS) & PP_ON) == 0, 1000))
-                       DRM_ERROR("timed out waiting for panel to power off\n");
-
                 I915_WRITE(PFIT_CONTROL, 0);
                 intel_lvds->pfit_dirty = true;
         }
@@ -398,53 +400,21 @@ out:
  
  static void intel_lvds_prepare(struct drm_encoder *encoder)
  {
-       struct drm_device *dev = encoder->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
         struct intel_lvds *intel_lvds = to_intel_lvds(encoder);
  
-       /* We try to do the minimum that is necessary in order to unlock
-        * the registers for mode setting.
-        *
-        * On Ironlake, this is quite simple as we just set the unlock key
-        * and ignore all subtleties. (This may cause some issues...)
-        *
+       /*
          * Prior to Ironlake, we must disable the pipe if we want to adjust
          * the panel fitter. However at all other times we can just reset
          * the registers regardless.
          */
-
-       if (HAS_PCH_SPLIT(dev)) {
-               I915_WRITE(PCH_PP_CONTROL,
-                          I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS);
-       } else if (intel_lvds->pfit_dirty) {
-               I915_WRITE(PP_CONTROL,
-                          (I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS)
-                          & ~POWER_TARGET_ON);
-       } else {
-               I915_WRITE(PP_CONTROL,
-                          I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS);
-       }
+       if (!HAS_PCH_SPLIT(encoder->dev) && intel_lvds->pfit_dirty)
+               intel_lvds_disable(intel_lvds);
  }
  
  static void intel_lvds_commit(struct drm_encoder *encoder)
  {
-       struct drm_device *dev = encoder->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
         struct intel_lvds *intel_lvds = to_intel_lvds(encoder);
  
-       /* Undo any unlocking done in prepare to prevent accidental
-        * adjustment of the registers.
-        */
-       if (HAS_PCH_SPLIT(dev)) {
-               u32 val = I915_READ(PCH_PP_CONTROL);
-               if ((val & PANEL_UNLOCK_REGS) == PANEL_UNLOCK_REGS)
-                       I915_WRITE(PCH_PP_CONTROL, val & 0x3);
-       } else {
-               u32 val = I915_READ(PP_CONTROL);
-               if ((val & PANEL_UNLOCK_REGS) == PANEL_UNLOCK_REGS)
-                       I915_WRITE(PP_CONTROL, val & 0x3);
-       }
-
         /* Always do a full power on as we do not know what state
          * we were left in.
          */
@@ -582,6 +552,8 @@ static void intel_lvds_destroy(struct drm_connector *connector)
         struct drm_device *dev = connector->dev;
         struct drm_i915_private *dev_priv = dev->dev_private;
  
+       intel_panel_destroy_backlight(dev);
+
         if (dev_priv->lid_notifier.notifier_call)
                 acpi_lid_notifier_unregister(&dev_priv->lid_notifier);
         drm_sysfs_connector_remove(connector);
@@ -1040,6 +1012,19 @@ out:
                 pwm = I915_READ(BLC_PWM_PCH_CTL1);
                 pwm |= PWM_PCH_ENABLE;
                 I915_WRITE(BLC_PWM_PCH_CTL1, pwm);
+               /*
+                * Unlock registers and just
+                * leave them unlocked
+                */
+               I915_WRITE(PCH_PP_CONTROL,
+                          I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS);
+       } else {
+               /*
+                * Unlock registers and just
+                * leave them unlocked
+                */
+               I915_WRITE(PP_CONTROL,
+                          I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS);
         }
         dev_priv->lid_notifier.notifier_call = intel_lid_notify;
         if (acpi_lid_notifier_register(&dev_priv->lid_notifier)) {
@@ -1049,6 +1034,9 @@ out:
         /* keep the LVDS connector */
         dev_priv->int_lvds_connector = connector;
         drm_sysfs_connector_add(connector);
+
+       intel_panel_setup_backlight(dev);
+
         return true;
  
  failed:
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c

index b7c5ddb564d144e43bdeac2727172a1bc6492bc5..b8e8158bb16e8cb9060718d3325bc6d549f4ace9 100644 (file)
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -227,7 +227,6 @@ void intel_opregion_asle_intr(struct drm_device *dev)
         asle->aslc = asle_stat;
  }
  
-/* Only present on Ironlake+ */
  void intel_opregion_gse_intr(struct drm_device *dev)
  {
         struct drm_i915_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c

index 05f500cd9c246c0156412e028f5bef0c35acd22b..a9e0c7bcd3179a2d76a1725315735550c8cd3800 100644 (file)
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -277,7 +277,7 @@ void intel_panel_enable_backlight(struct drm_device *dev)
         dev_priv->backlight_enabled = true;
  }
  
-void intel_panel_setup_backlight(struct drm_device *dev)
+static void intel_panel_init_backlight(struct drm_device *dev)
  {
         struct drm_i915_private *dev_priv = dev->dev_private;
  
@@ -309,3 +309,73 @@ intel_panel_detect(struct drm_device *dev)
  
         return connector_status_unknown;
  }
+
+#ifdef CONFIG_BACKLIGHT_CLASS_DEVICE
+static int intel_panel_update_status(struct backlight_device *bd)
+{
+       struct drm_device *dev = bl_get_data(bd);
+       intel_panel_set_backlight(dev, bd->props.brightness);
+       return 0;
+}
+
+static int intel_panel_get_brightness(struct backlight_device *bd)
+{
+       struct drm_device *dev = bl_get_data(bd);
+       return intel_panel_get_backlight(dev);
+}
+
+static const struct backlight_ops intel_panel_bl_ops = {
+       .update_status = intel_panel_update_status,
+       .get_brightness = intel_panel_get_brightness,
+};
+
+int intel_panel_setup_backlight(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct backlight_properties props;
+       struct drm_connector *connector;
+
+       intel_panel_init_backlight(dev);
+
+       if (dev_priv->int_lvds_connector)
+               connector = dev_priv->int_lvds_connector;
+       else if (dev_priv->int_edp_connector)
+               connector = dev_priv->int_edp_connector;
+       else
+               return -ENODEV;
+
+       props.type = BACKLIGHT_RAW;
+       props.max_brightness = intel_panel_get_max_backlight(dev);
+       dev_priv->backlight =
+               backlight_device_register("intel_backlight",
+                                         &connector->kdev, dev,
+                                         &intel_panel_bl_ops, &props);
+
+       if (IS_ERR(dev_priv->backlight)) {
+               DRM_ERROR("Failed to register backlight: %ld\n",
+                         PTR_ERR(dev_priv->backlight));
+               dev_priv->backlight = NULL;
+               return -ENODEV;
+       }
+       dev_priv->backlight->props.brightness = intel_panel_get_backlight(dev);
+       return 0;
+}
+
+void intel_panel_destroy_backlight(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       if (dev_priv->backlight)
+               backlight_device_unregister(dev_priv->backlight);
+}
+#else
+int intel_panel_setup_backlight(struct drm_device *dev)
+{
+       intel_panel_init_backlight(dev);
+       return 0;
+}
+
+void intel_panel_destroy_backlight(struct drm_device *dev)
+{
+       return;
+}
+#endif
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c

index 47b9b277703856e6b1f3d76d81ce70be25f3d296..c30626ea9f93a5319f95fb0ef631178b666c2e87 100644 (file)
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -290,6 +290,10 @@ static int init_render_ring(struct intel_ring_buffer *ring)
                 if (IS_GEN6(dev) || IS_GEN7(dev))
                         mode |= MI_FLUSH_ENABLE << 16 | MI_FLUSH_ENABLE;
                 I915_WRITE(MI_MODE, mode);
+               if (IS_GEN7(dev))
+                       I915_WRITE(GFX_MODE_GEN7,
+                                  GFX_MODE_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
+                                  GFX_MODE_ENABLE(GFX_REPLAY_MODE));
         }
  
         if (INTEL_INFO(dev)->gen >= 6) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c

index 8d02d875376d5979cf48986aef57e8a0f5216d17..c919cfc8f2fdf762589ba5db144be75e3066fb6d 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -530,7 +530,8 @@ nouveau_fence_channel_init(struct nouveau_channel *chan)
                 nouveau_gpuobj_ref(NULL, &obj);
                 if (ret)
                         return ret;
-       } else {
+       } else
+       if (USE_SEMA(dev)) {
                 /* map fence bo into channel's vm */
                 ret = nouveau_bo_vma_add(dev_priv->fence.bo, chan->vm,
                                          &chan->fence.vma);
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c

index c444cadbf849110e4d3b418397f26e9524670ca0..2706cb3d871a2cdafd84da5b3bb73f5c39f01fe8 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -37,8 +37,11 @@ nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
                 return -ENOMEM;
  
         nvbe->ttm_alloced = kmalloc(sizeof(bool) * num_pages, GFP_KERNEL);
-       if (!nvbe->ttm_alloced)
+       if (!nvbe->ttm_alloced) {
+               kfree(nvbe->pages);
+               nvbe->pages = NULL;
                 return -ENOMEM;
+       }
  
         nvbe->nr_pages = 0;
         while (num_pages--) {
@@ -126,7 +129,7 @@ nv04_sgdma_bind(struct ttm_backend *be, struct ttm_mem_reg *mem)
  
                 for (j = 0; j < PAGE_SIZE / NV_CTXDMA_PAGE_SIZE; j++, pte++) {
                         nv_wo32(gpuobj, (pte * 4) + 0, offset_l | 3);
-                       dma_offset += NV_CTXDMA_PAGE_SIZE;
+                       offset_l += NV_CTXDMA_PAGE_SIZE;
                 }
         }
  
diff --git a/drivers/gpu/drm/nouveau/nv04_crtc.c b/drivers/gpu/drm/nouveau/nv04_crtc.c

index 118261d4927abf9736de31dfc3ec392b48ee92e9..5e45398a9e2deed847a2191260c76611e325b3c5 100644 (file)
--- a/drivers/gpu/drm/nouveau/nv04_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv04_crtc.c
@@ -781,11 +781,20 @@ nv04_crtc_do_mode_set_base(struct drm_crtc *crtc,
         struct drm_device *dev = crtc->dev;
         struct drm_nouveau_private *dev_priv = dev->dev_private;
         struct nv04_crtc_reg *regp = &dev_priv->mode_reg.crtc_reg[nv_crtc->index];
-       struct drm_framebuffer *drm_fb = nv_crtc->base.fb;
-       struct nouveau_framebuffer *fb = nouveau_framebuffer(drm_fb);
+       struct drm_framebuffer *drm_fb;
+       struct nouveau_framebuffer *fb;
         int arb_burst, arb_lwm;
         int ret;
  
+       NV_DEBUG_KMS(dev, "index %d\n", nv_crtc->index);
+
+       /* no fb bound */
+       if (!atomic && !crtc->fb) {
+               NV_DEBUG_KMS(dev, "No FB bound\n");
+               return 0;
+       }
+
+
         /* If atomic, we want to switch to the fb we were passed, so
          * now we update pointers to do that.  (We don't pin; just
          * assume we're already pinned and update the base address.)
@@ -794,6 +803,8 @@ nv04_crtc_do_mode_set_base(struct drm_crtc *crtc,
                 drm_fb = passed_fb;
                 fb = nouveau_framebuffer(passed_fb);
         } else {
+               drm_fb = crtc->fb;
+               fb = nouveau_framebuffer(crtc->fb);
                 /* If not atomic, we can go ahead and pin, and unpin the
                  * old fb we were passed.
                  */
diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c

index 46ad59ea2185a720cfb3a07db0c82b3ac2ad22e8..5d989073ba6e3f55fafc76d624a18f10291392f8 100644 (file)
--- a/drivers/gpu/drm/nouveau/nv50_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv50_crtc.c
@@ -519,12 +519,18 @@ nv50_crtc_do_mode_set_base(struct drm_crtc *crtc,
         struct drm_device *dev = nv_crtc->base.dev;
         struct drm_nouveau_private *dev_priv = dev->dev_private;
         struct nouveau_channel *evo = nv50_display(dev)->master;
-       struct drm_framebuffer *drm_fb = nv_crtc->base.fb;
-       struct nouveau_framebuffer *fb = nouveau_framebuffer(drm_fb);
+       struct drm_framebuffer *drm_fb;
+       struct nouveau_framebuffer *fb;
         int ret;
  
         NV_DEBUG_KMS(dev, "index %d\n", nv_crtc->index);
  
+       /* no fb bound */
+       if (!atomic && !crtc->fb) {
+               NV_DEBUG_KMS(dev, "No FB bound\n");
+               return 0;
+       }
+
         /* If atomic, we want to switch to the fb we were passed, so
          * now we update pointers to do that.  (We don't pin; just
          * assume we're already pinned and update the base address.)
@@ -533,6 +539,8 @@ nv50_crtc_do_mode_set_base(struct drm_crtc *crtc,
                 drm_fb = passed_fb;
                 fb = nouveau_framebuffer(passed_fb);
         } else {
+               drm_fb = crtc->fb;
+               fb = nouveau_framebuffer(crtc->fb);
                 /* If not atomic, we can go ahead and pin, and unpin the
                  * old fb we were passed.
                  */
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c

index 645b84b3d203395b59f35d6a545c0ca488aaedf9..7ad43c6b1db765261e4b81466a9145c48d441834 100644 (file)
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -613,6 +613,18 @@ static bool radeon_dp_get_link_status(struct radeon_connector *radeon_connector,
         return true;
  }
  
+bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector)
+{
+       u8 link_status[DP_LINK_STATUS_SIZE];
+       struct radeon_connector_atom_dig *dig = radeon_connector->con_priv;
+
+       if (!radeon_dp_get_link_status(radeon_connector, link_status))
+               return false;
+       if (dp_channel_eq_ok(link_status, dig->dp_lane_count))
+               return false;
+       return true;
+}
+
  struct radeon_dp_link_train_info {
         struct radeon_device *rdev;
         struct drm_encoder *encoder;
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c

index 14dce9f221721128b008e1988b50ac51788ef4de..e8a746712b5b5c66a02ccd2152b05235a58df991 100644 (file)
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -41,6 +41,31 @@ static void evergreen_gpu_init(struct radeon_device *rdev);
  void evergreen_fini(struct radeon_device *rdev);
  static void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
  
+void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev)
+{
+       u16 ctl, v;
+       int cap, err;
+
+       cap = pci_pcie_cap(rdev->pdev);
+       if (!cap)
+               return;
+
+       err = pci_read_config_word(rdev->pdev, cap + PCI_EXP_DEVCTL, &ctl);
+       if (err)
+               return;
+
+       v = (ctl & PCI_EXP_DEVCTL_READRQ) >> 12;
+
+       /* if bios or OS sets MAX_READ_REQUEST_SIZE to an invalid value, fix it
+        * to avoid hangs or perfomance issues
+        */
+       if ((v == 0) || (v == 6) || (v == 7)) {
+               ctl &= ~PCI_EXP_DEVCTL_READRQ;
+               ctl |= (2 << 12);
+               pci_write_config_word(rdev->pdev, cap + PCI_EXP_DEVCTL, ctl);
+       }
+}
+
  void evergreen_pre_page_flip(struct radeon_device *rdev, int crtc)
  {
         /* enable the pflip int */
@@ -743,7 +768,7 @@ static void evergreen_program_watermarks(struct radeon_device *rdev,
                     !evergreen_average_bandwidth_vs_available_bandwidth(&wm) ||
                     !evergreen_check_latency_hiding(&wm) ||
                     (rdev->disp_priority == 2)) {
-                       DRM_INFO("force priority to high\n");
+                       DRM_DEBUG_KMS("force priority to high\n");
                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
                 }
@@ -1357,6 +1382,7 @@ int evergreen_cp_resume(struct radeon_device *rdev)
                                  SOFT_RESET_PA |
                                  SOFT_RESET_SH |
                                  SOFT_RESET_VGT |
+                                SOFT_RESET_SPI |
                                  SOFT_RESET_SX));
         RREG32(GRBM_SOFT_RESET);
         mdelay(15);
@@ -1378,7 +1404,8 @@ int evergreen_cp_resume(struct radeon_device *rdev)
         /* Initialize the ring buffer's read and write pointers */
         WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA);
         WREG32(CP_RB_RPTR_WR, 0);
-       WREG32(CP_RB_WPTR, 0);
+       rdev->cp.wptr = 0;
+       WREG32(CP_RB_WPTR, rdev->cp.wptr);
  
         /* set the wb address wether it's enabled or not */
         WREG32(CP_RB_RPTR_ADDR,
@@ -1400,7 +1427,6 @@ int evergreen_cp_resume(struct radeon_device *rdev)
         WREG32(CP_DEBUG, (1 << 27) | (1 << 28));
  
         rdev->cp.rptr = RREG32(CP_RB_RPTR);
-       rdev->cp.wptr = RREG32(CP_RB_WPTR);
  
         evergreen_cp_start(rdev);
         rdev->cp.ready = true;
@@ -1862,6 +1888,8 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
  
         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
  
+       evergreen_fix_pci_max_read_req_size(rdev);
+
         cc_gc_shader_pipe_config = RREG32(CC_GC_SHADER_PIPE_CONFIG) & ~2;
  
         cc_gc_shader_pipe_config |=
@@ -3143,21 +3171,23 @@ int evergreen_suspend(struct radeon_device *rdev)
  }
  
  int evergreen_copy_blit(struct radeon_device *rdev,
-                       uint64_t src_offset, uint64_t dst_offset,
-                       unsigned num_pages, struct radeon_fence *fence)
+                       uint64_t src_offset,
+                       uint64_t dst_offset,
+                       unsigned num_gpu_pages,
+                       struct radeon_fence *fence)
  {
         int r;
  
         mutex_lock(&rdev->r600_blit.mutex);
         rdev->r600_blit.vb_ib = NULL;
-       r = evergreen_blit_prepare_copy(rdev, num_pages * RADEON_GPU_PAGE_SIZE);
+       r = evergreen_blit_prepare_copy(rdev, num_gpu_pages * RADEON_GPU_PAGE_SIZE);
         if (r) {
                 if (rdev->r600_blit.vb_ib)
                         radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
                 mutex_unlock(&rdev->r600_blit.mutex);
                 return r;
         }
-       evergreen_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * RADEON_GPU_PAGE_SIZE);
+       evergreen_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages * RADEON_GPU_PAGE_SIZE);
         evergreen_blit_done_copy(rdev, fence);
         mutex_unlock(&rdev->r600_blit.mutex);
         return 0;
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c

index 44c4750f4518c3f010e96b38c981e7208cb84a1a..99fbd793c08ce2bf95ccd8352794fd9c951a628a 100644 (file)
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -39,6 +39,7 @@ extern int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
  extern void evergreen_mc_program(struct radeon_device *rdev);
  extern void evergreen_irq_suspend(struct radeon_device *rdev);
  extern int evergreen_mc_init(struct radeon_device *rdev);
+extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
  
  #define EVERGREEN_PFP_UCODE_SIZE 1120
  #define EVERGREEN_PM4_UCODE_SIZE 1376
@@ -669,6 +670,8 @@ static void cayman_gpu_init(struct radeon_device *rdev)
  
         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
  
+       evergreen_fix_pci_max_read_req_size(rdev);
+
         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
  
@@ -1159,6 +1162,7 @@ int cayman_cp_resume(struct radeon_device *rdev)
                                  SOFT_RESET_PA |
                                  SOFT_RESET_SH |
                                  SOFT_RESET_VGT |
+                                SOFT_RESET_SPI |
                                  SOFT_RESET_SX));
         RREG32(GRBM_SOFT_RESET);
         mdelay(15);
@@ -1183,7 +1187,8 @@ int cayman_cp_resume(struct radeon_device *rdev)
  
         /* Initialize the ring buffer's read and write pointers */
         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
-       WREG32(CP_RB0_WPTR, 0);
+       rdev->cp.wptr = 0;
+       WREG32(CP_RB0_WPTR, rdev->cp.wptr);
  
         /* set the wb address wether it's enabled or not */
         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
@@ -1203,7 +1208,6 @@ int cayman_cp_resume(struct radeon_device *rdev)
         WREG32(CP_RB0_BASE, rdev->cp.gpu_addr >> 8);
  
         rdev->cp.rptr = RREG32(CP_RB0_RPTR);
-       rdev->cp.wptr = RREG32(CP_RB0_WPTR);
  
         /* ring1  - compute only */
         /* Set ring buffer size */
@@ -1216,7 +1220,8 @@ int cayman_cp_resume(struct radeon_device *rdev)
  
         /* Initialize the ring buffer's read and write pointers */
         WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
-       WREG32(CP_RB1_WPTR, 0);
+       rdev->cp1.wptr = 0;
+       WREG32(CP_RB1_WPTR, rdev->cp1.wptr);
  
         /* set the wb address wether it's enabled or not */
         WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
@@ -1228,7 +1233,6 @@ int cayman_cp_resume(struct radeon_device *rdev)
         WREG32(CP_RB1_BASE, rdev->cp1.gpu_addr >> 8);
  
         rdev->cp1.rptr = RREG32(CP_RB1_RPTR);
-       rdev->cp1.wptr = RREG32(CP_RB1_WPTR);
  
         /* ring2 - compute only */
         /* Set ring buffer size */
@@ -1241,7 +1245,8 @@ int cayman_cp_resume(struct radeon_device *rdev)
  
         /* Initialize the ring buffer's read and write pointers */
         WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
-       WREG32(CP_RB2_WPTR, 0);
+       rdev->cp2.wptr = 0;
+       WREG32(CP_RB2_WPTR, rdev->cp2.wptr);
  
         /* set the wb address wether it's enabled or not */
         WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
@@ -1253,7 +1258,6 @@ int cayman_cp_resume(struct radeon_device *rdev)
         WREG32(CP_RB2_BASE, rdev->cp2.gpu_addr >> 8);
  
         rdev->cp2.rptr = RREG32(CP_RB2_RPTR);
-       rdev->cp2.wptr = RREG32(CP_RB2_WPTR);
  
         /* start the rings */
         cayman_cp_start(rdev);
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c

index f2204cb1ccdfa96e0daa22ed9963a59555664f19..5b1837b4aacfb04ce52064c363db3ef7b438ac59 100644 (file)
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -721,11 +721,11 @@ void r100_fence_ring_emit(struct radeon_device *rdev,
  int r100_copy_blit(struct radeon_device *rdev,
                    uint64_t src_offset,
                    uint64_t dst_offset,
-                  unsigned num_pages,
+                  unsigned num_gpu_pages,
                    struct radeon_fence *fence)
  {
         uint32_t cur_pages;
-       uint32_t stride_bytes = PAGE_SIZE;
+       uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
         uint32_t pitch;
         uint32_t stride_pixels;
         unsigned ndw;
@@ -737,7 +737,7 @@ int r100_copy_blit(struct radeon_device *rdev,
         /* radeon pitch is /64 */
         pitch = stride_bytes / 64;
         stride_pixels = stride_bytes / 4;
-       num_loops = DIV_ROUND_UP(num_pages, 8191);
+       num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);
  
         /* Ask for enough room for blit + flush + fence */
         ndw = 64 + (10 * num_loops);
@@ -746,12 +746,12 @@ int r100_copy_blit(struct radeon_device *rdev,
                 DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
                 return -EINVAL;
         }
-       while (num_pages > 0) {
-               cur_pages = num_pages;
+       while (num_gpu_pages > 0) {
+               cur_pages = num_gpu_pages;
                 if (cur_pages > 8191) {
                         cur_pages = 8191;
                 }
-               num_pages -= cur_pages;
+               num_gpu_pages -= cur_pages;
  
                 /* pages are in Y direction - height
                    page width in X direction - width */
@@ -773,8 +773,8 @@ int r100_copy_blit(struct radeon_device *rdev,
                 radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
                 radeon_ring_write(rdev, 0);
                 radeon_ring_write(rdev, (0x1fff) | (0x1fff << 16));
-               radeon_ring_write(rdev, num_pages);
-               radeon_ring_write(rdev, num_pages);
+               radeon_ring_write(rdev, cur_pages);
+               radeon_ring_write(rdev, cur_pages);
                 radeon_ring_write(rdev, cur_pages | (stride_pixels << 16));
         }
         radeon_ring_write(rdev, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
@@ -990,7 +990,8 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
         /* Force read & write ptr to 0 */
         WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
         WREG32(RADEON_CP_RB_RPTR_WR, 0);
-       WREG32(RADEON_CP_RB_WPTR, 0);
+       rdev->cp.wptr = 0;
+       WREG32(RADEON_CP_RB_WPTR, rdev->cp.wptr);
  
         /* set the wb address whether it's enabled or not */
         WREG32(R_00070C_CP_RB_RPTR_ADDR,
@@ -1007,9 +1008,6 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
         WREG32(RADEON_CP_RB_CNTL, tmp);
         udelay(10);
         rdev->cp.rptr = RREG32(RADEON_CP_RB_RPTR);
-       rdev->cp.wptr = RREG32(RADEON_CP_RB_WPTR);
-       /* protect against crazy HW on resume */
-       rdev->cp.wptr &= rdev->cp.ptr_mask;
         /* Set cp mode to bus mastering & enable cp*/
         WREG32(RADEON_CP_CSQ_MODE,
                REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c

index f2405830041360f5e74ec1c848ba2fce5736cd16..a1f3ba063c2dda0de972f363345f815ab76f84d8 100644 (file)
--- a/drivers/gpu/drm/radeon/r200.c
+++ b/drivers/gpu/drm/radeon/r200.c
@@ -84,7 +84,7 @@ static int r200_get_vtx_size_0(uint32_t vtx_fmt_0)
  int r200_copy_dma(struct radeon_device *rdev,
                   uint64_t src_offset,
                   uint64_t dst_offset,
-                 unsigned num_pages,
+                 unsigned num_gpu_pages,
                   struct radeon_fence *fence)
  {
         uint32_t size;
@@ -93,7 +93,7 @@ int r200_copy_dma(struct radeon_device *rdev,
         int r = 0;
  
         /* radeon pitch is /64 */
-       size = num_pages << PAGE_SHIFT;
+       size = num_gpu_pages << RADEON_GPU_PAGE_SHIFT;
         num_loops = DIV_ROUND_UP(size, 0x1FFFFF);
         r = radeon_ring_lock(rdev, num_loops * 4 + 64);
         if (r) {
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c

index aa5571b73aa02e947ec590ad6febb624778fc3da..720dd99163f855feeb16b921f64f21fa38cc2971 100644 (file)
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2209,7 +2209,8 @@ int r600_cp_resume(struct radeon_device *rdev)
         /* Initialize the ring buffer's read and write pointers */
         WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA);
         WREG32(CP_RB_RPTR_WR, 0);
-       WREG32(CP_RB_WPTR, 0);
+       rdev->cp.wptr = 0;
+       WREG32(CP_RB_WPTR, rdev->cp.wptr);
  
         /* set the wb address whether it's enabled or not */
         WREG32(CP_RB_RPTR_ADDR,
@@ -2231,7 +2232,6 @@ int r600_cp_resume(struct radeon_device *rdev)
         WREG32(CP_DEBUG, (1 << 27) | (1 << 28));
  
         rdev->cp.rptr = RREG32(CP_RB_RPTR);
-       rdev->cp.wptr = RREG32(CP_RB_WPTR);
  
         r600_cp_start(rdev);
         rdev->cp.ready = true;
@@ -2353,21 +2353,23 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
  }
  
  int r600_copy_blit(struct radeon_device *rdev,
-                  uint64_t src_offset, uint64_t dst_offset,
-                  unsigned num_pages, struct radeon_fence *fence)
+                  uint64_t src_offset,
+                  uint64_t dst_offset,
+                  unsigned num_gpu_pages,
+                  struct radeon_fence *fence)
  {
         int r;
  
         mutex_lock(&rdev->r600_blit.mutex);
         rdev->r600_blit.vb_ib = NULL;
-       r = r600_blit_prepare_copy(rdev, num_pages * RADEON_GPU_PAGE_SIZE);
+       r = r600_blit_prepare_copy(rdev, num_gpu_pages * RADEON_GPU_PAGE_SIZE);
         if (r) {
                 if (rdev->r600_blit.vb_ib)
                         radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
                 mutex_unlock(&rdev->r600_blit.mutex);
                 return r;
         }
-       r600_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * RADEON_GPU_PAGE_SIZE);
+       r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages * RADEON_GPU_PAGE_SIZE);
         r600_blit_done_copy(rdev, fence);
         mutex_unlock(&rdev->r600_blit.mutex);
         return 0;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h

index 32807baf55e232ebb0bc1c35ee4775d49fb1af00..c1e056b35b292b76bdae1416cf5c95280dacb301 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -322,6 +322,7 @@ union radeon_gart_table {
  
  #define RADEON_GPU_PAGE_SIZE 4096
  #define RADEON_GPU_PAGE_MASK (RADEON_GPU_PAGE_SIZE - 1)
+#define RADEON_GPU_PAGE_SHIFT 12
  
  struct radeon_gart {
         dma_addr_t                      table_addr;
@@ -914,17 +915,17 @@ struct radeon_asic {
         int (*copy_blit)(struct radeon_device *rdev,
                          uint64_t src_offset,
                          uint64_t dst_offset,
-                        unsigned num_pages,
+                        unsigned num_gpu_pages,
                          struct radeon_fence *fence);
         int (*copy_dma)(struct radeon_device *rdev,
                         uint64_t src_offset,
                         uint64_t dst_offset,
-                       unsigned num_pages,
+                       unsigned num_gpu_pages,
                         struct radeon_fence *fence);
         int (*copy)(struct radeon_device *rdev,
                     uint64_t src_offset,
                     uint64_t dst_offset,
-                   unsigned num_pages,
+                   unsigned num_gpu_pages,
                     struct radeon_fence *fence);
         uint32_t (*get_engine_clock)(struct radeon_device *rdev);
         void (*set_engine_clock)(struct radeon_device *rdev, uint32_t eng_clock);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h

index 3d7a0d7c6a9afe4601267e1707eeef2ac9eef5ec..3dedaa07aac197b4179ce27aa25054b460684f7b 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -75,7 +75,7 @@ uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg);
  int r100_copy_blit(struct radeon_device *rdev,
                    uint64_t src_offset,
                    uint64_t dst_offset,
-                  unsigned num_pages,
+                  unsigned num_gpu_pages,
                    struct radeon_fence *fence);
  int r100_set_surface_reg(struct radeon_device *rdev, int reg,
                          uint32_t tiling_flags, uint32_t pitch,
@@ -143,7 +143,7 @@ extern void r100_post_page_flip(struct radeon_device *rdev, int crtc);
  extern int r200_copy_dma(struct radeon_device *rdev,
                          uint64_t src_offset,
                          uint64_t dst_offset,
-                        unsigned num_pages,
+                        unsigned num_gpu_pages,
                          struct radeon_fence *fence);
  void r200_set_safe_registers(struct radeon_device *rdev);
  
@@ -311,7 +311,7 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
  int r600_ring_test(struct radeon_device *rdev);
  int r600_copy_blit(struct radeon_device *rdev,
                    uint64_t src_offset, uint64_t dst_offset,
-                  unsigned num_pages, struct radeon_fence *fence);
+                  unsigned num_gpu_pages, struct radeon_fence *fence);
  void r600_hpd_init(struct radeon_device *rdev);
  void r600_hpd_fini(struct radeon_device *rdev);
  bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -403,7 +403,7 @@ void evergreen_bandwidth_update(struct radeon_device *rdev);
  void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
  int evergreen_copy_blit(struct radeon_device *rdev,
                         uint64_t src_offset, uint64_t dst_offset,
-                       unsigned num_pages, struct radeon_fence *fence);
+                       unsigned num_gpu_pages, struct radeon_fence *fence);
  void evergreen_hpd_init(struct radeon_device *rdev);
  void evergreen_hpd_fini(struct radeon_device *rdev);
  bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
diff --git a/drivers/gpu/drm/radeon/radeon_clocks.c b/drivers/gpu/drm/radeon/radeon_clocks.c

index dcd0863e31ae8c24b1a8870aa777bbe2c550ce2b..b6e18c8db9f53511e2141a0f20aa2ec2880b263d 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_clocks.c
+++ b/drivers/gpu/drm/radeon/radeon_clocks.c
@@ -219,6 +219,9 @@ void radeon_get_clock_info(struct drm_device *dev)
                 } else {
                         DRM_INFO("Using generic clock info\n");
  
+                       /* may need to be per card */
+                       rdev->clock.max_pixel_clock = 35000;
+
                         if (rdev->flags & RADEON_IS_IGP) {
                                 p1pll->reference_freq = 1432;
                                 p2pll->reference_freq = 1432;
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c

index e0138b674aca038e99b020000cdfec0285aef405..63675241c7ff281092afca5429786058c9d0f4c2 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -3298,6 +3298,14 @@ void radeon_combios_asic_init(struct drm_device *dev)
             rdev->pdev->subsystem_device == 0x30a4)
                 return;
  
+       /* quirk for rs4xx Compaq Presario V5245EU laptop to make it resume
+        * - it hangs on resume inside the dynclk 1 table.
+        */
+       if (rdev->family == CHIP_RS480 &&
+           rdev->pdev->subsystem_vendor == 0x103c &&
+           rdev->pdev->subsystem_device == 0x30ae)
+               return;
+
         /* DYN CLK 1 */
         table = combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE);
         if (table)
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c

index 6d6b5f16bc09461af70ea4eefcd1046999b113aa..c4b8741dbf58671cb7abde00f587f11e26823283 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -60,18 +60,20 @@ void radeon_connector_hotplug(struct drm_connector *connector)
  
         radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
  
-       /* powering up/down the eDP panel generates hpd events which
-        * can interfere with modesetting.
-        */
-       if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+       /* if the connector is already off, don't turn it back on */
+       if (connector->dpms != DRM_MODE_DPMS_ON)
                 return;
  
-       /* pre-r600 did not always have the hpd pins mapped accurately to connectors */
-       if (rdev->family >= CHIP_R600) {
-               if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd))
+       /* just deal with DP (not eDP) here. */
+       if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) {
+               int saved_dpms = connector->dpms;
+
+               if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd) &&
+                   radeon_dp_needs_link_train(radeon_connector))
                         drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
                 else
                         drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+               connector->dpms = saved_dpms;
         }
  }
  
@@ -464,6 +466,16 @@ static bool radeon_connector_needs_extended_probe(struct radeon_device *dev,
                     (supported_device == ATOM_DEVICE_DFP2_SUPPORT))
                         return true;
         }
+       /* TOSHIBA Satellite L300D with ATI Mobility Radeon x1100
+        * (RS690M) sends data to i2c bus for a HDMI connector that
+        * is not implemented */
+       if ((dev->pdev->device == 0x791f) &&
+           (dev->pdev->subsystem_vendor == 0x1179) &&
+           (dev->pdev->subsystem_device == 0xff68)) {
+               if ((connector_type == DRM_MODE_CONNECTOR_HDMIA) &&
+                   (supported_device == ATOM_DEVICE_DFP2_SUPPORT))
+                       return true;
+       }
  
         /* Default: no EDID header probe required for DDC probing */
         return false;
@@ -474,11 +486,19 @@ static void radeon_fixup_lvds_native_mode(struct drm_encoder *encoder,
  {
         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
         struct drm_display_mode *native_mode = &radeon_encoder->native_mode;
+       struct drm_display_mode *t, *mode;
+
+       /* If the EDID preferred mode doesn't match the native mode, use it */
+       list_for_each_entry_safe(mode, t, &connector->probed_modes, head) {
+               if (mode->type & DRM_MODE_TYPE_PREFERRED) {
+                       if (mode->hdisplay != native_mode->hdisplay ||
+                           mode->vdisplay != native_mode->vdisplay)
+                               memcpy(native_mode, mode, sizeof(*mode));
+               }
+       }
  
         /* Try to get native mode details from EDID if necessary */
         if (!native_mode->clock) {
-               struct drm_display_mode *t, *mode;
-
                 list_for_each_entry_safe(mode, t, &connector->probed_modes, head) {
                         if (mode->hdisplay == native_mode->hdisplay &&
                             mode->vdisplay == native_mode->vdisplay) {
@@ -489,6 +509,7 @@ static void radeon_fixup_lvds_native_mode(struct drm_encoder *encoder,
                         }
                 }
         }
+
         if (!native_mode->clock) {
                 DRM_DEBUG_KMS("No LVDS native mode details, disabling RMX\n");
                 radeon_encoder->rmx_type = RMX_OFF;
@@ -1276,12 +1297,33 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
                 if (!radeon_dig_connector->edp_on)
                         atombios_set_edp_panel_power(connector,
                                                      ATOM_TRANSMITTER_ACTION_POWER_OFF);
-       } else {
-               /* need to setup ddc on the bridge */
-               if (radeon_connector_encoder_is_dp_bridge(connector)) {
+       } else if (radeon_connector_encoder_is_dp_bridge(connector)) {
+               /* DP bridges are always DP */
+               radeon_dig_connector->dp_sink_type = CONNECTOR_OBJECT_ID_DISPLAYPORT;
+               /* get the DPCD from the bridge */
+               radeon_dp_getdpcd(radeon_connector);
+
+               if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd))
+                       ret = connector_status_connected;
+               else {
+                       /* need to setup ddc on the bridge */
                         if (encoder)
                                 radeon_atom_ext_encoder_setup_ddc(encoder);
+                       if (radeon_ddc_probe(radeon_connector,
+                                            radeon_connector->requires_extended_probe))
+                               ret = connector_status_connected;
+               }
+
+               if ((ret == connector_status_disconnected) &&
+                   radeon_connector->dac_load_detect) {
+                       struct drm_encoder *encoder = radeon_best_single_encoder(connector);
+                       struct drm_encoder_helper_funcs *encoder_funcs;
+                       if (encoder) {
+                               encoder_funcs = encoder->helper_private;
+                               ret = encoder_funcs->detect(encoder, connector);
+                       }
                 }
+       } else {
                 radeon_dig_connector->dp_sink_type = radeon_dp_getsinktype(radeon_connector);
                 if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) {
                         ret = connector_status_connected;
@@ -1297,16 +1339,6 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
                                         ret = connector_status_connected;
                         }
                 }
-
-               if ((ret == connector_status_disconnected) &&
-                   radeon_connector->dac_load_detect) {
-                       struct drm_encoder *encoder = radeon_best_single_encoder(connector);
-                       struct drm_encoder_helper_funcs *encoder_funcs;
-                       if (encoder) {
-                               encoder_funcs = encoder->helper_private;
-                               ret = encoder_funcs->detect(encoder, connector);
-                       }
-               }
         }
  
         radeon_connector_update_scratch_regs(connector, ret);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c

index 440e6ecccc40054c5620e38db033da76ee3dd3aa..b51e15725c6e45321b088664749a48101e79be28 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -32,6 +32,7 @@
  #include <drm/radeon_drm.h>
  #include <linux/vgaarb.h>
  #include <linux/vga_switcheroo.h>
+#include <linux/efi.h>
  #include "radeon_reg.h"
  #include "radeon.h"
  #include "atom.h"
@@ -300,6 +301,8 @@ void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64
                 mc->mc_vram_size = mc->aper_size;
         }
         mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+       if (radeon_vram_limit && radeon_vram_limit < mc->real_vram_size)
+               mc->real_vram_size = radeon_vram_limit;
         dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
                         mc->mc_vram_size >> 20, mc->vram_start,
                         mc->vram_end, mc->real_vram_size >> 20);
@@ -348,6 +351,9 @@ bool radeon_card_posted(struct radeon_device *rdev)
  {
         uint32_t reg;
  
+       if (efi_enabled && rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE)
+               return false;
+
         /* first check CRTCs */
         if (ASIC_IS_DCE41(rdev)) {
                 reg = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET) |
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c

index 1a858944e4f3f31243ec55d5178ea7cd7b1f3422..6adb3e58affdc3fccbce06e254cf648c6f4de5cb 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -473,8 +473,8 @@ pflip_cleanup:
         spin_lock_irqsave(&dev->event_lock, flags);
         radeon_crtc->unpin_work = NULL;
  unlock_free:
-       drm_gem_object_unreference_unlocked(old_radeon_fb->obj);
         spin_unlock_irqrestore(&dev->event_lock, flags);
+       drm_gem_object_unreference_unlocked(old_radeon_fb->obj);
         radeon_fence_unref(&work->fence);
         kfree(work);
  
@@ -707,16 +707,21 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector)
                 radeon_router_select_ddc_port(radeon_connector);
  
         if ((radeon_connector->base.connector_type == DRM_MODE_CONNECTOR_DisplayPort) ||
-           (radeon_connector->base.connector_type == DRM_MODE_CONNECTOR_eDP)) {
+           (radeon_connector->base.connector_type == DRM_MODE_CONNECTOR_eDP) ||
+           radeon_connector_encoder_is_dp_bridge(&radeon_connector->base)) {
                 struct radeon_connector_atom_dig *dig = radeon_connector->con_priv;
+
                 if ((dig->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT ||
                      dig->dp_sink_type == CONNECTOR_OBJECT_ID_eDP) && dig->dp_i2c_bus)
-                       radeon_connector->edid = drm_get_edid(&radeon_connector->base, &dig->dp_i2c_bus->adapter);
-       }
-       if (!radeon_connector->ddc_bus)
-               return -1;
-       if (!radeon_connector->edid) {
-               radeon_connector->edid = drm_get_edid(&radeon_connector->base, &radeon_connector->ddc_bus->adapter);
+                       radeon_connector->edid = drm_get_edid(&radeon_connector->base,
+                                                             &dig->dp_i2c_bus->adapter);
+               else if (radeon_connector->ddc_bus && !radeon_connector->edid)
+                       radeon_connector->edid = drm_get_edid(&radeon_connector->base,
+                                                             &radeon_connector->ddc_bus->adapter);
+       } else {
+               if (radeon_connector->ddc_bus && !radeon_connector->edid)
+                       radeon_connector->edid = drm_get_edid(&radeon_connector->base,
+                                                             &radeon_connector->ddc_bus->adapter);
         }
  
         if (!radeon_connector->edid) {
diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c

index b293487e5aa3dc2193fee36eb0f9c00fe7080a0c..319d85d7e759b4f0599f2c5df2b9182568bd993d 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_encoders.c
@@ -2323,6 +2323,9 @@ radeon_add_atom_encoder(struct drm_device *dev,
         default:
                 encoder->possible_crtcs = 0x3;
                 break;
+       case 4:
+               encoder->possible_crtcs = 0xf;
+               break;
         case 6:
                 encoder->possible_crtcs = 0x3f;
                 break;
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h

index d09031c03e262606677f5abb1d59576066b1f695..68820f5f630354a91826da1f4a8391e8b92b5e86 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -479,6 +479,7 @@ extern void radeon_dp_set_link_config(struct drm_connector *connector,
                                       struct drm_display_mode *mode);
  extern void radeon_dp_link_train(struct drm_encoder *encoder,
                                  struct drm_connector *connector);
+extern bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector);
  extern u8 radeon_dp_getsinktype(struct radeon_connector *radeon_connector);
  extern bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector);
  extern void atombios_dig_encoder_setup(struct drm_encoder *encoder, int action, int panel_mode);
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c

index dee4a0c1b4b2f9e473cc59f1766ffb0e2121ba5f..602fa3541c454f8ac2c058cb5832520fad95612f 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -40,10 +40,14 @@ void radeon_test_moves(struct radeon_device *rdev)
         size = 1024 * 1024;
  
         /* Number of tests =
-        * (Total GTT - IB pool - writeback page - ring buffer) / test size
+        * (Total GTT - IB pool - writeback page - ring buffers) / test size
          */
-       n = ((u32)(rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - RADEON_GPU_PAGE_SIZE -
-            rdev->cp.ring_size)) / size;
+       n = rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - rdev->cp.ring_size;
+       if (rdev->wb.wb_obj)
+               n -= RADEON_GPU_PAGE_SIZE;
+       if (rdev->ih.ring_obj)
+               n -= rdev->ih.ring_size;
+       n /= size;
  
         gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL);
         if (!gtt_obj) {
@@ -132,9 +136,15 @@ void radeon_test_moves(struct radeon_device *rdev)
                      gtt_start++, vram_start++) {
                         if (*vram_start != gtt_start) {
                                 DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, "
-                                         "expected 0x%p (GTT map 0x%p-0x%p)\n",
-                                         i, *vram_start, gtt_start, gtt_map,
-                                         gtt_end);
+                                         "expected 0x%p (GTT/VRAM offset "
+                                         "0x%16llx/0x%16llx)\n",
+                                         i, *vram_start, gtt_start,
+                                         (unsigned long long)
+                                         (gtt_addr - rdev->mc.gtt_start +
+                                          (void*)gtt_start - gtt_map),
+                                         (unsigned long long)
+                                         (vram_addr - rdev->mc.vram_start +
+                                          (void*)gtt_start - gtt_map));
                                 radeon_bo_kunmap(vram_obj);
                                 goto out_cleanup;
                         }
@@ -175,9 +185,15 @@ void radeon_test_moves(struct radeon_device *rdev)
                      gtt_start++, vram_start++) {
                         if (*gtt_start != vram_start) {
                                 DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, "
-                                         "expected 0x%p (VRAM map 0x%p-0x%p)\n",
-                                         i, *gtt_start, vram_start, vram_map,
-                                         vram_end);
+                                         "expected 0x%p (VRAM/GTT offset "
+                                         "0x%16llx/0x%16llx)\n",
+                                         i, *gtt_start, vram_start,
+                                         (unsigned long long)
+                                         (vram_addr - rdev->mc.vram_start +
+                                          (void*)vram_start - vram_map),
+                                         (unsigned long long)
+                                         (gtt_addr - rdev->mc.gtt_start +
+                                          (void*)vram_start - vram_map));
                                 radeon_bo_kunmap(gtt_obj[i]);
                                 goto out_cleanup;
                         }
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c

index 60125ddba1e93551c41376a3e10e27a72519271e..0b5468bfaf54484531062bec756aa874e844bb3a 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -277,7 +277,12 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
                 DRM_ERROR("Trying to move memory with CP turned off.\n");
                 return -EINVAL;
         }
-       r = radeon_copy(rdev, old_start, new_start, new_mem->num_pages, fence);
+
+       BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);
+
+       r = radeon_copy(rdev, old_start, new_start,
+                       new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
+                       fence);
         /* FIXME: handle copy error */
         r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL,
                                       evict, no_wait_reserve, no_wait_gpu, new_mem);
@@ -450,6 +455,29 @@ static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
                         return -EINVAL;
                 mem->bus.base = rdev->mc.aper_base;
                 mem->bus.is_iomem = true;
+#ifdef __alpha__
+               /*
+                * Alpha: use bus.addr to hold the ioremap() return,
+                * so we can modify bus.base below.
+                */
+               if (mem->placement & TTM_PL_FLAG_WC)
+                       mem->bus.addr =
+                               ioremap_wc(mem->bus.base + mem->bus.offset,
+                                          mem->bus.size);
+               else
+                       mem->bus.addr =
+                               ioremap_nocache(mem->bus.base + mem->bus.offset,
+                                               mem->bus.size);
+
+               /*
+                * Alpha: Use just the bus offset plus
+                * the hose/domain memory base for bus.base.
+                * It then can be used to build PTEs for VRAM
+                * access, as done in ttm_bo_vm_fault().
+                */
+               mem->bus.base = (mem->bus.base & 0x0ffffffffUL) +
+                       rdev->ddev->hose->dense_mem_base;
+#endif
                 break;
         default:
                 return -EINVAL;
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c

index 56619f64b6bfa004c9b1900ac3dd36328593a6cf..ef06194c5aa6049c4813db6048f22922b1b5eaed 100644 (file)
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -353,8 +353,10 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc)
  
                 ret = ttm_tt_set_user(bo->ttm, current,
                                       bo->buffer_start, bo->num_pages);
-               if (unlikely(ret != 0))
+               if (unlikely(ret != 0)) {
                         ttm_tt_destroy(bo->ttm);
+                       bo->ttm = NULL;
+               }
                 break;
         default:
                 printk(KERN_ERR TTM_PFX "Illegal buffer object type\n");
@@ -390,10 +392,13 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
          * Create and bind a ttm if required.
          */
  
-       if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED) && (bo->ttm == NULL)) {
-               ret = ttm_bo_add_ttm(bo, false);
-               if (ret)
-                       goto out_err;
+       if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) {
+               if (bo->ttm == NULL) {
+                       bool zero = !(old_man->flags & TTM_MEMTYPE_FLAG_FIXED);
+                       ret = ttm_bo_add_ttm(bo, zero);
+                       if (ret)
+                               goto out_err;
+               }
  
                 ret = ttm_tt_set_placement_caching(bo->ttm, mem->placement);
                 if (ret)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c

index 77dbf408c0d01d86e2fe27a5d3cbc836d17ab46c..ae3c6f5dd2b71acee36f863deafc3896a874010e 100644 (file)
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -635,13 +635,13 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
                 if (ret)
                         return ret;
  
-               ttm_bo_free_old_node(bo);
                 if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
                     (bo->ttm != NULL)) {
                         ttm_tt_unbind(bo->ttm);
                         ttm_tt_destroy(bo->ttm);
                         bo->ttm = NULL;
                 }
+               ttm_bo_free_old_node(bo);
         } else {
                 /**
                  * This should help pipeline ordinary buffer moves.
diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig

index 306b15f39c9c12a8e717699c0d1c25fe396aa714..1130a898712516d2bfc9149bf9c859e401b6ac57 100644 (file)
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -589,6 +589,7 @@ config HID_WACOM_POWER_SUPPLY
  config HID_WIIMOTE
         tristate "Nintendo Wii Remote support"
         depends on BT_HIDP
+       depends on LEDS_CLASS
         ---help---
         Support for the Nintendo Wii Remote bluetooth device.
  
diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c

index b85744fe846477221ad02221f78bb397c603f54d..18b3bc646bf322ee4a7d96f709bc667dd6a85c06 100644 (file)
--- a/drivers/hid/hid-apple.c
+++ b/drivers/hid/hid-apple.c
@@ -444,6 +444,12 @@ static const struct hid_device_id apple_devices[] = {
         { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS),
                 .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN |
                         APPLE_RDESC_JIS },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI),
+               .driver_data = APPLE_HAS_FN },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO),
+               .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_JIS),
+               .driver_data = APPLE_HAS_FN },
         { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI),
                 .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN },
         { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO),
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c

index 1a5cf0c9cfcade20b35386e183f36a69155f5a7a..242353df3dc450bbde4186f04740d66b422a970f 100644 (file)
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1340,6 +1340,9 @@ static const struct hid_device_id hid_have_special_driver[] = {
         { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI) },
         { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ISO) },
         { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_JIS) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_JIS) },
         { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI) },
         { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO) },
         { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS) },
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h

index db63ccf21cc867b3d6392c994d241ba7f0585a85..7484e1b67249578a9e58ec9e3dcdf32bc6d84315 100644 (file)
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -109,6 +109,9 @@
  #define USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI   0x0245
  #define USB_DEVICE_ID_APPLE_WELLSPRING5_ISO    0x0246
  #define USB_DEVICE_ID_APPLE_WELLSPRING5_JIS    0x0247
+#define USB_DEVICE_ID_APPLE_ALU_REVB_ANSI      0x024f
+#define USB_DEVICE_ID_APPLE_ALU_REVB_ISO       0x0250
+#define USB_DEVICE_ID_APPLE_ALU_REVB_JIS       0x0251
  #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI  0x0239
  #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO   0x023a
  #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS   0x023b
@@ -274,6 +277,7 @@
  #define USB_DEVICE_ID_PENPOWER         0x00f4
  
  #define USB_VENDOR_ID_GREENASIA                0x0e8f
+#define USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD        0x3013
  
  #define USB_VENDOR_ID_GRETAGMACBETH    0x0971
  #define USB_DEVICE_ID_GRETAGMACBETH_HUEY       0x2005
@@ -576,6 +580,9 @@
  #define USB_DEVICE_ID_SAMSUNG_IR_REMOTE        0x0001
  #define USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE       0x0600
  
+#define USB_VENDOR_ID_SIGMA_MICRO      0x1c4f
+#define USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD     0x0002
+
  #define USB_VENDOR_ID_SKYCABLE                 0x1223
  #define        USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER       0x3F07
  
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c

index 0ec91c18a4216a52a4a3c7d85292a22f61a6e828..f0fbd7bd239e389e11e21d55ef108b5e8066984d 100644 (file)
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -81,6 +81,28 @@ MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state fie
  #define NO_TOUCHES -1
  #define SINGLE_TOUCH_UP -2
  
+/* Touch surface information. Dimension is in hundredths of a mm, min and max
+ * are in units. */
+#define MOUSE_DIMENSION_X (float)9056
+#define MOUSE_MIN_X -1100
+#define MOUSE_MAX_X 1258
+#define MOUSE_RES_X ((MOUSE_MAX_X - MOUSE_MIN_X) / (MOUSE_DIMENSION_X / 100))
+#define MOUSE_DIMENSION_Y (float)5152
+#define MOUSE_MIN_Y -1589
+#define MOUSE_MAX_Y 2047
+#define MOUSE_RES_Y ((MOUSE_MAX_Y - MOUSE_MIN_Y) / (MOUSE_DIMENSION_Y / 100))
+
+#define TRACKPAD_DIMENSION_X (float)13000
+#define TRACKPAD_MIN_X -2909
+#define TRACKPAD_MAX_X 3167
+#define TRACKPAD_RES_X \
+       ((TRACKPAD_MAX_X - TRACKPAD_MIN_X) / (TRACKPAD_DIMENSION_X / 100))
+#define TRACKPAD_DIMENSION_Y (float)11000
+#define TRACKPAD_MIN_Y -2456
+#define TRACKPAD_MAX_Y 2565
+#define TRACKPAD_RES_Y \
+       ((TRACKPAD_MAX_Y - TRACKPAD_MIN_Y) / (TRACKPAD_DIMENSION_Y / 100))
+
  /**
   * struct magicmouse_sc - Tracks Magic Mouse-specific data.
   * @input: Input device through which we report events.
@@ -406,17 +428,31 @@ static void magicmouse_setup_input(struct input_dev *input, struct hid_device *h
                  * inverse of the reported Y.
                  */
                 if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) {
-                       input_set_abs_params(input, ABS_MT_POSITION_X, -1100,
-                               1358, 4, 0);
-                       input_set_abs_params(input, ABS_MT_POSITION_Y, -1589,
-                               2047, 4, 0);
+                       input_set_abs_params(input, ABS_MT_POSITION_X,
+                               MOUSE_MIN_X, MOUSE_MAX_X, 4, 0);
+                       input_set_abs_params(input, ABS_MT_POSITION_Y,
+                               MOUSE_MIN_Y, MOUSE_MAX_Y, 4, 0);
+
+                       input_abs_set_res(input, ABS_MT_POSITION_X,
+                               MOUSE_RES_X);
+                       input_abs_set_res(input, ABS_MT_POSITION_Y,
+                               MOUSE_RES_Y);
                 } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */
-                       input_set_abs_params(input, ABS_X, -2909, 3167, 4, 0);
-                       input_set_abs_params(input, ABS_Y, -2456, 2565, 4, 0);
-                       input_set_abs_params(input, ABS_MT_POSITION_X, -2909,
-                               3167, 4, 0);
-                       input_set_abs_params(input, ABS_MT_POSITION_Y, -2456,
-                               2565, 4, 0);
+                       input_set_abs_params(input, ABS_X, TRACKPAD_MIN_X,
+                               TRACKPAD_MAX_X, 4, 0);
+                       input_set_abs_params(input, ABS_Y, TRACKPAD_MIN_Y,
+                               TRACKPAD_MAX_Y, 4, 0);
+                       input_set_abs_params(input, ABS_MT_POSITION_X,
+                               TRACKPAD_MIN_X, TRACKPAD_MAX_X, 4, 0);
+                       input_set_abs_params(input, ABS_MT_POSITION_Y,
+                               TRACKPAD_MIN_Y, TRACKPAD_MAX_Y, 4, 0);
+
+                       input_abs_set_res(input, ABS_X, TRACKPAD_RES_X);
+                       input_abs_set_res(input, ABS_Y, TRACKPAD_RES_Y);
+                       input_abs_set_res(input, ABS_MT_POSITION_X,
+                               TRACKPAD_RES_X);
+                       input_abs_set_res(input, ABS_MT_POSITION_Y,
+                               TRACKPAD_RES_Y);
                 }
  
                 input_set_events_per_packet(input, 60);
@@ -501,9 +537,17 @@ static int magicmouse_probe(struct hid_device *hdev,
         }
         report->size = 6;
  
+       /*
+        * Some devices repond with 'invalid report id' when feature
+        * report switching it into multitouch mode is sent to it.
+        *
+        * This results in -EIO from the _raw low-level transport callback,
+        * but there seems to be no other way of switching the mode.
+        * Thus the super-ugly hacky success check below.
+        */
         ret = hdev->hid_output_raw_report(hdev, feature, sizeof(feature),
                         HID_FEATURE_REPORT);
-       if (ret != sizeof(feature)) {
+       if (ret != -EIO && ret != sizeof(feature)) {
                 hid_err(hdev, "unable to request touch data (%d)\n", ret);
                 goto err_stop_hw;
         }
diff --git a/drivers/hid/hid-wacom.c b/drivers/hid/hid-wacom.c

index 06888323828c3b18a8e162402193eeb6aec72b84..72ca689b64741c3cbb41d6041ce44d411534dd5d 100644 (file)
--- a/drivers/hid/hid-wacom.c
+++ b/drivers/hid/hid-wacom.c
@@ -353,11 +353,7 @@ static int wacom_probe(struct hid_device *hdev,
         if (ret) {
                 hid_warn(hdev, "can't create sysfs battery attribute, err: %d\n",
                          ret);
-               /*
-                * battery attribute is not critical for the tablet, but if it
-                * failed then there is no need to create ac attribute
-                */
-               goto move_on;
+               goto err_battery;
         }
  
         wdata->ac.properties = wacom_ac_props;
@@ -371,18 +367,14 @@ static int wacom_probe(struct hid_device *hdev,
         if (ret) {
                 hid_warn(hdev,
                          "can't create ac battery attribute, err: %d\n", ret);
-               /*
-                * ac attribute is not critical for the tablet, but if it
-                * failed then we don't want to battery attribute to exist
-                */
-               power_supply_unregister(&wdata->battery);
+               goto err_ac;
         }
-
-move_on:
  #endif
         hidinput = list_entry(hdev->inputs.next, struct hid_input, list);
         input = hidinput->input;
  
+       __set_bit(INPUT_PROP_POINTER, input->propbit);
+
         /* Basics */
         input->evbit[0] |= BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_REL);
  
@@ -416,6 +408,13 @@ move_on:
  
         return 0;
  
+#ifdef CONFIG_HID_WACOM_POWER_SUPPLY
+err_ac:
+       power_supply_unregister(&wdata->battery);
+err_battery:
+       device_remove_file(&hdev->dev, &dev_attr_speed);
+       hid_hw_stop(hdev);
+#endif
  err_free:
         kfree(wdata);
         return ret;
@@ -426,6 +425,7 @@ static void wacom_remove(struct hid_device *hdev)
  #ifdef CONFIG_HID_WACOM_POWER_SUPPLY
         struct wacom_data *wdata = hid_get_drvdata(hdev);
  #endif
+       device_remove_file(&hdev->dev, &dev_attr_speed);
         hid_hw_stop(hdev);
  
  #ifdef CONFIG_HID_WACOM_POWER_SUPPLY
diff --git a/drivers/hid/hid-wiimote.c b/drivers/hid/hid-wiimote.c

index a594383ce03db0458ea13f34638ed7f0eafb8b19..85a02e5f9fe873e5097a55861ec55c77f50ee05f 100644 (file)
--- a/drivers/hid/hid-wiimote.c
+++ b/drivers/hid/hid-wiimote.c
@@ -10,10 +10,10 @@
   * any later version.
   */
  
-#include <linux/atomic.h>
  #include <linux/device.h>
  #include <linux/hid.h>
  #include <linux/input.h>
+#include <linux/leds.h>
  #include <linux/module.h>
  #include <linux/spinlock.h>
  #include "hid-ids.h"
@@ -33,9 +33,9 @@ struct wiimote_state {
  };
  
  struct wiimote_data {
-       atomic_t ready;
         struct hid_device *hdev;
         struct input_dev *input;
+       struct led_classdev *leds[4];
  
         spinlock_t qlock;
         __u8 head;
@@ -53,8 +53,15 @@ struct wiimote_data {
  #define WIIPROTO_FLAGS_LEDS (WIIPROTO_FLAG_LED1 | WIIPROTO_FLAG_LED2 | \
                                         WIIPROTO_FLAG_LED3 | WIIPROTO_FLAG_LED4)
  
+/* return flag for led \num */
+#define WIIPROTO_FLAG_LED(num) (WIIPROTO_FLAG_LED1 << (num - 1))
+
  enum wiiproto_reqs {
+       WIIPROTO_REQ_NULL = 0x0,
         WIIPROTO_REQ_LED = 0x11,
+       WIIPROTO_REQ_DRM = 0x12,
+       WIIPROTO_REQ_STATUS = 0x20,
+       WIIPROTO_REQ_RETURN = 0x22,
         WIIPROTO_REQ_DRM_K = 0x30,
  };
  
@@ -87,9 +94,6 @@ static __u16 wiiproto_keymap[] = {
         BTN_MODE,       /* WIIPROTO_KEY_HOME */
  };
  
-#define dev_to_wii(pdev) hid_get_drvdata(container_of(pdev, struct hid_device, \
-                                                                       dev))
-
  static ssize_t wiimote_hid_send(struct hid_device *hdev, __u8 *buffer,
                                                                 size_t count)
  {
@@ -192,66 +196,96 @@ static void wiiproto_req_leds(struct wiimote_data *wdata, int leds)
         wiimote_queue(wdata, cmd, sizeof(cmd));
  }
  
-#define wiifs_led_show_set(num)                                                \
-static ssize_t wiifs_led_show_##num(struct device *dev,                        \
-                       struct device_attribute *attr, char *buf)       \
-{                                                                      \
-       struct wiimote_data *wdata = dev_to_wii(dev);                   \
-       unsigned long flags;                                            \
-       int state;                                                      \
-                                                                       \
-       if (!atomic_read(&wdata->ready))                                \
-               return -EBUSY;                                          \
-                                                                       \
-       spin_lock_irqsave(&wdata->state.lock, flags);                   \
-       state = !!(wdata->state.flags & WIIPROTO_FLAG_LED##num);        \
-       spin_unlock_irqrestore(&wdata->state.lock, flags);              \
-                                                                       \
-       return sprintf(buf, "%d\n", state);                             \
-}                                                                      \
-static ssize_t wiifs_led_set_##num(struct device *dev,                 \
-       struct device_attribute *attr, const char *buf, size_t count)   \
-{                                                                      \
-       struct wiimote_data *wdata = dev_to_wii(dev);                   \
-       int tmp = simple_strtoul(buf, NULL, 10);                        \
-       unsigned long flags;                                            \
-       __u8 state;                                                     \
-                                                                       \
-       if (!atomic_read(&wdata->ready))                                \
-               return -EBUSY;                                          \
-                                                                       \
-       spin_lock_irqsave(&wdata->state.lock, flags);                   \
-                                                                       \
-       state = wdata->state.flags;                                     \
-                                                                       \
-       if (tmp)                                                        \
-               wiiproto_req_leds(wdata, state | WIIPROTO_FLAG_LED##num);\
-       else                                                            \
-               wiiproto_req_leds(wdata, state & ~WIIPROTO_FLAG_LED##num);\
-                                                                       \
-       spin_unlock_irqrestore(&wdata->state.lock, flags);              \
-                                                                       \
-       return count;                                                   \
-}                                                                      \
-static DEVICE_ATTR(led##num, S_IRUGO | S_IWUSR, wiifs_led_show_##num,  \
-                                               wiifs_led_set_##num)
-
-wiifs_led_show_set(1);
-wiifs_led_show_set(2);
-wiifs_led_show_set(3);
-wiifs_led_show_set(4);
+/*
+ * Check what peripherals of the wiimote are currently
+ * active and select a proper DRM that supports all of
+ * the requested data inputs.
+ */
+static __u8 select_drm(struct wiimote_data *wdata)
+{
+       return WIIPROTO_REQ_DRM_K;
+}
+
+static void wiiproto_req_drm(struct wiimote_data *wdata, __u8 drm)
+{
+       __u8 cmd[3];
+
+       if (drm == WIIPROTO_REQ_NULL)
+               drm = select_drm(wdata);
+
+       cmd[0] = WIIPROTO_REQ_DRM;
+       cmd[1] = 0;
+       cmd[2] = drm;
+
+       wiimote_queue(wdata, cmd, sizeof(cmd));
+}
+
+static enum led_brightness wiimote_leds_get(struct led_classdev *led_dev)
+{
+       struct wiimote_data *wdata;
+       struct device *dev = led_dev->dev->parent;
+       int i;
+       unsigned long flags;
+       bool value = false;
+
+       wdata = hid_get_drvdata(container_of(dev, struct hid_device, dev));
+
+       for (i = 0; i < 4; ++i) {
+               if (wdata->leds[i] == led_dev) {
+                       spin_lock_irqsave(&wdata->state.lock, flags);
+                       value = wdata->state.flags & WIIPROTO_FLAG_LED(i + 1);
+                       spin_unlock_irqrestore(&wdata->state.lock, flags);
+                       break;
+               }
+       }
+
+       return value ? LED_FULL : LED_OFF;
+}
+
+static void wiimote_leds_set(struct led_classdev *led_dev,
+                                               enum led_brightness value)
+{
+       struct wiimote_data *wdata;
+       struct device *dev = led_dev->dev->parent;
+       int i;
+       unsigned long flags;
+       __u8 state, flag;
+
+       wdata = hid_get_drvdata(container_of(dev, struct hid_device, dev));
+
+       for (i = 0; i < 4; ++i) {
+               if (wdata->leds[i] == led_dev) {
+                       flag = WIIPROTO_FLAG_LED(i + 1);
+                       spin_lock_irqsave(&wdata->state.lock, flags);
+                       state = wdata->state.flags;
+                       if (value == LED_OFF)
+                               wiiproto_req_leds(wdata, state & ~flag);
+                       else
+                               wiiproto_req_leds(wdata, state | flag);
+                       spin_unlock_irqrestore(&wdata->state.lock, flags);
+                       break;
+               }
+       }
+}
  
  static int wiimote_input_event(struct input_dev *dev, unsigned int type,
                                                 unsigned int code, int value)
+{
+       return 0;
+}
+
+static int wiimote_input_open(struct input_dev *dev)
  {
         struct wiimote_data *wdata = input_get_drvdata(dev);
  
-       if (!atomic_read(&wdata->ready))
-               return -EBUSY;
-       /* smp_rmb: Make sure wdata->xy is available when wdata->ready is 1 */
-       smp_rmb();
+       return hid_hw_open(wdata->hdev);
+}
  
-       return 0;
+static void wiimote_input_close(struct input_dev *dev)
+{
+       struct wiimote_data *wdata = input_get_drvdata(dev);
+
+       hid_hw_close(wdata->hdev);
  }
  
  static void handler_keys(struct wiimote_data *wdata, const __u8 *payload)
@@ -281,6 +315,26 @@ static void handler_keys(struct wiimote_data *wdata, const __u8 *payload)
         input_sync(wdata->input);
  }
  
+static void handler_status(struct wiimote_data *wdata, const __u8 *payload)
+{
+       handler_keys(wdata, payload);
+
+       /* on status reports the drm is reset so we need to resend the drm */
+       wiiproto_req_drm(wdata, WIIPROTO_REQ_NULL);
+}
+
+static void handler_return(struct wiimote_data *wdata, const __u8 *payload)
+{
+       __u8 err = payload[3];
+       __u8 cmd = payload[2];
+
+       handler_keys(wdata, payload);
+
+       if (err)
+               hid_warn(wdata->hdev, "Remote error %hhu on req %hhu\n", err,
+                                                                       cmd);
+}
+
  struct wiiproto_handler {
         __u8 id;
         size_t size;
@@ -288,6 +342,8 @@ struct wiiproto_handler {
  };
  
  static struct wiiproto_handler handlers[] = {
+       { .id = WIIPROTO_REQ_STATUS, .size = 6, .func = handler_status },
+       { .id = WIIPROTO_REQ_RETURN, .size = 4, .func = handler_return },
         { .id = WIIPROTO_REQ_DRM_K, .size = 2, .func = handler_keys },
         { .id = 0 }
  };
@@ -300,11 +356,6 @@ static int wiimote_hid_event(struct hid_device *hdev, struct hid_report *report,
         int i;
         unsigned long flags;
  
-       if (!atomic_read(&wdata->ready))
-               return -EBUSY;
-       /* smp_rmb: Make sure wdata->xy is available when wdata->ready is 1 */
-       smp_rmb();
-
         if (size < 1)
                 return -EINVAL;
  
@@ -321,6 +372,58 @@ static int wiimote_hid_event(struct hid_device *hdev, struct hid_report *report,
         return 0;
  }
  
+static void wiimote_leds_destroy(struct wiimote_data *wdata)
+{
+       int i;
+       struct led_classdev *led;
+
+       for (i = 0; i < 4; ++i) {
+               if (wdata->leds[i]) {
+                       led = wdata->leds[i];
+                       wdata->leds[i] = NULL;
+                       led_classdev_unregister(led);
+                       kfree(led);
+               }
+       }
+}
+
+static int wiimote_leds_create(struct wiimote_data *wdata)
+{
+       int i, ret;
+       struct device *dev = &wdata->hdev->dev;
+       size_t namesz = strlen(dev_name(dev)) + 9;
+       struct led_classdev *led;
+       char *name;
+
+       for (i = 0; i < 4; ++i) {
+               led = kzalloc(sizeof(struct led_classdev) + namesz, GFP_KERNEL);
+               if (!led) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+               name = (void*)&led[1];
+               snprintf(name, namesz, "%s:blue:p%d", dev_name(dev), i);
+               led->name = name;
+               led->brightness = 0;
+               led->max_brightness = 1;
+               led->brightness_get = wiimote_leds_get;
+               led->brightness_set = wiimote_leds_set;
+
+               ret = led_classdev_register(dev, led);
+               if (ret) {
+                       kfree(led);
+                       goto err;
+               }
+               wdata->leds[i] = led;
+       }
+
+       return 0;
+
+err:
+       wiimote_leds_destroy(wdata);
+       return ret;
+}
+
  static struct wiimote_data *wiimote_create(struct hid_device *hdev)
  {
         struct wiimote_data *wdata;
@@ -341,6 +444,8 @@ static struct wiimote_data *wiimote_create(struct hid_device *hdev)
  
         input_set_drvdata(wdata->input, wdata);
         wdata->input->event = wiimote_input_event;
+       wdata->input->open = wiimote_input_open;
+       wdata->input->close = wiimote_input_close;
         wdata->input->dev.parent = &wdata->hdev->dev;
         wdata->input->id.bustype = wdata->hdev->bus;
         wdata->input->id.vendor = wdata->hdev->vendor;
@@ -362,6 +467,12 @@ static struct wiimote_data *wiimote_create(struct hid_device *hdev)
  
  static void wiimote_destroy(struct wiimote_data *wdata)
  {
+       wiimote_leds_destroy(wdata);
+
+       input_unregister_device(wdata->input);
+       cancel_work_sync(&wdata->worker);
+       hid_hw_stop(wdata->hdev);
+
         kfree(wdata);
  }
  
@@ -377,19 +488,6 @@ static int wiimote_hid_probe(struct hid_device *hdev,
                 return -ENOMEM;
         }
  
-       ret = device_create_file(&hdev->dev, &dev_attr_led1);
-       if (ret)
-               goto err;
-       ret = device_create_file(&hdev->dev, &dev_attr_led2);
-       if (ret)
-               goto err;
-       ret = device_create_file(&hdev->dev, &dev_attr_led3);
-       if (ret)
-               goto err;
-       ret = device_create_file(&hdev->dev, &dev_attr_led4);
-       if (ret)
-               goto err;
-
         ret = hid_parse(hdev);
         if (ret) {
                 hid_err(hdev, "HID parse failed\n");
@@ -408,9 +506,10 @@ static int wiimote_hid_probe(struct hid_device *hdev,
                 goto err_stop;
         }
  
-       /* smp_wmb: Write wdata->xy first before wdata->ready is set to 1 */
-       smp_wmb();
-       atomic_set(&wdata->ready, 1);
+       ret = wiimote_leds_create(wdata);
+       if (ret)
+               goto err_free;
+
         hid_info(hdev, "New device registered\n");
  
         /* by default set led1 after device initialization */
@@ -420,15 +519,15 @@ static int wiimote_hid_probe(struct hid_device *hdev,
  
         return 0;
  
+err_free:
+       wiimote_destroy(wdata);
+       return ret;
+
  err_stop:
         hid_hw_stop(hdev);
  err:
         input_free_device(wdata->input);
-       device_remove_file(&hdev->dev, &dev_attr_led1);
-       device_remove_file(&hdev->dev, &dev_attr_led2);
-       device_remove_file(&hdev->dev, &dev_attr_led3);
-       device_remove_file(&hdev->dev, &dev_attr_led4);
-       wiimote_destroy(wdata);
+       kfree(wdata);
         return ret;
  }
  
@@ -437,16 +536,6 @@ static void wiimote_hid_remove(struct hid_device *hdev)
         struct wiimote_data *wdata = hid_get_drvdata(hdev);
  
         hid_info(hdev, "Device removed\n");
-
-       device_remove_file(&hdev->dev, &dev_attr_led1);
-       device_remove_file(&hdev->dev, &dev_attr_led2);
-       device_remove_file(&hdev->dev, &dev_attr_led3);
-       device_remove_file(&hdev->dev, &dev_attr_led4);
-
-       hid_hw_stop(hdev);
-       input_unregister_device(wdata->input);
-
-       cancel_work_sync(&wdata->worker);
         wiimote_destroy(wdata);
  }
  
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c

index 621959d5cc42c6b6798328fe32ce32072fa8669a..3146fdcda272cd33f199f13b686d28c092a4921c 100644 (file)
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -47,6 +47,7 @@ static const struct hid_blacklist {
         { USB_VENDOR_ID_AFATECH, USB_DEVICE_ID_AFATECH_AF9016, HID_QUIRK_FULLSPEED_INTERVAL },
  
         { USB_VENDOR_ID_ETURBOTOUCH, USB_DEVICE_ID_ETURBOTOUCH, HID_QUIRK_MULTI_INPUT },
+       { USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD, HID_QUIRK_MULTI_INPUT },
         { USB_VENDOR_ID_PANTHERLORD, USB_DEVICE_ID_PANTHERLORD_TWIN_USB_JOYSTICK, HID_QUIRK_MULTI_INPUT | HID_QUIRK_SKIP_OUTPUT_REPORTS },
         { USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII, HID_QUIRK_MULTI_INPUT },
         { USB_VENDOR_ID_TOUCHPACK, USB_DEVICE_ID_TOUCHPACK_RTS, HID_QUIRK_MULTI_INPUT },
@@ -89,6 +90,7 @@ static const struct hid_blacklist {
  
         { USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_MULTI_TOUCH, HID_QUIRK_MULTI_INPUT },
         { USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS, HID_QUIRK_MULTI_INPUT },
+       { USB_VENDOR_ID_SIGMA_MICRO, USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD, HID_QUIRK_NO_INIT_REPORTS },
         { 0, 0 }
  };
  
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c

index 59d83e83da7fe0b5de39f7ce6e001899babfb7be..4112576761330fe7af5c9ee26f8715e6c4a8bc73 100644 (file)
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -601,7 +601,12 @@ static int create_core_data(struct platform_data *pdata,
         err = rdmsr_safe_on_cpu(cpu, tdata->intrpt_reg, &eax, &edx);
         if (!err) {
                 tdata->attr_size += MAX_THRESH_ATTRS;
-               tdata->ttarget = tdata->tjmax - ((eax >> 16) & 0x7f) * 1000;
+               tdata->tmin = tdata->tjmax -
+                             ((eax & THERM_MASK_THRESHOLD0) >>
+                              THERM_SHIFT_THRESHOLD0) * 1000;
+               tdata->ttarget = tdata->tjmax -
+                                ((eax & THERM_MASK_THRESHOLD1) >>
+                                 THERM_SHIFT_THRESHOLD1) * 1000;
         }
  
         pdata->core_data[attr_no] = tdata;
diff --git a/drivers/hwmon/i5k_amb.c b/drivers/hwmon/i5k_amb.c

index c4c40be0edbfd6afb00a7e6dbf6a0b20a5544b3f..d22f241b6a67ea8b1f2030a54a913360db615a62 100644 (file)
--- a/drivers/hwmon/i5k_amb.c
+++ b/drivers/hwmon/i5k_amb.c
@@ -114,7 +114,6 @@ struct i5k_amb_data {
         void __iomem *amb_mmio;
         struct i5k_device_attribute *attrs;
         unsigned int num_attrs;
-       unsigned long chipset_id;
  };
  
  static ssize_t show_name(struct device *dev, struct device_attribute *devattr,
@@ -444,8 +443,6 @@ static int __devinit i5k_find_amb_registers(struct i5k_amb_data *data,
                 goto out;
         }
  
-       data->chipset_id = devid;
-
         res = 0;
  out:
         pci_dev_put(pcidev);
@@ -478,23 +475,13 @@ out:
         return res;
  }
  
-static unsigned long i5k_channel_pci_id(struct i5k_amb_data *data,
-                                       unsigned long channel)
-{
-       switch (data->chipset_id) {
-       case PCI_DEVICE_ID_INTEL_5000_ERR:
-               return PCI_DEVICE_ID_INTEL_5000_FBD0 + channel;
-       case PCI_DEVICE_ID_INTEL_5400_ERR:
-               return PCI_DEVICE_ID_INTEL_5400_FBD0 + channel;
-       default:
-               BUG();
-       }
-}
-
-static unsigned long chipset_ids[] = {
-       PCI_DEVICE_ID_INTEL_5000_ERR,
-       PCI_DEVICE_ID_INTEL_5400_ERR,
-       0
+static struct {
+       unsigned long err;
+       unsigned long fbd0;
+} chipset_ids[] __devinitdata  = {
+       { PCI_DEVICE_ID_INTEL_5000_ERR, PCI_DEVICE_ID_INTEL_5000_FBD0 },
+       { PCI_DEVICE_ID_INTEL_5400_ERR, PCI_DEVICE_ID_INTEL_5400_FBD0 },
+       { 0, 0 }
  };
  
  #ifdef MODULE
@@ -510,8 +497,7 @@ static int __devinit i5k_amb_probe(struct platform_device *pdev)
  {
         struct i5k_amb_data *data;
         struct resource *reso;
-       int i;
-       int res = -ENODEV;
+       int i, res;
  
         data = kzalloc(sizeof(*data), GFP_KERNEL);
         if (!data)
@@ -520,22 +506,22 @@ static int __devinit i5k_amb_probe(struct platform_device *pdev)
         /* Figure out where the AMB registers live */
         i = 0;
         do {
-               res = i5k_find_amb_registers(data, chipset_ids[i]);
+               res = i5k_find_amb_registers(data, chipset_ids[i].err);
+               if (res == 0)
+                       break;
                 i++;
-       } while (res && chipset_ids[i]);
+       } while (chipset_ids[i].err);
  
         if (res)
                 goto err;
  
         /* Copy the DIMM presence map for the first two channels */
-       res = i5k_channel_probe(&data->amb_present[0],
-                               i5k_channel_pci_id(data, 0));
+       res = i5k_channel_probe(&data->amb_present[0], chipset_ids[i].fbd0);
         if (res)
                 goto err;
  
         /* Copy the DIMM presence map for the optional second two channels */
-       i5k_channel_probe(&data->amb_present[2],
-                         i5k_channel_pci_id(data, 1));
+       i5k_channel_probe(&data->amb_present[2], chipset_ids[i].fbd0 + 1);
  
         /* Set up resource regions */
         reso = request_mem_region(data->amb_base, data->amb_len, DRVNAME);
diff --git a/drivers/hwmon/ibmaem.c b/drivers/hwmon/ibmaem.c

index 1a409c5bc9bce687922ce5389150599c6e928f39..c316294c48b47ba848f5844270e0394d13e2be87 100644 (file)
--- a/drivers/hwmon/ibmaem.c
+++ b/drivers/hwmon/ibmaem.c
@@ -432,13 +432,15 @@ static int aem_read_sensor(struct aem_data *data, u8 elt, u8 reg,
         aem_send_message(ipmi);
  
         res = wait_for_completion_timeout(&ipmi->read_complete, IPMI_TIMEOUT);
-       if (!res)
-               return -ETIMEDOUT;
+       if (!res) {
+               res = -ETIMEDOUT;
+               goto out;
+       }
  
         if (ipmi->rx_result || ipmi->rx_msg_len != rs_size ||
             memcmp(&rs_resp->id, &system_x_id, sizeof(system_x_id))) {
-               kfree(rs_resp);
-               return -ENOENT;
+               res = -ENOENT;
+               goto out;
         }
  
         switch (size) {
@@ -463,8 +465,11 @@ static int aem_read_sensor(struct aem_data *data, u8 elt, u8 reg,
                 break;
         }
         }
+       res = 0;
  
-       return 0;
+out:
+       kfree(rs_resp);
+       return res;
  }
  
  /* Update AEM energy registers */
diff --git a/drivers/hwmon/max16065.c b/drivers/hwmon/max16065.c

index d94a24fdf4ba2a07ad006ebeae536745d5e1a25d..dd2d7b9620c2bde5bb8cac1a2c8ac48879a1b290 100644 (file)
--- a/drivers/hwmon/max16065.c
+++ b/drivers/hwmon/max16065.c
@@ -124,7 +124,7 @@ static inline int MV_TO_LIMIT(int mv, int range)
  
  static inline int ADC_TO_CURR(int adc, int gain)
  {
-       return adc * 1400000 / gain * 255;
+       return adc * 1400000 / (gain * 255);
  }
  
  /*
diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c

index d7926f4336b5f5b8d712e6e74c6118f8e2a24d13..eab11615dced6b54e71996bfd4df60c159bbe570 100644 (file)
--- a/drivers/hwmon/ntc_thermistor.c
+++ b/drivers/hwmon/ntc_thermistor.c
@@ -211,8 +211,7 @@ static int lookup_comp(struct ntc_data *data,
         if (data->comp[mid].ohm <= ohm) {
                 *i_low = mid;
                 *i_high = mid - 1;
-       }
-       if (data->comp[mid].ohm > ohm) {
+       } else {
                 *i_low = mid + 1;
                 *i_high = mid;
         }
diff --git a/drivers/hwmon/pmbus/lm25066.c b/drivers/hwmon/pmbus/lm25066.c

index d4bc114572de2d8232497d394204c1cfc0e28d10..ac254fba551b21cc30b4a8276683cb85ab57ea67 100644 (file)
--- a/drivers/hwmon/pmbus/lm25066.c
+++ b/drivers/hwmon/pmbus/lm25066.c
@@ -161,6 +161,17 @@ static int lm25066_write_word_data(struct i2c_client *client, int page, int reg,
         return ret;
  }
  
+static int lm25066_write_byte(struct i2c_client *client, int page, u8 value)
+{
+       if (page > 1)
+               return -EINVAL;
+
+       if (page == 0)
+               return pmbus_write_byte(client, 0, value);
+
+       return 0;
+}
+
  static int lm25066_probe(struct i2c_client *client,
                           const struct i2c_device_id *id)
  {
@@ -204,6 +215,7 @@ static int lm25066_probe(struct i2c_client *client,
  
         info->read_word_data = lm25066_read_word_data;
         info->write_word_data = lm25066_write_word_data;
+       info->write_byte = lm25066_write_byte;
  
         switch (id->driver_data) {
         case lm25066:
diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h

index 0808d986d75b4b5efb59a3b7c64ec6f539fc6438..a6ae20ffef6b64d96f8864c27c864996fd893168 100644 (file)
--- a/drivers/hwmon/pmbus/pmbus.h
+++ b/drivers/hwmon/pmbus/pmbus.h
@@ -325,6 +325,7 @@ struct pmbus_driver_info {
         int (*read_word_data)(struct i2c_client *client, int page, int reg);
         int (*write_word_data)(struct i2c_client *client, int page, int reg,
                                u16 word);
+       int (*write_byte)(struct i2c_client *client, int page, u8 value);
         /*
          * The identify function determines supported PMBus functionality.
          * This function is only necessary if a chip driver supports multiple
diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c

index 5c1b6cf317012682875cd2d74e6388189b33009d..397fc59b5682e44901b29d9857e719c34697e8c5 100644 (file)
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -182,6 +182,24 @@ int pmbus_write_byte(struct i2c_client *client, int page, u8 value)
  }
  EXPORT_SYMBOL_GPL(pmbus_write_byte);
  
+/*
+ * _pmbus_write_byte() is similar to pmbus_write_byte(), but checks if
+ * a device specific mapping funcion exists and calls it if necessary.
+ */
+static int _pmbus_write_byte(struct i2c_client *client, int page, u8 value)
+{
+       struct pmbus_data *data = i2c_get_clientdata(client);
+       const struct pmbus_driver_info *info = data->info;
+       int status;
+
+       if (info->write_byte) {
+               status = info->write_byte(client, page, value);
+               if (status != -ENODATA)
+                       return status;
+       }
+       return pmbus_write_byte(client, page, value);
+}
+
  int pmbus_write_word_data(struct i2c_client *client, u8 page, u8 reg, u16 word)
  {
         int rv;
@@ -281,7 +299,7 @@ static int _pmbus_read_byte_data(struct i2c_client *client, int page, int reg)
  
  static void pmbus_clear_fault_page(struct i2c_client *client, int page)
  {
-       pmbus_write_byte(client, page, PMBUS_CLEAR_FAULTS);
+       _pmbus_write_byte(client, page, PMBUS_CLEAR_FAULTS);
  }
  
  void pmbus_clear_faults(struct i2c_client *client)
@@ -960,6 +978,8 @@ static void pmbus_find_max_attr(struct i2c_client *client,
  struct pmbus_limit_attr {
         u16 reg;                /* Limit register */
         bool update;            /* True if register needs updates */
+       bool low;               /* True if low limit; for limits with compare
+                                  functions only */
         const char *attr;       /* Attribute name */
         const char *alarm;      /* Alarm attribute name */
         u32 sbit;               /* Alarm attribute status bit */
@@ -1011,7 +1031,8 @@ static bool pmbus_add_limit_attrs(struct i2c_client *client,
                                 if (attr->compare) {
                                         pmbus_add_boolean_cmp(data, name,
                                                 l->alarm, index,
-                                               cbase, cindex,
+                                               l->low ? cindex : cbase,
+                                               l->low ? cbase : cindex,
                                                 attr->sbase + page, l->sbit);
                                 } else {
                                         pmbus_add_boolean_reg(data, name,
@@ -1348,11 +1369,13 @@ static const struct pmbus_sensor_attr power_attributes[] = {
  static const struct pmbus_limit_attr temp_limit_attrs[] = {
         {
                 .reg = PMBUS_UT_WARN_LIMIT,
+               .low = true,
                 .attr = "min",
                 .alarm = "min_alarm",
                 .sbit = PB_TEMP_UT_WARNING,
         }, {
                 .reg = PMBUS_UT_FAULT_LIMIT,
+               .low = true,
                 .attr = "lcrit",
                 .alarm = "lcrit_alarm",
                 .sbit = PB_TEMP_UT_FAULT,
@@ -1381,11 +1404,13 @@ static const struct pmbus_limit_attr temp_limit_attrs[] = {
  static const struct pmbus_limit_attr temp_limit_attrs23[] = {
         {
                 .reg = PMBUS_UT_WARN_LIMIT,
+               .low = true,
                 .attr = "min",
                 .alarm = "min_alarm",
                 .sbit = PB_TEMP_UT_WARNING,
         }, {
                 .reg = PMBUS_UT_FAULT_LIMIT,
+               .low = true,
                 .attr = "lcrit",
                 .alarm = "lcrit_alarm",
                 .sbit = PB_TEMP_UT_FAULT,
diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c

index ace1c731973435581b4d41efedf72bf19f8a888f..d0ddb60155c972c8136d3e2ddd9d0f857fc29cd9 100644 (file)
--- a/drivers/hwmon/pmbus/ucd9000.c
+++ b/drivers/hwmon/pmbus/ucd9000.c
@@ -141,13 +141,11 @@ static int ucd9000_probe(struct i2c_client *client,
         block_buffer[ret] = '\0';
         dev_info(&client->dev, "Device ID %s\n", block_buffer);
  
-       mid = NULL;
-       for (i = 0; i < ARRAY_SIZE(ucd9000_id); i++) {
-               mid = &ucd9000_id[i];
+       for (mid = ucd9000_id; mid->name[0]; mid++) {
                 if (!strncasecmp(mid->name, block_buffer, strlen(mid->name)))
                         break;
         }
-       if (!mid || !strlen(mid->name)) {
+       if (!mid->name[0]) {
                 dev_err(&client->dev, "Unsupported device\n");
                 return -ENODEV;
         }
diff --git a/drivers/hwmon/pmbus/ucd9200.c b/drivers/hwmon/pmbus/ucd9200.c

index ffcc1cf3609d60d8cf56b4e767eed6292bb653c8..c65e9da707cc160a25e74681bc681715e189f3d1 100644 (file)
--- a/drivers/hwmon/pmbus/ucd9200.c
+++ b/drivers/hwmon/pmbus/ucd9200.c
@@ -68,13 +68,11 @@ static int ucd9200_probe(struct i2c_client *client,
         block_buffer[ret] = '\0';
         dev_info(&client->dev, "Device ID %s\n", block_buffer);
  
-       mid = NULL;
-       for (i = 0; i < ARRAY_SIZE(ucd9200_id); i++) {
-               mid = &ucd9200_id[i];
+       for (mid = ucd9200_id; mid->name[0]; mid++) {
                 if (!strncasecmp(mid->name, block_buffer, strlen(mid->name)))
                         break;
         }
-       if (!mid || !strlen(mid->name)) {
+       if (!mid->name[0]) {
                 dev_err(&client->dev, "Unsupported device\n");
                 return -ENODEV;
         }
diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c

index 0c731ca69f1506d70b05da8c2d895c6493e4b1f3..b228e09c5d05aca9fbbfb6e639d1ba2fe0d9bcf4 100644 (file)
--- a/drivers/i2c/busses/i2c-nomadik.c
+++ b/drivers/i2c/busses/i2c-nomadik.c
@@ -146,6 +146,7 @@ struct i2c_nmk_client {
   * @stop: stop condition
   * @xfer_complete: acknowledge completion for a I2C message
   * @result: controller propogated result
+ * @regulator: pointer to i2c regulator
   * @busy: Busy doing transfer
   */
  struct nmk_i2c_dev {
@@ -417,12 +418,12 @@ static int read_i2c(struct nmk_i2c_dev *dev)
         writel(readl(dev->virtbase + I2C_IMSCR) | irq_mask,
                         dev->virtbase + I2C_IMSCR);
  
-       timeout = wait_for_completion_interruptible_timeout(
+       timeout = wait_for_completion_timeout(
                 &dev->xfer_complete, dev->adap.timeout);
  
         if (timeout < 0) {
                 dev_err(&dev->pdev->dev,
-                       "wait_for_completion_interruptible_timeout"
+                       "wait_for_completion_timeout"
                         "returned %d waiting for event\n", timeout);
                 status = timeout;
         }
@@ -504,12 +505,12 @@ static int write_i2c(struct nmk_i2c_dev *dev)
         writel(readl(dev->virtbase + I2C_IMSCR) | irq_mask,
                         dev->virtbase + I2C_IMSCR);
  
-       timeout = wait_for_completion_interruptible_timeout(
+       timeout = wait_for_completion_timeout(
                 &dev->xfer_complete, dev->adap.timeout);
  
         if (timeout < 0) {
                 dev_err(&dev->pdev->dev,
-                       "wait_for_completion_interruptible_timeout"
+                       "wait_for_completion_timeout "
                         "returned %d waiting for event\n", timeout);
                 status = timeout;
         }
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c

index 1a766cf74f6be3cbf05dc7cdd950c1292bce34e8..2dfb6317685613ae3bd4d55c33737658a1fc3bc1 100644 (file)
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -1139,41 +1139,12 @@ omap_i2c_remove(struct platform_device *pdev)
         return 0;
  }
  
-#ifdef CONFIG_SUSPEND
-static int omap_i2c_suspend(struct device *dev)
-{
-       if (!pm_runtime_suspended(dev))
-               if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend)
-                       dev->bus->pm->runtime_suspend(dev);
-
-       return 0;
-}
-
-static int omap_i2c_resume(struct device *dev)
-{
-       if (!pm_runtime_suspended(dev))
-               if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume)
-                       dev->bus->pm->runtime_resume(dev);
-
-       return 0;
-}
-
-static struct dev_pm_ops omap_i2c_pm_ops = {
-       .suspend = omap_i2c_suspend,
-       .resume = omap_i2c_resume,
-};
-#define OMAP_I2C_PM_OPS (&omap_i2c_pm_ops)
-#else
-#define OMAP_I2C_PM_OPS NULL
-#endif
-
  static struct platform_driver omap_i2c_driver = {
         .probe          = omap_i2c_probe,
         .remove         = omap_i2c_remove,
         .driver         = {
                 .name   = "omap_i2c",
                 .owner  = THIS_MODULE,
-               .pm     = OMAP_I2C_PM_OPS,
         },
  };
  
diff --git a/drivers/i2c/busses/i2c-pxa-pci.c b/drivers/i2c/busses/i2c-pxa-pci.c

index 6659d269b841b4c04869ec3aa9d18eb23cb4a4a4..b73da6cd6f915008384d9fd3f20b95ca4d41db8d 100644 (file)
--- a/drivers/i2c/busses/i2c-pxa-pci.c
+++ b/drivers/i2c/busses/i2c-pxa-pci.c
@@ -109,12 +109,15 @@ static int __devinit ce4100_i2c_probe(struct pci_dev *dev,
                 return -EINVAL;
         }
         sds = kzalloc(sizeof(*sds), GFP_KERNEL);
-       if (!sds)
+       if (!sds) {
+               ret = -ENOMEM;
                 goto err_mem;
+       }
  
         for (i = 0; i < ARRAY_SIZE(sds->pdev); i++) {
                 sds->pdev[i] = add_i2c_device(dev, i);
                 if (IS_ERR(sds->pdev[i])) {
+                       ret = PTR_ERR(sds->pdev[i]);
                         while (--i >= 0)
                                 platform_device_unregister(sds->pdev[i]);
                         goto err_dev_add;
diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c

index 2440b741197851247ea267cd8464467c925a4c72..3c94c4a81a554563daecc1d872f044886b09fb1b 100644 (file)
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -270,14 +270,30 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev)
  
         /* Rounds down to not include partial word at the end of buf */
         words_to_transfer = buf_remaining / BYTES_PER_FIFO_WORD;
-       if (words_to_transfer > tx_fifo_avail)
-               words_to_transfer = tx_fifo_avail;
  
-       i2c_writesl(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer);
-
-       buf += words_to_transfer * BYTES_PER_FIFO_WORD;
-       buf_remaining -= words_to_transfer * BYTES_PER_FIFO_WORD;
-       tx_fifo_avail -= words_to_transfer;
+       /* It's very common to have < 4 bytes, so optimize that case. */
+       if (words_to_transfer) {
+               if (words_to_transfer > tx_fifo_avail)
+                       words_to_transfer = tx_fifo_avail;
+
+               /*
+                * Update state before writing to FIFO.  If this casues us
+                * to finish writing all bytes (AKA buf_remaining goes to 0) we
+                * have a potential for an interrupt (PACKET_XFER_COMPLETE is
+                * not maskable).  We need to make sure that the isr sees
+                * buf_remaining as 0 and doesn't call us back re-entrantly.
+                */
+               buf_remaining -= words_to_transfer * BYTES_PER_FIFO_WORD;
+               tx_fifo_avail -= words_to_transfer;
+               i2c_dev->msg_buf_remaining = buf_remaining;
+               i2c_dev->msg_buf = buf +
+                       words_to_transfer * BYTES_PER_FIFO_WORD;
+               barrier();
+
+               i2c_writesl(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer);
+
+               buf += words_to_transfer * BYTES_PER_FIFO_WORD;
+       }
  
         /*
          * If there is a partial word at the end of buf, handle it manually to
@@ -287,14 +303,15 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev)
         if (tx_fifo_avail > 0 && buf_remaining > 0) {
                 BUG_ON(buf_remaining > 3);
                 memcpy(&val, buf, buf_remaining);
+
+               /* Again update before writing to FIFO to make sure isr sees. */
+               i2c_dev->msg_buf_remaining = 0;
+               i2c_dev->msg_buf = NULL;
+               barrier();
+
                 i2c_writel(i2c_dev, val, I2C_TX_FIFO);
-               buf_remaining = 0;
-               tx_fifo_avail--;
         }
  
-       BUG_ON(tx_fifo_avail > 0 && buf_remaining > 0);
-       i2c_dev->msg_buf_remaining = buf_remaining;
-       i2c_dev->msg_buf = buf;
         return 0;
  }
  
@@ -411,9 +428,10 @@ static irqreturn_t tegra_i2c_isr(int irq, void *dev_id)
                         tegra_i2c_mask_irq(i2c_dev, I2C_INT_TX_FIFO_DATA_REQ);
         }
  
-       if ((status & I2C_INT_PACKET_XFER_COMPLETE) &&
-                       !i2c_dev->msg_buf_remaining)
+       if (status & I2C_INT_PACKET_XFER_COMPLETE) {
+               BUG_ON(i2c_dev->msg_buf_remaining);
                 complete(&i2c_dev->msg_complete);
+       }
  
         i2c_writel(i2c_dev, status, I2C_INT_STATUS);
         if (i2c_dev->is_dvc)
@@ -531,7 +549,7 @@ static int tegra_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[],
  
  static u32 tegra_i2c_func(struct i2c_adapter *adap)
  {
-       return I2C_FUNC_I2C;
+       return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
  }
  
  static const struct i2c_algorithm tegra_i2c_algo = {
@@ -719,6 +737,17 @@ static int tegra_i2c_resume(struct platform_device *pdev)
  }
  #endif
  
+#if defined(CONFIG_OF)
+/* Match table for of_platform binding */
+static const struct of_device_id tegra_i2c_of_match[] __devinitconst = {
+       { .compatible = "nvidia,tegra20-i2c", },
+       {},
+};
+MODULE_DEVICE_TABLE(of, tegra_i2c_of_match);
+#else
+#define tegra_i2c_of_match NULL
+#endif
+
  static struct platform_driver tegra_i2c_driver = {
         .probe   = tegra_i2c_probe,
         .remove  = tegra_i2c_remove,
@@ -729,6 +758,7 @@ static struct platform_driver tegra_i2c_driver = {
         .driver  = {
                 .name  = "tegra-i2c",
                 .owner = THIS_MODULE,
+               .of_match_table = tegra_i2c_of_match,
         },
  };
  
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c

index 43f89ba0a90888d209341378ab626423b94b991f..fe89c4660d555fc821b8908c28a92ec208ca3070 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -717,11 +717,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         struct ipoib_neigh *neigh;
-       struct neighbour *n;
+       struct neighbour *n = NULL;
         unsigned long flags;
  
-       n = dst_get_neighbour(skb_dst(skb));
-       if (likely(skb_dst(skb) && n)) {
+       if (likely(skb_dst(skb)))
+               n = dst_get_neighbour(skb_dst(skb));
+
+       if (likely(n)) {
                 if (unlikely(!*to_ipoib_neigh(n))) {
                         ipoib_path_lookup(skb, dev);
                         return NETDEV_TX_OK;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c

index 8db008de5392209c6da296d9a2aa7f3729e4ae72..9c61b9c2c597a9e515af791faa6e16f87a95cfc3 100644 (file)
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -101,13 +101,17 @@ iscsi_iser_recv(struct iscsi_conn *conn,
  
         /* verify PDU length */
         datalen = ntoh24(hdr->dlength);
-       if (datalen != rx_data_len) {
-               printk(KERN_ERR "iscsi_iser: datalen %d (hdr) != %d (IB) \n",
-                      datalen, rx_data_len);
+       if (datalen > rx_data_len || (datalen + 4) < rx_data_len) {
+               iser_err("wrong datalen %d (hdr), %d (IB)\n",
+                       datalen, rx_data_len);
                 rc = ISCSI_ERR_DATALEN;
                 goto error;
         }
  
+       if (datalen != rx_data_len)
+               iser_dbg("aligned datalen (%d) hdr, %d (IB)\n",
+                       datalen, rx_data_len);
+
         /* read AHS */
         ahslen = hdr->hlength * 4;
  
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h

index 342cbc1bdaaecd0af0cd9ab857391c55f226bc59..db6f3ce9f3bf52d2bbfde9475b7b239f0a341bcf 100644 (file)
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -89,7 +89,7 @@
         } while (0)
  
  #define SHIFT_4K       12
-#define SIZE_4K        (1UL << SHIFT_4K)
+#define SIZE_4K        (1ULL << SHIFT_4K)
  #define MASK_4K        (~(SIZE_4K-1))
  
                                         /* support up to 512KB in one RDMA */
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c

index 5745b7fe158c0e4178ae240dea6c3c4a8894d60f..f299de6b419bb03cf9486e7f813e1100c4cad4a5 100644 (file)
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -412,7 +412,7 @@ int iser_send_control(struct iscsi_conn *conn,
                 memcpy(iser_conn->ib_conn->login_buf, task->data,
                                                         task->data_count);
                 tx_dsg->addr    = iser_conn->ib_conn->login_dma;
-               tx_dsg->length  = data_seg_len;
+               tx_dsg->length  = task->data_count;
                 tx_dsg->lkey    = device->mr->lkey;
                 mdesc->num_sge = 2;
         }
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c

index 9882971827e6325bd7a424ef83b13b5679cec8d1..358cd7ee905b7ff4f9a7498e277341037437bf19 100644 (file)
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -139,7 +139,7 @@ struct analog_port {
  #include <linux/i8253.h>
  
  #define GET_TIME(x)    do { if (cpu_has_tsc) rdtscl(x); else x = get_time_pit(); } while (0)
-#define DELTA(x,y)     (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? CLOCK_TICK_RATE / HZ : 0)))
+#define DELTA(x,y)     (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0)))
  #define TIME_NAME      (cpu_has_tsc?"TSC":"PIT")
  static unsigned int get_time_pit(void)
  {
diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c

index 7b404e5443ed15fa4bc2ba98eebd390e3b3f700b..e34eeb8ae371e3e569220f7cb20ab49ddda6fa77 100644 (file)
--- a/drivers/input/keyboard/adp5588-keys.c
+++ b/drivers/input/keyboard/adp5588-keys.c
@@ -668,4 +668,3 @@ module_exit(adp5588_exit);
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
  MODULE_DESCRIPTION("ADP5588/87 Keypad driver");
-MODULE_ALIAS("platform:adp5588-keys");
diff --git a/drivers/input/keyboard/ep93xx_keypad.c b/drivers/input/keyboard/ep93xx_keypad.c

index c8242dd190d0c920f64ff105903fd76c2e5a6be1..aa17e024d80329acffd48223b554d5f2528f513a 100644 (file)
--- a/drivers/input/keyboard/ep93xx_keypad.c
+++ b/drivers/input/keyboard/ep93xx_keypad.c
@@ -20,6 +20,7 @@
   * flag.
   */
  
+#include <linux/module.h>
  #include <linux/platform_device.h>
  #include <linux/interrupt.h>
  #include <linux/clk.h>
diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c

index f270447ba9519cb9497eea8a02d9869eef58ba4b..a5a77915c65003fb30eb058765945353f11d6298 100644 (file)
--- a/drivers/input/keyboard/tegra-kbc.c
+++ b/drivers/input/keyboard/tegra-kbc.c
@@ -702,7 +702,7 @@ err_iounmap:
  err_free_mem_region:
         release_mem_region(res->start, resource_size(res));
  err_free_mem:
-       input_free_device(kbc->idev);
+       input_free_device(input_dev);
         kfree(kbc);
  
         return err;
diff --git a/drivers/input/misc/ad714x-i2c.c b/drivers/input/misc/ad714x-i2c.c

index e21deb1baa8abfb193d46702023540e2d09a41ca..025417d74ca29c37ffa8393878aae2f257892427 100644 (file)
--- a/drivers/input/misc/ad714x-i2c.c
+++ b/drivers/input/misc/ad714x-i2c.c
@@ -1,7 +1,7 @@
  /*
   * AD714X CapTouch Programmable Controller driver (I2C bus)
   *
- * Copyright 2009 Analog Devices Inc.
+ * Copyright 2009-2011 Analog Devices Inc.
   *
   * Licensed under the GPL-2 or later.
   */
@@ -27,54 +27,49 @@ static int ad714x_i2c_resume(struct device *dev)
  
  static SIMPLE_DEV_PM_OPS(ad714x_i2c_pm, ad714x_i2c_suspend, ad714x_i2c_resume);
  
-static int ad714x_i2c_write(struct device *dev, unsigned short reg,
-                               unsigned short data)
+static int ad714x_i2c_write(struct ad714x_chip *chip,
+                           unsigned short reg, unsigned short data)
  {
-       struct i2c_client *client = to_i2c_client(dev);
-       int ret = 0;
-       u8 *_reg = (u8 *)&reg;
-       u8 *_data = (u8 *)&data;
-
-       u8 tx[4] = {
-               _reg[1],
-               _reg[0],
-               _data[1],
-               _data[0]
-       };
-
-       ret = i2c_master_send(client, tx, 4);
-       if (ret < 0)
-               dev_err(&client->dev, "I2C write error\n");
-
-       return ret;
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       int error;
+
+       chip->xfer_buf[0] = cpu_to_be16(reg);
+       chip->xfer_buf[1] = cpu_to_be16(data);
+
+       error = i2c_master_send(client, (u8 *)chip->xfer_buf,
+                               2 * sizeof(*chip->xfer_buf));
+       if (unlikely(error < 0)) {
+               dev_err(&client->dev, "I2C write error: %d\n", error);
+               return error;
+       }
+
+       return 0;
  }
  
-static int ad714x_i2c_read(struct device *dev, unsigned short reg,
-                               unsigned short *data)
+static int ad714x_i2c_read(struct ad714x_chip *chip,
+                          unsigned short reg, unsigned short *data, size_t len)
  {
-       struct i2c_client *client = to_i2c_client(dev);
-       int ret = 0;
-       u8 *_reg = (u8 *)&reg;
-       u8 *_data = (u8 *)data;
-
-       u8 tx[2] = {
-               _reg[1],
-               _reg[0]
-       };
-       u8 rx[2];
-
-       ret = i2c_master_send(client, tx, 2);
-       if (ret >= 0)
-               ret = i2c_master_recv(client, rx, 2);
-
-       if (unlikely(ret < 0)) {
-               dev_err(&client->dev, "I2C read error\n");
-       } else {
-               _data[0] = rx[1];
-               _data[1] = rx[0];
+       struct i2c_client *client = to_i2c_client(chip->dev);
+       int i;
+       int error;
+
+       chip->xfer_buf[0] = cpu_to_be16(reg);
+
+       error = i2c_master_send(client, (u8 *)chip->xfer_buf,
+                               sizeof(*chip->xfer_buf));
+       if (error >= 0)
+               error = i2c_master_recv(client, (u8 *)chip->xfer_buf,
+                                       len * sizeof(*chip->xfer_buf));
+
+       if (unlikely(error < 0)) {
+               dev_err(&client->dev, "I2C read error: %d\n", error);
+               return error;
         }
  
-       return ret;
+       for (i = 0; i < len; i++)
+               data[i] = be16_to_cpu(chip->xfer_buf[i]);
+
+       return 0;
  }
  
  static int __devinit ad714x_i2c_probe(struct i2c_client *client,
diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c

index 4120dd5493059126b272256b47eba121044e396d..875b50811361cef62ca8e1e1b009b120121bfe66 100644 (file)
--- a/drivers/input/misc/ad714x-spi.c
+++ b/drivers/input/misc/ad714x-spi.c
@@ -1,12 +1,12 @@
  /*
   * AD714X CapTouch Programmable Controller driver (SPI bus)
   *
- * Copyright 2009 Analog Devices Inc.
+ * Copyright 2009-2011 Analog Devices Inc.
   *
   * Licensed under the GPL-2 or later.
   */
  
-#include <linux/input.h>       /* BUS_I2C */
+#include <linux/input.h>       /* BUS_SPI */
  #include <linux/module.h>
  #include <linux/spi/spi.h>
  #include <linux/pm.h>
@@ -30,30 +30,68 @@ static int ad714x_spi_resume(struct device *dev)
  
  static SIMPLE_DEV_PM_OPS(ad714x_spi_pm, ad714x_spi_suspend, ad714x_spi_resume);
  
-static int ad714x_spi_read(struct device *dev, unsigned short reg,
-               unsigned short *data)
+static int ad714x_spi_read(struct ad714x_chip *chip,
+                          unsigned short reg, unsigned short *data, size_t len)
  {
-       struct spi_device *spi = to_spi_device(dev);
-       unsigned short tx = AD714x_SPI_CMD_PREFIX | AD714x_SPI_READ | reg;
+       struct spi_device *spi = to_spi_device(chip->dev);
+       struct spi_message message;
+       struct spi_transfer xfer[2];
+       int i;
+       int error;
+
+       spi_message_init(&message);
+       memset(xfer, 0, sizeof(xfer));
+
+       chip->xfer_buf[0] = cpu_to_be16(AD714x_SPI_CMD_PREFIX |
+                                       AD714x_SPI_READ | reg);
+       xfer[0].tx_buf = &chip->xfer_buf[0];
+       xfer[0].len = sizeof(chip->xfer_buf[0]);
+       spi_message_add_tail(&xfer[0], &message);
+
+       xfer[1].rx_buf = &chip->xfer_buf[1];
+       xfer[1].len = sizeof(chip->xfer_buf[1]) * len;
+       spi_message_add_tail(&xfer[1], &message);
+
+       error = spi_sync(spi, &message);
+       if (unlikely(error)) {
+               dev_err(chip->dev, "SPI read error: %d\n", error);
+               return error;
+       }
+
+       for (i = 0; i < len; i++)
+               data[i] = be16_to_cpu(chip->xfer_buf[i + 1]);
  
-       return spi_write_then_read(spi, (u8 *)&tx, 2, (u8 *)data, 2);
+       return 0;
  }
  
-static int ad714x_spi_write(struct device *dev, unsigned short reg,
-               unsigned short data)
+static int ad714x_spi_write(struct ad714x_chip *chip,
+                           unsigned short reg, unsigned short data)
  {
-       struct spi_device *spi = to_spi_device(dev);
-       unsigned short tx[2] = {
-               AD714x_SPI_CMD_PREFIX | reg,
-               data
-       };
+       struct spi_device *spi = to_spi_device(chip->dev);
+       int error;
+
+       chip->xfer_buf[0] = cpu_to_be16(AD714x_SPI_CMD_PREFIX | reg);
+       chip->xfer_buf[1] = cpu_to_be16(data);
+
+       error = spi_write(spi, (u8 *)chip->xfer_buf,
+                         2 * sizeof(*chip->xfer_buf));
+       if (unlikely(error)) {
+               dev_err(chip->dev, "SPI write error: %d\n", error);
+               return error;
+       }
  
-       return spi_write(spi, (u8 *)tx, 4);
+       return 0;
  }
  
  static int __devinit ad714x_spi_probe(struct spi_device *spi)
  {
         struct ad714x_chip *chip;
+       int err;
+
+       spi->bits_per_word = 8;
+       err = spi_setup(spi);
+       if (err < 0)
+               return err;
  
         chip = ad714x_probe(&spi->dev, BUS_SPI, spi->irq,
                             ad714x_spi_read, ad714x_spi_write);
diff --git a/drivers/input/misc/ad714x.c b/drivers/input/misc/ad714x.c

index c3a62c42cd28838a64fa4cb432d2ac4e430a0fc0..ca42c7d2a3c79ab247163eafd7e4d4112b6ee097 100644 (file)
--- a/drivers/input/misc/ad714x.c
+++ b/drivers/input/misc/ad714x.c
@@ -1,7 +1,7 @@
  /*
   * AD714X CapTouch Programmable Controller driver supporting AD7142/3/7/8/7A
   *
- * Copyright 2009 Analog Devices Inc.
+ * Copyright 2009-2011 Analog Devices Inc.
   *
   * Licensed under the GPL-2 or later.
   */
@@ -59,7 +59,6 @@
  #define STAGE11_AMBIENT                0x27D
  
  #define PER_STAGE_REG_NUM      36
-#define STAGE_NUM              12
  #define STAGE_CFGREG_NUM       8
  #define SYS_CFGREG_NUM         8
  
@@ -124,27 +123,6 @@ struct ad714x_driver_data {
   * information to integrate all things which will be private data
   * of spi/i2c device
   */
-struct ad714x_chip {
-       unsigned short h_state;
-       unsigned short l_state;
-       unsigned short c_state;
-       unsigned short adc_reg[STAGE_NUM];
-       unsigned short amb_reg[STAGE_NUM];
-       unsigned short sensor_val[STAGE_NUM];
-
-       struct ad714x_platform_data *hw;
-       struct ad714x_driver_data *sw;
-
-       int irq;
-       struct device *dev;
-       ad714x_read_t read;
-       ad714x_write_t write;
-
-       struct mutex mutex;
-
-       unsigned product;
-       unsigned version;
-};
  
  static void ad714x_use_com_int(struct ad714x_chip *ad714x,
                                 int start_stage, int end_stage)
@@ -154,13 +132,13 @@ static void ad714x_use_com_int(struct ad714x_chip *ad714x,
  
         mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1);
  
-       ad714x->read(ad714x->dev, STG_COM_INT_EN_REG, &data);
+       ad714x->read(ad714x, STG_COM_INT_EN_REG, &data, 1);
         data |= 1 << end_stage;
-       ad714x->write(ad714x->dev, STG_COM_INT_EN_REG, data);
+       ad714x->write(ad714x, STG_COM_INT_EN_REG, data);
  
-       ad714x->read(ad714x->dev, STG_HIGH_INT_EN_REG, &data);
+       ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data, 1);
         data &= ~mask;
-       ad714x->write(ad714x->dev, STG_HIGH_INT_EN_REG, data);
+       ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data);
  }
  
  static void ad714x_use_thr_int(struct ad714x_chip *ad714x,
@@ -171,13 +149,13 @@ static void ad714x_use_thr_int(struct ad714x_chip *ad714x,
  
         mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1);
  
-       ad714x->read(ad714x->dev, STG_COM_INT_EN_REG, &data);
+       ad714x->read(ad714x, STG_COM_INT_EN_REG, &data, 1);
         data &= ~(1 << end_stage);
-       ad714x->write(ad714x->dev, STG_COM_INT_EN_REG, data);
+       ad714x->write(ad714x, STG_COM_INT_EN_REG, data);
  
-       ad714x->read(ad714x->dev, STG_HIGH_INT_EN_REG, &data);
+       ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data, 1);
         data |= mask;
-       ad714x->write(ad714x->dev, STG_HIGH_INT_EN_REG, data);
+       ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data);
  }
  
  static int ad714x_cal_highest_stage(struct ad714x_chip *ad714x,
@@ -273,15 +251,16 @@ static void ad714x_slider_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
         struct ad714x_slider_plat *hw = &ad714x->hw->slider[idx];
         int i;
  
+       ad714x->read(ad714x, CDC_RESULT_S0 + hw->start_stage,
+                       &ad714x->adc_reg[hw->start_stage],
+                       hw->end_stage - hw->start_stage + 1);
+
         for (i = hw->start_stage; i <= hw->end_stage; i++) {
-               ad714x->read(ad714x->dev, CDC_RESULT_S0 + i,
-                       &ad714x->adc_reg[i]);
-               ad714x->read(ad714x->dev,
-                               STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
-                               &ad714x->amb_reg[i]);
-
-               ad714x->sensor_val[i] = abs(ad714x->adc_reg[i] -
-                               ad714x->amb_reg[i]);
+               ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
+                               &ad714x->amb_reg[i], 1);
+
+               ad714x->sensor_val[i] =
+                       abs(ad714x->adc_reg[i] - ad714x->amb_reg[i]);
         }
  }
  
@@ -444,15 +423,16 @@ static void ad714x_wheel_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
         struct ad714x_wheel_plat *hw = &ad714x->hw->wheel[idx];
         int i;
  
+       ad714x->read(ad714x, CDC_RESULT_S0 + hw->start_stage,
+                       &ad714x->adc_reg[hw->start_stage],
+                       hw->end_stage - hw->start_stage + 1);
+
         for (i = hw->start_stage; i <= hw->end_stage; i++) {
-               ad714x->read(ad714x->dev, CDC_RESULT_S0 + i,
-                       &ad714x->adc_reg[i]);
-               ad714x->read(ad714x->dev,
-                               STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
-                               &ad714x->amb_reg[i]);
+               ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
+                               &ad714x->amb_reg[i], 1);
                 if (ad714x->adc_reg[i] > ad714x->amb_reg[i])
-                       ad714x->sensor_val[i] = ad714x->adc_reg[i] -
-                               ad714x->amb_reg[i];
+                       ad714x->sensor_val[i] =
+                               ad714x->adc_reg[i] - ad714x->amb_reg[i];
                 else
                         ad714x->sensor_val[i] = 0;
         }
@@ -597,15 +577,16 @@ static void touchpad_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
         struct ad714x_touchpad_plat *hw = &ad714x->hw->touchpad[idx];
         int i;
  
+       ad714x->read(ad714x, CDC_RESULT_S0 + hw->x_start_stage,
+                       &ad714x->adc_reg[hw->x_start_stage],
+                       hw->x_end_stage - hw->x_start_stage + 1);
+
         for (i = hw->x_start_stage; i <= hw->x_end_stage; i++) {
-               ad714x->read(ad714x->dev, CDC_RESULT_S0 + i,
-                               &ad714x->adc_reg[i]);
-               ad714x->read(ad714x->dev,
-                               STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
-                               &ad714x->amb_reg[i]);
+               ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
+                               &ad714x->amb_reg[i], 1);
                 if (ad714x->adc_reg[i] > ad714x->amb_reg[i])
-                       ad714x->sensor_val[i] = ad714x->adc_reg[i] -
-                               ad714x->amb_reg[i];
+                       ad714x->sensor_val[i] =
+                               ad714x->adc_reg[i] - ad714x->amb_reg[i];
                 else
                         ad714x->sensor_val[i] = 0;
         }
@@ -891,7 +872,7 @@ static int ad714x_hw_detect(struct ad714x_chip *ad714x)
  {
         unsigned short data;
  
-       ad714x->read(ad714x->dev, AD714X_PARTID_REG, &data);
+       ad714x->read(ad714x, AD714X_PARTID_REG, &data, 1);
         switch (data & 0xFFF0) {
         case AD7142_PARTID:
                 ad714x->product = 0x7142;
@@ -940,23 +921,20 @@ static void ad714x_hw_init(struct ad714x_chip *ad714x)
         for (i = 0; i < STAGE_NUM; i++) {
                 reg_base = AD714X_STAGECFG_REG + i * STAGE_CFGREG_NUM;
                 for (j = 0; j < STAGE_CFGREG_NUM; j++)
-                       ad714x->write(ad714x->dev, reg_base + j,
+                       ad714x->write(ad714x, reg_base + j,
                                         ad714x->hw->stage_cfg_reg[i][j]);
         }
  
         for (i = 0; i < SYS_CFGREG_NUM; i++)
-               ad714x->write(ad714x->dev, AD714X_SYSCFG_REG + i,
+               ad714x->write(ad714x, AD714X_SYSCFG_REG + i,
                         ad714x->hw->sys_cfg_reg[i]);
         for (i = 0; i < SYS_CFGREG_NUM; i++)
-               ad714x->read(ad714x->dev, AD714X_SYSCFG_REG + i,
-                       &data);
+               ad714x->read(ad714x, AD714X_SYSCFG_REG + i, &data, 1);
  
-       ad714x->write(ad714x->dev, AD714X_STG_CAL_EN_REG, 0xFFF);
+       ad714x->write(ad714x, AD714X_STG_CAL_EN_REG, 0xFFF);
  
         /* clear all interrupts */
-       ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &data);
-       ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &data);
-       ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &data);
+       ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3);
  }
  
  static irqreturn_t ad714x_interrupt_thread(int irq, void *data)
@@ -966,9 +944,7 @@ static irqreturn_t ad714x_interrupt_thread(int irq, void *data)
  
         mutex_lock(&ad714x->mutex);
  
-       ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &ad714x->l_state);
-       ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &ad714x->h_state);
-       ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &ad714x->c_state);
+       ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3);
  
         for (i = 0; i < ad714x->hw->button_num; i++)
                 ad714x_button_state_machine(ad714x, i);
@@ -1245,7 +1221,7 @@ int ad714x_disable(struct ad714x_chip *ad714x)
         mutex_lock(&ad714x->mutex);
  
         data = ad714x->hw->sys_cfg_reg[AD714X_PWR_CTRL] | 0x3;
-       ad714x->write(ad714x->dev, AD714X_PWR_CTRL, data);
+       ad714x->write(ad714x, AD714X_PWR_CTRL, data);
  
         mutex_unlock(&ad714x->mutex);
  
@@ -1255,24 +1231,20 @@ EXPORT_SYMBOL(ad714x_disable);
  
  int ad714x_enable(struct ad714x_chip *ad714x)
  {
-       unsigned short data;
-
         dev_dbg(ad714x->dev, "%s enter\n", __func__);
  
         mutex_lock(&ad714x->mutex);
  
         /* resume to non-shutdown mode */
  
-       ad714x->write(ad714x->dev, AD714X_PWR_CTRL,
+       ad714x->write(ad714x, AD714X_PWR_CTRL,
                         ad714x->hw->sys_cfg_reg[AD714X_PWR_CTRL]);
  
         /* make sure the interrupt output line is not low level after resume,
          * otherwise we will get no chance to enter falling-edge irq again
          */
  
-       ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &data);
-       ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &data);
-       ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &data);
+       ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3);
  
         mutex_unlock(&ad714x->mutex);
  
diff --git a/drivers/input/misc/ad714x.h b/drivers/input/misc/ad714x.h

index 45c54fb13f0755007e689d9b549b615d71b4a104..3c85455aa66d23329d3605fb367d400d320e4976 100644 (file)
--- a/drivers/input/misc/ad714x.h
+++ b/drivers/input/misc/ad714x.h
@@ -1,7 +1,7 @@
  /*
   * AD714X CapTouch Programmable Controller driver (bus interfaces)
   *
- * Copyright 2009 Analog Devices Inc.
+ * Copyright 2009-2011 Analog Devices Inc.
   *
   * Licensed under the GPL-2 or later.
   */
@@ -11,11 +11,40 @@
  
  #include <linux/types.h>
  
+#define STAGE_NUM              12
+
  struct device;
+struct ad714x_platform_data;
+struct ad714x_driver_data;
  struct ad714x_chip;
  
-typedef int (*ad714x_read_t)(struct device *, unsigned short, unsigned short *);
-typedef int (*ad714x_write_t)(struct device *, unsigned short, unsigned short);
+typedef int (*ad714x_read_t)(struct ad714x_chip *, unsigned short, unsigned short *, size_t);
+typedef int (*ad714x_write_t)(struct ad714x_chip *, unsigned short, unsigned short);
+
+struct ad714x_chip {
+       unsigned short l_state;
+       unsigned short h_state;
+       unsigned short c_state;
+       unsigned short adc_reg[STAGE_NUM];
+       unsigned short amb_reg[STAGE_NUM];
+       unsigned short sensor_val[STAGE_NUM];
+
+       struct ad714x_platform_data *hw;
+       struct ad714x_driver_data *sw;
+
+       int irq;
+       struct device *dev;
+       ad714x_read_t read;
+       ad714x_write_t write;
+
+       struct mutex mutex;
+
+       unsigned product;
+       unsigned version;
+
+       __be16 xfer_buf[16] ____cacheline_aligned;
+
+};
  
  int ad714x_disable(struct ad714x_chip *ad714x);
  int ad714x_enable(struct ad714x_chip *ad714x);
diff --git a/drivers/input/misc/cm109.c b/drivers/input/misc/cm109.c

index b09c7d127219f19c4ae04b4a2ce545e639278d27..ab860511f01690a73ed2fd3dbefb49d11aaa3f73 100644 (file)
--- a/drivers/input/misc/cm109.c
+++ b/drivers/input/misc/cm109.c
@@ -475,7 +475,7 @@ static void cm109_toggle_buzzer_sync(struct cm109_dev *dev, int on)
                                 le16_to_cpu(dev->ctl_req->wIndex),
                                 dev->ctl_data,
                                 USB_PKT_LEN, USB_CTRL_SET_TIMEOUT);
-       if (error && error != EINTR)
+       if (error < 0 && error != -EINTR)
                 err("%s: usb_control_msg() failed %d", __func__, error);
  }
  
diff --git a/drivers/input/misc/mma8450.c b/drivers/input/misc/mma8450.c

index 6c76cf79299143ce16542af8e734e49e4a21bf12..0794778295fc60619311eb619c5eee2a7a97ef07 100644 (file)
--- a/drivers/input/misc/mma8450.c
+++ b/drivers/input/misc/mma8450.c
@@ -234,7 +234,7 @@ static const struct of_device_id mma8450_dt_ids[] = {
         { .compatible = "fsl,mma8450", },
         { /* sentinel */ }
  };
-MODULE_DEVICE_TABLE(i2c, mma8450_dt_ids);
+MODULE_DEVICE_TABLE(of, mma8450_dt_ids);
  
  static struct i2c_driver mma8450_driver = {
         .driver = {
diff --git a/drivers/input/misc/mpu3050.c b/drivers/input/misc/mpu3050.c

index b95fac15b2ea46ec40af163359a71bfb033ecef2..f71dc728da58adcad257da28f053b98fc29fa3fa 100644 (file)
--- a/drivers/input/misc/mpu3050.c
+++ b/drivers/input/misc/mpu3050.c
@@ -282,7 +282,7 @@ err_free_irq:
  err_pm_set_suspended:
         pm_runtime_set_suspended(&client->dev);
  err_free_mem:
-       input_unregister_device(idev);
+       input_free_device(idev);
         kfree(sensor);
         return error;
  }
diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c

index 3126983c004a96860d4b042295eec0cce7b09ef1..5ec617e28f7ececfa02729beb5addfd085cae7cb 100644 (file)
--- a/drivers/input/mouse/bcm5974.c
+++ b/drivers/input/mouse/bcm5974.c
@@ -67,6 +67,18 @@
  #define USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI   0x0245
  #define USB_DEVICE_ID_APPLE_WELLSPRING5_ISO    0x0246
  #define USB_DEVICE_ID_APPLE_WELLSPRING5_JIS    0x0247
+/* MacbookAir4,1 (unibody, July 2011) */
+#define USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI  0x0249
+#define USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO   0x024a
+#define USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS   0x024b
+/* MacbookAir4,2 (unibody, July 2011) */
+#define USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI   0x024c
+#define USB_DEVICE_ID_APPLE_WELLSPRING6_ISO    0x024d
+#define USB_DEVICE_ID_APPLE_WELLSPRING6_JIS    0x024e
+/* Macbook8,2 (unibody) */
+#define USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI  0x0252
+#define USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO   0x0253
+#define USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS   0x0254
  
  #define BCM5974_DEVICE(prod) {                                 \
         .match_flags = (USB_DEVICE_ID_MATCH_DEVICE |            \
@@ -104,6 +116,18 @@ static const struct usb_device_id bcm5974_table[] = {
         BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI),
         BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ISO),
         BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_JIS),
+       /* MacbookAir4,1 */
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI),
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO),
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS),
+       /* MacbookAir4,2 */
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI),
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ISO),
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_JIS),
+       /* MacbookPro8,2 */
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI),
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO),
+       BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS),
         /* Terminating entry */
         {}
  };
@@ -294,6 +318,42 @@ static const struct bcm5974_config bcm5974_config_table[] = {
                 { DIM_X, DIM_X / SN_COORD, -4415, 5050 },
                 { DIM_Y, DIM_Y / SN_COORD, -55, 6680 }
         },
+       {
+               USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI,
+               USB_DEVICE_ID_APPLE_WELLSPRING6_ISO,
+               USB_DEVICE_ID_APPLE_WELLSPRING6_JIS,
+               HAS_INTEGRATED_BUTTON,
+               0x84, sizeof(struct bt_data),
+               0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS,
+               { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 },
+               { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 },
+               { DIM_X, DIM_X / SN_COORD, -4620, 5140 },
+               { DIM_Y, DIM_Y / SN_COORD, -150, 6600 }
+       },
+       {
+               USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI,
+               USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO,
+               USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS,
+               HAS_INTEGRATED_BUTTON,
+               0x84, sizeof(struct bt_data),
+               0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS,
+               { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 },
+               { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 },
+               { DIM_X, DIM_X / SN_COORD, -4750, 5280 },
+               { DIM_Y, DIM_Y / SN_COORD, -150, 6730 }
+       },
+       {
+               USB_DEVICE_ID_APPLE_WELLSPRING6A_ANSI,
+               USB_DEVICE_ID_APPLE_WELLSPRING6A_ISO,
+               USB_DEVICE_ID_APPLE_WELLSPRING6A_JIS,
+               HAS_INTEGRATED_BUTTON,
+               0x84, sizeof(struct bt_data),
+               0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS,
+               { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 },
+               { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 },
+               { DIM_X, DIM_X / SN_COORD, -4620, 5140 },
+               { DIM_Y, DIM_Y / SN_COORD, -150, 6600 }
+       },
         {}
  };
  
diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c

index 449c0a46dbac51ba881ba49b8cfff1c3861dfee7..958b4eb6369d087b31d973446d365a8c431b2ec8 100644 (file)
--- a/drivers/input/tablet/wacom_sys.c
+++ b/drivers/input/tablet/wacom_sys.c
@@ -49,6 +49,7 @@ struct hid_descriptor {
  #define USB_REQ_GET_REPORT     0x01
  #define USB_REQ_SET_REPORT     0x09
  #define WAC_HID_FEATURE_REPORT 0x03
+#define WAC_MSG_RETRIES                5
  
  static int usb_get_report(struct usb_interface *intf, unsigned char type,
                                 unsigned char id, void *buf, int size)
@@ -165,7 +166,7 @@ static int wacom_parse_hid(struct usb_interface *intf, struct hid_descriptor *hi
                         report,
                         hid_desc->wDescriptorLength,
                         5000); /* 5 secs */
-       } while (result < 0 && limit++ < 5);
+       } while (result < 0 && limit++ < WAC_MSG_RETRIES);
  
         /* No need to parse the Descriptor. It isn't an error though */
         if (result < 0)
@@ -228,13 +229,6 @@ static int wacom_parse_hid(struct usb_interface *intf, struct hid_descriptor *hi
                                                         get_unaligned_le16(&report[i + 3]);
                                                 i += 4;
                                         }
-                               } else if (usage == WCM_DIGITIZER) {
-                                       /* max pressure isn't reported
-                                       features->pressure_max = (unsigned short)
-                                                       (report[i+4] << 8  | report[i + 3]);
-                                       */
-                                       features->pressure_max = 255;
-                                       i += 4;
                                 }
                                 break;
  
@@ -290,13 +284,6 @@ static int wacom_parse_hid(struct usb_interface *intf, struct hid_descriptor *hi
                                 pen = 1;
                                 i++;
                                 break;
-
-                       case HID_USAGE_UNDEFINED:
-                               if (usage == WCM_DESKTOP && finger) /* capacity */
-                                       features->pressure_max =
-                                               get_unaligned_le16(&report[i + 3]);
-                               i += 4;
-                               break;
                         }
                         break;
  
@@ -319,24 +306,26 @@ static int wacom_query_tablet_data(struct usb_interface *intf, struct wacom_feat
         int limit = 0, report_id = 2;
         int error = -ENOMEM;
  
-       rep_data = kmalloc(2, GFP_KERNEL);
+       rep_data = kmalloc(4, GFP_KERNEL);
         if (!rep_data)
                 return error;
  
-       /* ask to report tablet data if it is 2FGT Tablet PC or
+       /* ask to report tablet data if it is MT Tablet PC or
          * not a Tablet PC */
         if (features->type == TABLETPC2FG) {
                 do {
                         rep_data[0] = 3;
                         rep_data[1] = 4;
+                       rep_data[2] = 0;
+                       rep_data[3] = 0;
                         report_id = 3;
                         error = usb_set_report(intf, WAC_HID_FEATURE_REPORT,
-                               report_id, rep_data, 2);
+                               report_id, rep_data, 4);
                         if (error >= 0)
                                 error = usb_get_report(intf,
                                         WAC_HID_FEATURE_REPORT, report_id,
-                                       rep_data, 3);
-               } while ((error < 0 || rep_data[1] != 4) && limit++ < 5);
+                                       rep_data, 4);
+               } while ((error < 0 || rep_data[1] != 4) && limit++ < WAC_MSG_RETRIES);
         } else if (features->type != TABLETPC) {
                 do {
                         rep_data[0] = 2;
@@ -347,7 +336,7 @@ static int wacom_query_tablet_data(struct usb_interface *intf, struct wacom_feat
                                 error = usb_get_report(intf,
                                         WAC_HID_FEATURE_REPORT, report_id,
                                         rep_data, 2);
-               } while ((error < 0 || rep_data[1] != 2) && limit++ < 5);
+               } while ((error < 0 || rep_data[1] != 2) && limit++ < WAC_MSG_RETRIES);
         }
  
         kfree(rep_data);
diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c

index 03ebcc8b24b59bc1ec5efa8f2d5d5465c53e3e56..0dc97ec15c28cc59cda96622951c44c0a4fab6fd 100644 (file)
--- a/drivers/input/tablet/wacom_wac.c
+++ b/drivers/input/tablet/wacom_wac.c
@@ -800,25 +800,26 @@ static int wacom_bpt_touch(struct wacom_wac *wacom)
         int i;
  
         for (i = 0; i < 2; i++) {
-               int p = data[9 * i + 2];
-               bool touch = p && !wacom->shared->stylus_in_proximity;
+               int offset = (data[1] & 0x80) ? (8 * i) : (9 * i);
+               bool touch = data[offset + 3] & 0x80;
  
-               input_mt_slot(input, i);
-               input_mt_report_slot_state(input, MT_TOOL_FINGER, touch);
                 /*
                  * Touch events need to be disabled while stylus is
                  * in proximity because user's hand is resting on touchpad
                  * and sending unwanted events.  User expects tablet buttons
                  * to continue working though.
                  */
+               touch = touch && !wacom->shared->stylus_in_proximity;
+
+               input_mt_slot(input, i);
+               input_mt_report_slot_state(input, MT_TOOL_FINGER, touch);
                 if (touch) {
-                       int x = get_unaligned_be16(&data[9 * i + 3]) & 0x7ff;
-                       int y = get_unaligned_be16(&data[9 * i + 5]) & 0x7ff;
+                       int x = get_unaligned_be16(&data[offset + 3]) & 0x7ff;
+                       int y = get_unaligned_be16(&data[offset + 5]) & 0x7ff;
                         if (features->quirks & WACOM_QUIRK_BBTOUCH_LOWRES) {
                                 x <<= 5;
                                 y <<= 5;
                         }
-                       input_report_abs(input, ABS_MT_PRESSURE, p);
                         input_report_abs(input, ABS_MT_POSITION_X, x);
                         input_report_abs(input, ABS_MT_POSITION_Y, y);
                 }
@@ -1056,10 +1057,11 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
                              features->x_fuzz, 0);
         input_set_abs_params(input_dev, ABS_Y, 0, features->y_max,
                              features->y_fuzz, 0);
-       input_set_abs_params(input_dev, ABS_PRESSURE, 0, features->pressure_max,
-                            features->pressure_fuzz, 0);
  
         if (features->device_type == BTN_TOOL_PEN) {
+               input_set_abs_params(input_dev, ABS_PRESSURE, 0, features->pressure_max,
+                            features->pressure_fuzz, 0);
+
                 /* penabled devices have fixed resolution for each model */
                 input_abs_set_res(input_dev, ABS_X, features->x_resolution);
                 input_abs_set_res(input_dev, ABS_Y, features->y_resolution);
@@ -1098,6 +1100,8 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
                 __set_bit(BTN_TOOL_MOUSE, input_dev->keybit);
                 __set_bit(BTN_STYLUS, input_dev->keybit);
                 __set_bit(BTN_STYLUS2, input_dev->keybit);
+
+               __set_bit(INPUT_PROP_POINTER, input_dev->propbit);
                 break;
  
         case WACOM_21UX2:
@@ -1126,6 +1130,9 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
                 }
  
                 input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0);
+
+               __set_bit(INPUT_PROP_DIRECT, input_dev->propbit);
+
                 wacom_setup_cintiq(wacom_wac);
                 break;
  
@@ -1150,6 +1157,8 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
                 /* fall through */
  
         case INTUOS:
+               __set_bit(INPUT_PROP_POINTER, input_dev->propbit);
+
                 wacom_setup_intuos(wacom_wac);
                 break;
  
@@ -1165,6 +1174,8 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
  
                 input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0);
                 wacom_setup_intuos(wacom_wac);
+
+               __set_bit(INPUT_PROP_POINTER, input_dev->propbit);
                 break;
  
         case TABLETPC2FG:
@@ -1183,26 +1194,40 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev,
         case TABLETPC:
                 __clear_bit(ABS_MISC, input_dev->absbit);
  
+               __set_bit(INPUT_PROP_DIRECT, input_dev->propbit);
+
                 if (features->device_type != BTN_TOOL_PEN)
                         break;  /* no need to process stylus stuff */
  
                 /* fall through */
  
         case PL:
-       case PTU:
         case DTU:
                 __set_bit(BTN_TOOL_PEN, input_dev->keybit);
+               __set_bit(BTN_TOOL_RUBBER, input_dev->keybit);
                 __set_bit(BTN_STYLUS, input_dev->keybit);
                 __set_bit(BTN_STYLUS2, input_dev->keybit);
+
+               __set_bit(INPUT_PROP_DIRECT, input_dev->propbit);
+               break;
+
+       case PTU:
+               __set_bit(BTN_STYLUS2, input_dev->keybit);
                 /* fall through */
  
         case PENPARTNER:
+               __set_bit(BTN_TOOL_PEN, input_dev->keybit);
                 __set_bit(BTN_TOOL_RUBBER, input_dev->keybit);
+               __set_bit(BTN_STYLUS, input_dev->keybit);
+
+               __set_bit(INPUT_PROP_POINTER, input_dev->propbit);
                 break;
  
         case BAMBOO_PT:
                 __clear_bit(ABS_MISC, input_dev->absbit);
  
+               __set_bit(INPUT_PROP_POINTER, input_dev->propbit);
+
                 if (features->device_type == BTN_TOOL_DOUBLETAP) {
                         __set_bit(BTN_LEFT, input_dev->keybit);
                         __set_bit(BTN_FORWARD, input_dev->keybit);
@@ -1460,6 +1485,9 @@ static const struct wacom_features wacom_features_0xD3 =
  static const struct wacom_features wacom_features_0xD4 =
         { "Wacom Bamboo Pen",     WACOM_PKGLEN_BBFUN,     14720,  9200, 1023,
           63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
+static const struct wacom_features wacom_features_0xD5 =
+       { "Wacom Bamboo Pen 6x8",     WACOM_PKGLEN_BBFUN, 21648, 13530, 1023,
+         63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
  static const struct wacom_features wacom_features_0xD6 =
         { "Wacom BambooPT 2FG 4x5", WACOM_PKGLEN_BBFUN,   14720,  9200, 1023,
           63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES };
@@ -1564,6 +1592,7 @@ const struct usb_device_id wacom_ids[] = {
         { USB_DEVICE_WACOM(0xD2) },
         { USB_DEVICE_WACOM(0xD3) },
         { USB_DEVICE_WACOM(0xD4) },
+       { USB_DEVICE_WACOM(0xD5) },
         { USB_DEVICE_WACOM(0xD6) },
         { USB_DEVICE_WACOM(0xD7) },
         { USB_DEVICE_WACOM(0xD8) },
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c

index ae00604a6a81d8c1cf37d65823d14eb728e36e3a..f5d66859f2322b362b4f2e3c747f62db8b20fe5d 100644 (file)
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -244,6 +244,7 @@ struct mxt_finger {
         int x;
         int y;
         int area;
+       int pressure;
  };
  
  /* Each client has this additional data */
@@ -536,6 +537,8 @@ static void mxt_input_report(struct mxt_data *data, int single_id)
                                         finger[id].x);
                         input_report_abs(input_dev, ABS_MT_POSITION_Y,
                                         finger[id].y);
+                       input_report_abs(input_dev, ABS_MT_PRESSURE,
+                                       finger[id].pressure);
                 } else {
                         finger[id].status = 0;
                 }
@@ -546,6 +549,8 @@ static void mxt_input_report(struct mxt_data *data, int single_id)
         if (status != MXT_RELEASE) {
                 input_report_abs(input_dev, ABS_X, finger[single_id].x);
                 input_report_abs(input_dev, ABS_Y, finger[single_id].y);
+               input_report_abs(input_dev,
+                                ABS_PRESSURE, finger[single_id].pressure);
         }
  
         input_sync(input_dev);
@@ -560,6 +565,7 @@ static void mxt_input_touchevent(struct mxt_data *data,
         int x;
         int y;
         int area;
+       int pressure;
  
         /* Check the touch is present on the screen */
         if (!(status & MXT_DETECT)) {
@@ -584,6 +590,7 @@ static void mxt_input_touchevent(struct mxt_data *data,
                 y = y >> 2;
  
         area = message->message[4];
+       pressure = message->message[5];
  
         dev_dbg(dev, "[%d] %s x: %d, y: %d, area: %d\n", id,
                 status & MXT_MOVE ? "moved" : "pressed",
@@ -594,6 +601,7 @@ static void mxt_input_touchevent(struct mxt_data *data,
         finger[id].x = x;
         finger[id].y = y;
         finger[id].area = area;
+       finger[id].pressure = pressure;
  
         mxt_input_report(data, id);
  }
@@ -1116,6 +1124,8 @@ static int __devinit mxt_probe(struct i2c_client *client,
                              0, data->max_x, 0, 0);
         input_set_abs_params(input_dev, ABS_Y,
                              0, data->max_y, 0, 0);
+       input_set_abs_params(input_dev, ABS_PRESSURE,
+                            0, 255, 0, 0);
  
         /* For multi touch */
         input_mt_init_slots(input_dev, MXT_MAX_FINGER);
@@ -1125,6 +1135,8 @@ static int __devinit mxt_probe(struct i2c_client *client,
                              0, data->max_x, 0, 0);
         input_set_abs_params(input_dev, ABS_MT_POSITION_Y,
                              0, data->max_y, 0, 0);
+       input_set_abs_params(input_dev, ABS_MT_PRESSURE,
+                            0, 255, 0, 0);
  
         input_set_drvdata(input_dev, data);
         i2c_set_clientdata(client, data);
diff --git a/drivers/input/touchscreen/max11801_ts.c b/drivers/input/touchscreen/max11801_ts.c

index 4f2713d92791352c1fddd806b2252af69ed24fb6..4627fe55b4011b8f40809938f1fc497672c14b5e 100644 (file)
--- a/drivers/input/touchscreen/max11801_ts.c
+++ b/drivers/input/touchscreen/max11801_ts.c
@@ -9,7 +9,8 @@
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
   */
  
  /*
diff --git a/drivers/input/touchscreen/tnetv107x-ts.c b/drivers/input/touchscreen/tnetv107x-ts.c

index 089b0a0f3d8c3f51174bb9e31c5326c61e66d679..0e8f63e5b36ffbd3ee99e1a751dff2d4400e06e7 100644 (file)
--- a/drivers/input/touchscreen/tnetv107x-ts.c
+++ b/drivers/input/touchscreen/tnetv107x-ts.c
@@ -13,6 +13,7 @@
   * GNU General Public License for more details.
   */
  
+#include <linux/module.h>
  #include <linux/kernel.h>
  #include <linux/err.h>
  #include <linux/errno.h>
diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c

index c14412ef4648ba45bd78d81a7367ca670fc444f2..9941d39df43d3519751345102d29d09266411dbb 100644 (file)
--- a/drivers/input/touchscreen/wacom_w8001.c
+++ b/drivers/input/touchscreen/wacom_w8001.c
@@ -383,6 +383,8 @@ static int w8001_setup(struct w8001 *w8001)
         dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
         strlcat(w8001->name, "Wacom Serial", sizeof(w8001->name));
  
+       __set_bit(INPUT_PROP_DIRECT, dev->propbit);
+
         /* penabled? */
         error = w8001_command(w8001, W8001_CMD_QUERY, true);
         if (!error) {
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c

index a14f8dc23462229c8ba34d6aec18a14eb3fcaf7a..0e4227f457af38c9e1c6fc6bf1e065b94e1e9c4f 100644 (file)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -605,7 +605,9 @@ static void build_inv_all(struct iommu_cmd *cmd)
   * Writes the command to the IOMMUs command buffer and informs the
   * hardware about the new command.
   */
-static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
+static int iommu_queue_command_sync(struct amd_iommu *iommu,
+                                   struct iommu_cmd *cmd,
+                                   bool sync)
  {
         u32 left, tail, head, next_tail;
         unsigned long flags;
@@ -639,13 +641,18 @@ again:
         copy_cmd_to_buffer(iommu, cmd, tail);
  
         /* We need to sync now to make sure all commands are processed */
-       iommu->need_sync = true;
+       iommu->need_sync = sync;
  
         spin_unlock_irqrestore(&iommu->lock, flags);
  
         return 0;
  }
  
+static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
+{
+       return iommu_queue_command_sync(iommu, cmd, true);
+}
+
  /*
   * This function queues a completion wait command into the command
   * buffer of an IOMMU
@@ -661,7 +668,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
  
         build_completion_wait(&cmd, (u64)&sem);
  
-       ret = iommu_queue_command(iommu, &cmd);
+       ret = iommu_queue_command_sync(iommu, &cmd, false);
         if (ret)
                 return ret;
  
@@ -840,14 +847,9 @@ static void domain_flush_complete(struct protection_domain *domain)
  static void domain_flush_devices(struct protection_domain *domain)
  {
         struct iommu_dev_data *dev_data;
-       unsigned long flags;
-
-       spin_lock_irqsave(&domain->lock, flags);
  
         list_for_each_entry(dev_data, &domain->dev_list, list)
                 device_flush_dte(dev_data);
-
-       spin_unlock_irqrestore(&domain->lock, flags);
  }
  
  /****************************************************************************
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c

index 3dc9befa5aec06343691474420ad203963119bdb..6dcc7e2d54de33b1c3c78add5d328bc44c88e76a 100644 (file)
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1388,7 +1388,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu)
                 return ret;
         }
  
-       ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
+       ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
         if (ret)
                 printk(KERN_ERR "IOMMU: can't request irq\n");
         return ret;
diff --git a/drivers/leds/leds-ams-delta.c b/drivers/leds/leds-ams-delta.c

index b9826032450b4609c955260938a7d0889de40ef3..8c00937bf7e74d02bc1759cbab01ca6eba1bc13e 100644 (file)
--- a/drivers/leds/leds-ams-delta.c
+++ b/drivers/leds/leds-ams-delta.c
@@ -8,6 +8,7 @@
   * published by the Free Software Foundation.
   */
  
+#include <linux/module.h>
  #include <linux/kernel.h>
  #include <linux/init.h>
  #include <linux/platform_device.h>
diff --git a/drivers/leds/leds-bd2802.c b/drivers/leds/leds-bd2802.c

index 3ebe3824662d6d4da520d3172be1e0789efa0be2..ea2185531f826e064d53437f956fbfb04e5e35f5 100644 (file)
--- a/drivers/leds/leds-bd2802.c
+++ b/drivers/leds/leds-bd2802.c
@@ -662,6 +662,11 @@ failed_unregister_led1_R:
  static void bd2802_unregister_led_classdev(struct bd2802_led *led)
  {
         cancel_work_sync(&led->work);
+       led_classdev_unregister(&led->cdev_led2b);
+       led_classdev_unregister(&led->cdev_led2g);
+       led_classdev_unregister(&led->cdev_led2r);
+       led_classdev_unregister(&led->cdev_led1b);
+       led_classdev_unregister(&led->cdev_led1g);
         led_classdev_unregister(&led->cdev_led1r);
  }
  
diff --git a/drivers/leds/leds-hp6xx.c b/drivers/leds/leds-hp6xx.c

index e4ce1fd46338122b6e93b39f102a81be4ea621cb..bcfbd3a60eab6b8ee4ae0ecb4cf14efce17a2ac3 100644 (file)
--- a/drivers/leds/leds-hp6xx.c
+++ b/drivers/leds/leds-hp6xx.c
@@ -10,6 +10,7 @@
   * published by the Free Software Foundation.
   */
  
+#include <linux/module.h>
  #include <linux/kernel.h>
  #include <linux/init.h>
  #include <linux/platform_device.h>
diff --git a/drivers/leds/ledtrig-timer.c b/drivers/leds/ledtrig-timer.c

index d87c9d02f786df2570385faea517e54e883c5b30..328c64c0841cdda76f0d4ac7ed2b13cf801bec7b 100644 (file)
--- a/drivers/leds/ledtrig-timer.c
+++ b/drivers/leds/ledtrig-timer.c
@@ -41,6 +41,7 @@ static ssize_t led_delay_on_store(struct device *dev,
  
         if (count == size) {
                 led_blink_set(led_cdev, &state, &led_cdev->blink_delay_off);
+               led_cdev->blink_delay_on = state;
                 ret = count;
         }
  
@@ -69,6 +70,7 @@ static ssize_t led_delay_off_store(struct device *dev,
  
         if (count == size) {
                 led_blink_set(led_cdev, &led_cdev->blink_delay_on, &state);
+               led_cdev->blink_delay_off = state;
                 ret = count;
         }
  
diff --git a/drivers/md/linear.h b/drivers/md/linear.h

index 0ce29b61605a29904608c43a1cf57d87c6f25d43..2f2da05b2ce9c8cae70fa5cdd316541457b42920 100644 (file)
--- a/drivers/md/linear.h
+++ b/drivers/md/linear.h
@@ -10,9 +10,9 @@ typedef struct dev_info dev_info_t;
  
  struct linear_private_data
  {
+       struct rcu_head         rcu;
         sector_t                array_sectors;
         dev_info_t              disks[0];
-       struct rcu_head         rcu;
  };
  
  
diff --git a/drivers/md/md.c b/drivers/md/md.c

index 8e221a20f5d98b362e8f49660454add87a5e3b38..5404b229582021c9c3811934ea73c6fa4efa1eca 100644 (file)
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -848,7 +848,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
         bio->bi_end_io = super_written;
  
         atomic_inc(&mddev->pending_writes);
-       submit_bio(REQ_WRITE | REQ_SYNC | REQ_FLUSH | REQ_FUA, bio);
+       submit_bio(WRITE_FLUSH_FUA, bio);
  }
  
  void md_super_wait(mddev_t *mddev)
@@ -1138,8 +1138,11 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
                         ret = 0;
         }
         rdev->sectors = rdev->sb_start;
+       /* Limit to 4TB as metadata cannot record more than that */
+       if (rdev->sectors >= (2ULL << 32))
+               rdev->sectors = (2ULL << 32) - 2;
  
-       if (rdev->sectors < sb->size * 2 && sb->level > 1)
+       if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
                 /* "this cannot possibly happen" ... */
                 ret = -EINVAL;
  
@@ -1173,7 +1176,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                 mddev->clevel[0] = 0;
                 mddev->layout = sb->layout;
                 mddev->raid_disks = sb->raid_disks;
-               mddev->dev_sectors = sb->size * 2;
+               mddev->dev_sectors = ((sector_t)sb->size) * 2;
                 mddev->events = ev1;
                 mddev->bitmap_info.offset = 0;
                 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
@@ -1415,6 +1418,11 @@ super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
         rdev->sb_start = calc_dev_sboffset(rdev);
         if (!num_sectors || num_sectors > rdev->sb_start)
                 num_sectors = rdev->sb_start;
+       /* Limit to 4TB as metadata cannot record more than that.
+        * 4TB == 2^32 KB, or 2*2^32 sectors.
+        */
+       if (num_sectors >= (2ULL << 32))
+               num_sectors = (2ULL << 32) - 2;
         md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
                        rdev->sb_page);
         md_super_wait(rdev->mddev);
@@ -1738,6 +1746,11 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
         sb->level = cpu_to_le32(mddev->level);
         sb->layout = cpu_to_le32(mddev->layout);
  
+       if (test_bit(WriteMostly, &rdev->flags))
+               sb->devflags |= WriteMostly1;
+       else
+               sb->devflags &= ~WriteMostly1;
+
         if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
                 sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
                 sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
@@ -2561,7 +2574,10 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
         int err = -EINVAL;
         if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
                 md_error(rdev->mddev, rdev);
-               err = 0;
+               if (test_bit(Faulty, &rdev->flags))
+                       err = 0;
+               else
+                       err = -EBUSY;
         } else if (cmd_match(buf, "remove")) {
                 if (rdev->raid_disk >= 0)
                         err = -EBUSY;
@@ -2584,7 +2600,7 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
                 err = 0;
         } else if (cmd_match(buf, "-blocked")) {
                 if (!test_bit(Faulty, &rdev->flags) &&
-                   test_bit(BlockedBadBlocks, &rdev->flags)) {
+                   rdev->badblocks.unacked_exist) {
                         /* metadata handler doesn't understand badblocks,
                          * so we need to fail the device
                          */
@@ -5983,6 +5999,8 @@ static int set_disk_faulty(mddev_t *mddev, dev_t dev)
                 return -ENODEV;
  
         md_error(mddev, rdev);
+       if (!test_bit(Faulty, &rdev->flags))
+               return -EBUSY;
         return 0;
  }
  
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c

index 32323f0afd8954714401cb8ed8c5b455b32a9743..f4622dd8fc590b2bbcaee444b24ab2f55336503d 100644 (file)
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1099,12 +1099,11 @@ read_again:
                 bio_list_add(&conf->pending_bio_list, mbio);
                 spin_unlock_irqrestore(&conf->device_lock, flags);
         }
-       r1_bio_write_done(r1_bio);
-
-       /* In case raid1d snuck in to freeze_array */
-       wake_up(&conf->wait_barrier);
-
+       /* Mustn't call r1_bio_write_done before this next test,
+        * as it could result in the bio being freed.
+        */
         if (sectors_handled < (bio->bi_size >> 9)) {
+               r1_bio_write_done(r1_bio);
                 /* We need another r1_bio.  It has already been counted
                  * in bio->bi_phys_segments
                  */
@@ -1117,6 +1116,11 @@ read_again:
                 goto retry_write;
         }
  
+       r1_bio_write_done(r1_bio);
+
+       /* In case raid1d snuck in to freeze_array */
+       wake_up(&conf->wait_barrier);
+
         if (do_sync || !bitmap || !plugged)
                 md_wakeup_thread(mddev->thread);
  
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c

index 8b29cd4f01c89f8213d7dcc43cd1aaf6dd5799c7..d7a8468ddeabc493284d5acc2f81529f9e638461 100644 (file)
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -337,6 +337,21 @@ static void close_write(r10bio_t *r10_bio)
         md_write_end(r10_bio->mddev);
  }
  
+static void one_write_done(r10bio_t *r10_bio)
+{
+       if (atomic_dec_and_test(&r10_bio->remaining)) {
+               if (test_bit(R10BIO_WriteError, &r10_bio->state))
+                       reschedule_retry(r10_bio);
+               else {
+                       close_write(r10_bio);
+                       if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+                               reschedule_retry(r10_bio);
+                       else
+                               raid_end_bio_io(r10_bio);
+               }
+       }
+}
+
  static void raid10_end_write_request(struct bio *bio, int error)
  {
         int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -387,17 +402,7 @@ static void raid10_end_write_request(struct bio *bio, int error)
          * Let's see if all mirrored write operations have finished
          * already.
          */
-       if (atomic_dec_and_test(&r10_bio->remaining)) {
-               if (test_bit(R10BIO_WriteError, &r10_bio->state))
-                       reschedule_retry(r10_bio);
-               else {
-                       close_write(r10_bio);
-                       if (test_bit(R10BIO_MadeGood, &r10_bio->state))
-                               reschedule_retry(r10_bio);
-                       else
-                               raid_end_bio_io(r10_bio);
-               }
-       }
+       one_write_done(r10_bio);
         if (dec_rdev)
                 rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
  }
@@ -1127,20 +1132,12 @@ retry_write:
                 spin_unlock_irqrestore(&conf->device_lock, flags);
         }
  
-       if (atomic_dec_and_test(&r10_bio->remaining)) {
-               /* This matches the end of raid10_end_write_request() */
-               bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
-                               r10_bio->sectors,
-                               !test_bit(R10BIO_Degraded, &r10_bio->state),
-                               0);
-               md_write_end(mddev);
-               raid_end_bio_io(r10_bio);
-       }
-
-       /* In case raid10d snuck in to freeze_array */
-       wake_up(&conf->wait_barrier);
+       /* Don't remove the bias on 'remaining' (one_write_done) until
+        * after checking if we need to go around again.
+        */
  
         if (sectors_handled < (bio->bi_size >> 9)) {
+               one_write_done(r10_bio);
                 /* We need another r10_bio.  It has already been counted
                  * in bio->bi_phys_segments.
                  */
@@ -1154,6 +1151,10 @@ retry_write:
                 r10_bio->state = 0;
                 goto retry_write;
         }
+       one_write_done(r10_bio);
+
+       /* In case raid10d snuck in to freeze_array */
+       wake_up(&conf->wait_barrier);
  
         if (do_sync || !mddev->bitmap || !plugged)
                 md_wakeup_thread(mddev->thread);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c

index dbae459fb02d7171291e1d8dcc3d7fa538ab02f4..43709fa6b6dfc2c30358887aa33e8b51958554b6 100644 (file)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3336,7 +3336,7 @@ static void handle_stripe(struct stripe_head *sh)
  
  finish:
         /* wait for this device to become unblocked */
-       if (unlikely(s.blocked_rdev))
+       if (conf->mddev->external && unlikely(s.blocked_rdev))
                 md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev);
  
         if (s.handle_bad_blocks)
diff --git a/drivers/media/dvb/dvb-usb/vp7045.c b/drivers/media/dvb/dvb-usb/vp7045.c

index 3db89e3cb0bbf7dbb13dc71d835068656d7c5c5e..536c16c943bd7450874b6fc0ab67983f4015742b 100644 (file)
--- a/drivers/media/dvb/dvb-usb/vp7045.c
+++ b/drivers/media/dvb/dvb-usb/vp7045.c
@@ -224,26 +224,8 @@ static struct dvb_usb_device_properties vp7045_properties;
  static int vp7045_usb_probe(struct usb_interface *intf,
                 const struct usb_device_id *id)
  {
-       struct dvb_usb_device *d;
-       int ret = dvb_usb_device_init(intf, &vp7045_properties,
-                                  THIS_MODULE, &d, adapter_nr);
-       if (ret)
-               return ret;
-
-       d->priv = kmalloc(20, GFP_KERNEL);
-       if (!d->priv) {
-               dvb_usb_device_exit(intf);
-               return -ENOMEM;
-       }
-
-       return ret;
-}
-
-static void vp7045_usb_disconnect(struct usb_interface *intf)
-{
-       struct dvb_usb_device *d = usb_get_intfdata(intf);
-       kfree(d->priv);
-       dvb_usb_device_exit(intf);
+       return dvb_usb_device_init(intf, &vp7045_properties,
+                                  THIS_MODULE, NULL, adapter_nr);
  }
  
  static struct usb_device_id vp7045_usb_table [] = {
@@ -258,7 +240,7 @@ MODULE_DEVICE_TABLE(usb, vp7045_usb_table);
  static struct dvb_usb_device_properties vp7045_properties = {
         .usb_ctrl = CYPRESS_FX2,
         .firmware = "dvb-usb-vp7045-01.fw",
-       .size_of_priv = sizeof(u8 *),
+       .size_of_priv = 20,
  
         .num_adapters = 1,
         .adapter = {
@@ -305,7 +287,7 @@ static struct dvb_usb_device_properties vp7045_properties = {
  static struct usb_driver vp7045_usb_driver = {
         .name           = "dvb_usb_vp7045",
         .probe          = vp7045_usb_probe,
-       .disconnect     = vp7045_usb_disconnect,
+       .disconnect     = dvb_usb_device_exit,
         .id_table       = vp7045_usb_table,
  };
  
diff --git a/drivers/media/rc/nuvoton-cir.c b/drivers/media/rc/nuvoton-cir.c

index eae05b5004761d334519a8aca4784b2db815f959..144f3f55d7656a51999b8a12f673fdf7bcc5ebcf 100644 (file)
--- a/drivers/media/rc/nuvoton-cir.c
+++ b/drivers/media/rc/nuvoton-cir.c
@@ -618,7 +618,6 @@ static void nvt_dump_rx_buf(struct nvt_dev *nvt)
  static void nvt_process_rx_ir_data(struct nvt_dev *nvt)
  {
         DEFINE_IR_RAW_EVENT(rawir);
-       unsigned int count;
         u32 carrier;
         u8 sample;
         int i;
@@ -631,65 +630,38 @@ static void nvt_process_rx_ir_data(struct nvt_dev *nvt)
         if (nvt->carrier_detect_enabled)
                 carrier = nvt_rx_carrier_detect(nvt);
  
-       count = nvt->pkts;
-       nvt_dbg_verbose("Processing buffer of len %d", count);
+       nvt_dbg_verbose("Processing buffer of len %d", nvt->pkts);
  
         init_ir_raw_event(&rawir);
  
-       for (i = 0; i < count; i++) {
-               nvt->pkts--;
+       for (i = 0; i < nvt->pkts; i++) {
                 sample = nvt->buf[i];
  
                 rawir.pulse = ((sample & BUF_PULSE_BIT) != 0);
                 rawir.duration = US_TO_NS((sample & BUF_LEN_MASK)
                                           * SAMPLE_PERIOD);
  
-               if ((sample & BUF_LEN_MASK) == BUF_LEN_MASK) {
-                       if (nvt->rawir.pulse == rawir.pulse)
-                               nvt->rawir.duration += rawir.duration;
-                       else {
-                               nvt->rawir.duration = rawir.duration;
-                               nvt->rawir.pulse = rawir.pulse;
-                       }
-                       continue;
-               }
-
-               rawir.duration += nvt->rawir.duration;
+               nvt_dbg("Storing %s with duration %d",
+                       rawir.pulse ? "pulse" : "space", rawir.duration);
  
-               init_ir_raw_event(&nvt->rawir);
-               nvt->rawir.duration = 0;
-               nvt->rawir.pulse = rawir.pulse;
-
-               if (sample == BUF_PULSE_BIT)
-                       rawir.pulse = false;
-
-               if (rawir.duration) {
-                       nvt_dbg("Storing %s with duration %d",
-                               rawir.pulse ? "pulse" : "space",
-                               rawir.duration);
-
-                       ir_raw_event_store_with_filter(nvt->rdev, &rawir);
-               }
+               ir_raw_event_store_with_filter(nvt->rdev, &rawir);
  
                 /*
                  * BUF_PULSE_BIT indicates end of IR data, BUF_REPEAT_BYTE
                  * indicates end of IR signal, but new data incoming. In both
                  * cases, it means we're ready to call ir_raw_event_handle
                  */
-               if ((sample == BUF_PULSE_BIT) && nvt->pkts) {
+               if ((sample == BUF_PULSE_BIT) && (i + 1 < nvt->pkts)) {
                         nvt_dbg("Calling ir_raw_event_handle (signal end)\n");
                         ir_raw_event_handle(nvt->rdev);
                 }
         }
  
+       nvt->pkts = 0;
+
         nvt_dbg("Calling ir_raw_event_handle (buffer empty)\n");
         ir_raw_event_handle(nvt->rdev);
  
-       if (nvt->pkts) {
-               nvt_dbg("Odd, pkts should be 0 now... (its %u)", nvt->pkts);
-               nvt->pkts = 0;
-       }
-
         nvt_dbg_verbose("%s done", __func__);
  }
  
@@ -1048,7 +1020,6 @@ static int nvt_probe(struct pnp_dev *pdev, const struct pnp_device_id *dev_id)
  
         spin_lock_init(&nvt->nvt_lock);
         spin_lock_init(&nvt->tx.lock);
-       init_ir_raw_event(&nvt->rawir);
  
         ret = -EBUSY;
         /* now claim resources */
diff --git a/drivers/media/rc/nuvoton-cir.h b/drivers/media/rc/nuvoton-cir.h

index 1241fc89a36c10c02dfa56ed5e351c89f28ac4e7..0d5e0872a2ea2f43b2d9fd31f18ffeac78361118 100644 (file)
--- a/drivers/media/rc/nuvoton-cir.h
+++ b/drivers/media/rc/nuvoton-cir.h
@@ -67,7 +67,6 @@ static int debug;
  struct nvt_dev {
         struct pnp_dev *pdev;
         struct rc_dev *rdev;
-       struct ir_raw_event rawir;
  
         spinlock_t nvt_lock;
  
diff --git a/drivers/media/video/gspca/ov519.c b/drivers/media/video/gspca/ov519.c

index 0800433b209287c45c9f4672a41f20e99b43adfe..18305c89083c567944890ec3ee451aa52b5182df 100644 (file)
--- a/drivers/media/video/gspca/ov519.c
+++ b/drivers/media/video/gspca/ov519.c
@@ -2858,7 +2858,6 @@ static void ov7xx0_configure(struct sd *sd)
                         case 0x60:
                                 PDEBUG(D_PROBE, "Sensor is a OV7660");
                                 sd->sensor = SEN_OV7660;
-                               sd->invert_led = 0;
                                 break;
                         default:
                                 PDEBUG(D_PROBE, "Unknown sensor: 0x76%x", low);
@@ -3337,7 +3336,6 @@ static int sd_config(struct gspca_dev *gspca_dev,
         case BRIDGE_OV519:
                 cam->cam_mode = ov519_vga_mode;
                 cam->nmodes = ARRAY_SIZE(ov519_vga_mode);
-               sd->invert_led = !sd->invert_led;
                 break;
         case BRIDGE_OVFX2:
                 cam->cam_mode = ov519_vga_mode;
@@ -5005,24 +5003,24 @@ static const struct sd_desc sd_desc = {
  /* -- module initialisation -- */
  static const struct usb_device_id device_table[] = {
         {USB_DEVICE(0x041e, 0x4003), .driver_info = BRIDGE_W9968CF },
-       {USB_DEVICE(0x041e, 0x4052), .driver_info = BRIDGE_OV519 },
-       {USB_DEVICE(0x041e, 0x405f),
+       {USB_DEVICE(0x041e, 0x4052),
                 .driver_info = BRIDGE_OV519 | BRIDGE_INVERT_LED },
+       {USB_DEVICE(0x041e, 0x405f), .driver_info = BRIDGE_OV519 },
         {USB_DEVICE(0x041e, 0x4060), .driver_info = BRIDGE_OV519 },
         {USB_DEVICE(0x041e, 0x4061), .driver_info = BRIDGE_OV519 },
-       {USB_DEVICE(0x041e, 0x4064),
-               .driver_info = BRIDGE_OV519 | BRIDGE_INVERT_LED },
+       {USB_DEVICE(0x041e, 0x4064), .driver_info = BRIDGE_OV519 },
         {USB_DEVICE(0x041e, 0x4067), .driver_info = BRIDGE_OV519 },
-       {USB_DEVICE(0x041e, 0x4068),
+       {USB_DEVICE(0x041e, 0x4068), .driver_info = BRIDGE_OV519 },
+       {USB_DEVICE(0x045e, 0x028c),
                 .driver_info = BRIDGE_OV519 | BRIDGE_INVERT_LED },
-       {USB_DEVICE(0x045e, 0x028c), .driver_info = BRIDGE_OV519 },
         {USB_DEVICE(0x054c, 0x0154), .driver_info = BRIDGE_OV519 },
-       {USB_DEVICE(0x054c, 0x0155),
-               .driver_info = BRIDGE_OV519 | BRIDGE_INVERT_LED },
+       {USB_DEVICE(0x054c, 0x0155), .driver_info = BRIDGE_OV519 },
         {USB_DEVICE(0x05a9, 0x0511), .driver_info = BRIDGE_OV511 },
         {USB_DEVICE(0x05a9, 0x0518), .driver_info = BRIDGE_OV518 },
-       {USB_DEVICE(0x05a9, 0x0519), .driver_info = BRIDGE_OV519 },
-       {USB_DEVICE(0x05a9, 0x0530), .driver_info = BRIDGE_OV519 },
+       {USB_DEVICE(0x05a9, 0x0519),
+               .driver_info = BRIDGE_OV519 | BRIDGE_INVERT_LED },
+       {USB_DEVICE(0x05a9, 0x0530),
+               .driver_info = BRIDGE_OV519 | BRIDGE_INVERT_LED },
         {USB_DEVICE(0x05a9, 0x2800), .driver_info = BRIDGE_OVFX2 },
         {USB_DEVICE(0x05a9, 0x4519), .driver_info = BRIDGE_OV519 },
         {USB_DEVICE(0x05a9, 0x8519), .driver_info = BRIDGE_OV519 },
diff --git a/drivers/media/video/gspca/sonixj.c b/drivers/media/video/gspca/sonixj.c

index 81b8a600783b575290814af47931ee5b1702ebcf..c477ad11f103486128c0ce13b69b7949908dc900 100644 (file)
--- a/drivers/media/video/gspca/sonixj.c
+++ b/drivers/media/video/gspca/sonixj.c
@@ -2386,7 +2386,7 @@ static int sd_start(struct gspca_dev *gspca_dev)
                 reg_w1(gspca_dev, 0x01, 0x22);
                 msleep(100);
                 reg01 = SCL_SEL_OD | S_PDN_INV;
-               reg17 &= MCK_SIZE_MASK;
+               reg17 &= ~MCK_SIZE_MASK;
                 reg17 |= 0x04;          /* clock / 4 */
                 break;
         }
@@ -2532,6 +2532,10 @@ static int sd_start(struct gspca_dev *gspca_dev)
                 if (!mode) {                    /* if 640x480 */
                         reg17 &= ~MCK_SIZE_MASK;
                         reg17 |= 0x04;          /* clock / 4 */
+               } else {
+                       reg01 &= ~SYS_SEL_48M;  /* clk 24Mz */
+                       reg17 &= ~MCK_SIZE_MASK;
+                       reg17 |= 0x02;          /* clock / 2 */
                 }
                 break;
         case SENSOR_OV7630:
diff --git a/drivers/media/video/pwc/pwc-v4l.c b/drivers/media/video/pwc/pwc-v4l.c

index e9a0e94b9995003e26191583f4e91b8ba7690f56..8c70e64444e7b38bc03b00fdafb36d7bd43d21e7 100644 (file)
--- a/drivers/media/video/pwc/pwc-v4l.c
+++ b/drivers/media/video/pwc/pwc-v4l.c
@@ -338,7 +338,7 @@ int pwc_init_controls(struct pwc_device *pdev)
         if (pdev->restore_factory)
                 pdev->restore_factory->flags = V4L2_CTRL_FLAG_UPDATE;
  
-       if (!pdev->features & FEATURE_MOTOR_PANTILT)
+       if (!(pdev->features & FEATURE_MOTOR_PANTILT))
                 return hdl->error;
  
         /* Motor pan / tilt / reset */
diff --git a/drivers/media/video/via-camera.c b/drivers/media/video/via-camera.c

index 85d3048c1d679dcc83cdf6a7afb8d0f6af1f15c2..bb7f17f2a33c20452ce9d6d37bbb29990215fe07 100644 (file)
--- a/drivers/media/video/via-camera.c
+++ b/drivers/media/video/via-camera.c
@@ -1332,6 +1332,8 @@ static __devinit bool viacam_serial_is_enabled(void)
         struct pci_bus *pbus = pci_find_bus(0, 0);
         u8 cbyte;
  
+       if (!pbus)
+               return false;
         pci_bus_read_config_byte(pbus, VIACAM_SERIAL_DEVFN,
                         VIACAM_SERIAL_CREG, &cbyte);
         if ((cbyte & VIACAM_SERIAL_BIT) == 0)
diff --git a/drivers/mfd/max8997.c b/drivers/mfd/max8997.c

index 5d1fca0277ef3da09560e219912a76c5e3d0af14..f83103b8970d09c2c8166049465a29a280067f78 100644 (file)
--- a/drivers/mfd/max8997.c
+++ b/drivers/mfd/max8997.c
@@ -135,10 +135,13 @@ static int max8997_i2c_probe(struct i2c_client *i2c,
         max8997->dev = &i2c->dev;
         max8997->i2c = i2c;
         max8997->type = id->driver_data;
+       max8997->irq = i2c->irq;
  
         if (!pdata)
                 goto err;
  
+       max8997->irq_base = pdata->irq_base;
+       max8997->ono = pdata->ono;
         max8997->wakeup = pdata->wakeup;
  
         mutex_init(&max8997->iolock);
@@ -152,6 +155,8 @@ static int max8997_i2c_probe(struct i2c_client *i2c,
  
         pm_runtime_set_active(max8997->dev);
  
+       max8997_irq_init(max8997);
+
         mfd_add_devices(max8997->dev, -1, max8997_devs,
                         ARRAY_SIZE(max8997_devs),
                         NULL, 0);
diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c

index 29601e7d606dc425591205597817cc19935066a4..86e14583a08276fd7f76dddd3afe3433f1dfa9a8 100644 (file)
--- a/drivers/mfd/omap-usb-host.c
+++ b/drivers/mfd/omap-usb-host.c
@@ -17,6 +17,7 @@
   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
   */
  #include <linux/kernel.h>
+#include <linux/module.h>
  #include <linux/types.h>
  #include <linux/slab.h>
  #include <linux/delay.h>
@@ -676,7 +677,6 @@ static void usbhs_omap_tll_init(struct device *dev, u8 tll_channel_count)
                                 | OMAP_TLL_CHANNEL_CONF_ULPINOBITSTUFF
                                 | OMAP_TLL_CHANNEL_CONF_ULPIDDRMODE);
  
-                       reg |= (1 << (i + 1));
                 } else
                         continue;
  
diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c

index 2bfad5c86cc76875cc0765300a4f4f3bf24d7332..a56be931551c9ac7ee34e546a18891e8a2b6549d 100644 (file)
--- a/drivers/mfd/tps65910-irq.c
+++ b/drivers/mfd/tps65910-irq.c
@@ -178,8 +178,10 @@ int tps65910_irq_init(struct tps65910 *tps65910, int irq,
         switch (tps65910_chip_id(tps65910)) {
         case TPS65910:
                 tps65910->irq_num = TPS65910_NUM_IRQ;
+               break;
         case TPS65911:
                 tps65910->irq_num = TPS65911_NUM_IRQ;
+               break;
         }
  
         /* Register with genirq */
diff --git a/drivers/mfd/twl4030-madc.c b/drivers/mfd/twl4030-madc.c

index b5d598c3aa71b356f40fb9a5f89738eed16c5350..7cbf2aa9e64f2fb744dbf717ce90057e08a8eaf2 100644 (file)
--- a/drivers/mfd/twl4030-madc.c
+++ b/drivers/mfd/twl4030-madc.c
@@ -510,8 +510,9 @@ int twl4030_madc_conversion(struct twl4030_madc_request *req)
         u8 ch_msb, ch_lsb;
         int ret;
  
-       if (!req)
+       if (!req || !twl4030_madc)
                 return -EINVAL;
+
         mutex_lock(&twl4030_madc->lock);
         if (req->method < TWL4030_MADC_RT || req->method > TWL4030_MADC_SW2) {
                 ret = -EINVAL;
@@ -706,6 +707,8 @@ static int __devinit twl4030_madc_probe(struct platform_device *pdev)
         if (!madc)
                 return -ENOMEM;
  
+       madc->dev = &pdev->dev;
+
         /*
          * Phoenix provides 2 interrupt lines. The first one is connected to
          * the OMAP. The other one can be connected to the other processor such
diff --git a/drivers/mfd/wm8350-gpio.c b/drivers/mfd/wm8350-gpio.c

index ebf99bef392f67e669bb0cd9c14ea9548c8ddfd8..d584f6b4d6e2dfd8fd982da3954b40e5fe412342 100644 (file)
--- a/drivers/mfd/wm8350-gpio.c
+++ b/drivers/mfd/wm8350-gpio.c
@@ -37,7 +37,7 @@ static int gpio_set_dir(struct wm8350 *wm8350, int gpio, int dir)
         return ret;
  }
  
-static int gpio_set_debounce(struct wm8350 *wm8350, int gpio, int db)
+static int wm8350_gpio_set_debounce(struct wm8350 *wm8350, int gpio, int db)
  {
         if (db == WM8350_GPIO_DEBOUNCE_ON)
                 return wm8350_set_bits(wm8350, WM8350_GPIO_DEBOUNCE,
@@ -210,7 +210,7 @@ int wm8350_gpio_config(struct wm8350 *wm8350, int gpio, int dir, int func,
                 goto err;
         if (gpio_set_polarity(wm8350, gpio, pol))
                 goto err;
-       if (gpio_set_debounce(wm8350, gpio, debounce))
+       if (wm8350_gpio_set_debounce(wm8350, gpio, debounce))
                 goto err;
         if (gpio_set_dir(wm8350, gpio, dir))
                 goto err;
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig

index 0a4d86c6c4a4d107502bc00ab3f3d25943d9e0f8..2d6423c2d19340015c9f4be263342387db661d9a 100644 (file)
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -146,6 +146,7 @@ config PHANTOM
  
  config INTEL_MID_PTI
         tristate "Parallel Trace Interface for MIPI P1149.7 cJTAG standard"
+       depends on PCI
         default n
         help
           The PTI (Parallel Trace Interface) driver directs
diff --git a/drivers/misc/ab8500-pwm.c b/drivers/misc/ab8500-pwm.c

index 54e3d05b63cc6466549f532de704e5f2d0faf082..35903154ca2ee0e5c2adaa7971c0370aac8f19cc 100644 (file)
--- a/drivers/misc/ab8500-pwm.c
+++ b/drivers/misc/ab8500-pwm.c
@@ -164,5 +164,5 @@ subsys_initcall(ab8500_pwm_init);
  module_exit(ab8500_pwm_exit);
  MODULE_AUTHOR("Arun MURTHY <arun.murthy@stericsson.com>");
  MODULE_DESCRIPTION("AB8500 Pulse Width Modulation Driver");
-MODULE_ALIAS("AB8500 PWM driver");
+MODULE_ALIAS("platform:ab8500-pwm");
  MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/cb710/core.c b/drivers/misc/cb710/core.c

index efec4139c3f68f512cded3fd51631e7097895d19..68cd05b6d829d1f3b7fa15f256f78b3bb6f82f82 100644 (file)
--- a/drivers/misc/cb710/core.c
+++ b/drivers/misc/cb710/core.c
@@ -33,7 +33,7 @@ EXPORT_SYMBOL_GPL(cb710_pci_update_config_reg);
  static int __devinit cb710_pci_configure(struct pci_dev *pdev)
  {
         unsigned int devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0);
-       struct pci_dev *pdev0 = pci_get_slot(pdev->bus, devfn);
+       struct pci_dev *pdev0;
         u32 val;
  
         cb710_pci_update_config_reg(pdev, 0x48,
@@ -43,6 +43,7 @@ static int __devinit cb710_pci_configure(struct pci_dev *pdev)
         if (val & 0x80000000)
                 return 0;
  
+       pdev0 = pci_get_slot(pdev->bus, devfn);
         if (!pdev0)
                 return -ENODEV;
  
diff --git a/drivers/misc/fsa9480.c b/drivers/misc/fsa9480.c

index 5325a7e70dcf47b6a357b59146b5cfd24821455e..27dc0d21aafa36b73955105aa6097eb5c7836352 100644 (file)
--- a/drivers/misc/fsa9480.c
+++ b/drivers/misc/fsa9480.c
@@ -455,7 +455,7 @@ static int __devinit fsa9480_probe(struct i2c_client *client,
  
  fail2:
         if (client->irq)
-               free_irq(client->irq, NULL);
+               free_irq(client->irq, usbsw);
  fail1:
         i2c_set_clientdata(client, NULL);
         kfree(usbsw);
@@ -466,7 +466,7 @@ static int __devexit fsa9480_remove(struct i2c_client *client)
  {
         struct fsa9480_usbsw *usbsw = i2c_get_clientdata(client);
         if (client->irq)
-               free_irq(client->irq, NULL);
+               free_irq(client->irq, usbsw);
         i2c_set_clientdata(client, NULL);
  
         sysfs_remove_group(&client->dev.kobj, &fsa9480_group);
diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c

index 8653bd0b1a33ca68b541c8161338ce8ee9e5357d..0b56e3f43573d9664f6e2a778d3384b47b9de161 100644 (file)
--- a/drivers/misc/pti.c
+++ b/drivers/misc/pti.c
@@ -33,6 +33,8 @@
  #include <linux/mutex.h>
  #include <linux/miscdevice.h>
  #include <linux/pti.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
  
  #define DRIVERNAME             "pti"
  #define PCINAME                        "pciPTI"
@@ -163,6 +165,11 @@ static void pti_write_to_aperture(struct pti_masterchannel *mc,
  static void pti_control_frame_built_and_sent(struct pti_masterchannel *mc,
                                              const char *thread_name)
  {
+       /*
+        * Since we access the comm member in current's task_struct, we only
+        * need to be as large as what 'comm' in that structure is.
+        */
+       char comm[TASK_COMM_LEN];
         struct pti_masterchannel mccontrol = {.master = CONTROL_ID,
                                               .channel = 0};
         const char *thread_name_p;
@@ -170,13 +177,6 @@ static void pti_control_frame_built_and_sent(struct pti_masterchannel *mc,
         u8 control_frame[CONTROL_FRAME_LEN];
  
         if (!thread_name) {
-               /*
-                * Since we access the comm member in current's task_struct,
-                * we only need to be as large as what 'comm' in that
-                * structure is.
-                */
-               char comm[TASK_COMM_LEN];
-
                 if (!in_interrupt())
                         get_task_comm(comm, current);
                 else
diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c

index 54c91ffe4a9154fb7142f5f61a654658e8ab1fd0..ba168a7d54d42318d7d192e4158b482844c85be4 100644 (file)
--- a/drivers/misc/ti-st/st_core.c
+++ b/drivers/misc/ti-st/st_core.c
@@ -338,6 +338,12 @@ void st_int_recv(void *disc_data,
                         /* Unknow packet? */
                 default:
                         type = *ptr;
+                       if (st_gdata->list[type] == NULL) {
+                               pr_err("chip/interface misbehavior dropping"
+                                       " frame starting with 0x%02x", type);
+                               goto done;
+
+                       }
                         st_gdata->rx_skb = alloc_skb(
                                         st_gdata->list[type]->max_frame_size,
                                         GFP_ATOMIC);
@@ -354,6 +360,7 @@ void st_int_recv(void *disc_data,
                 ptr++;
                 count--;
         }
+done:
         spin_unlock_irqrestore(&st_gdata->lock, flags);
         pr_debug("done %s", __func__);
         return;
@@ -717,9 +724,10 @@ static void st_tty_close(struct tty_struct *tty)
          */
         spin_lock_irqsave(&st_gdata->lock, flags);
         for (i = ST_BT; i < ST_MAX_CHANNELS; i++) {
-               if (st_gdata->list[i] != NULL)
+               if (st_gdata->is_registered[i] == true)
                         pr_err("%d not un-registered", i);
                 st_gdata->list[i] = NULL;
+               st_gdata->is_registered[i] = false;
         }
         st_gdata->protos_registered = 0;
         spin_unlock_irqrestore(&st_gdata->lock, flags);
diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c

index 38fd2f04c07eed8df424dd7ef6f04ba25d7f4054..3a3580566dfca39fa5e2882f1658418665a9e72d 100644 (file)
--- a/drivers/misc/ti-st/st_kim.c
+++ b/drivers/misc/ti-st/st_kim.c
@@ -68,6 +68,7 @@ void validate_firmware_response(struct kim_data_s *kim_gdata)
         if (unlikely(skb->data[5] != 0)) {
                 pr_err("no proper response during fw download");
                 pr_err("data6 %x", skb->data[5]);
+               kfree_skb(skb);
                 return;         /* keep waiting for the proper response */
         }
         /* becos of all the script being downloaded */
@@ -210,6 +211,7 @@ static long read_local_version(struct kim_data_s *kim_gdata, char *bts_scr_name)
                 pr_err(" waiting for ver info- timed out ");
                 return -ETIMEDOUT;
         }
+       INIT_COMPLETION(kim_gdata->kim_rcvd);
  
         version =
                 MAKEWORD(kim_gdata->resp_buffer[13],
@@ -298,6 +300,7 @@ static long download_firmware(struct kim_data_s *kim_gdata)
  
                 switch (((struct bts_action *)ptr)->type) {
                 case ACTION_SEND_COMMAND:       /* action send */
+                       pr_debug("S");
                         action_ptr = &(((struct bts_action *)ptr)->data[0]);
                         if (unlikely
                             (((struct hci_command *)action_ptr)->opcode ==
@@ -335,6 +338,10 @@ static long download_firmware(struct kim_data_s *kim_gdata)
                                 release_firmware(kim_gdata->fw_entry);
                                 return -ETIMEDOUT;
                         }
+                       /* reinit completion before sending for the
+                        * relevant wait
+                        */
+                       INIT_COMPLETION(kim_gdata->kim_rcvd);
  
                         /*
                          * Free space found in uart buffer, call st_int_write
@@ -361,6 +368,7 @@ static long download_firmware(struct kim_data_s *kim_gdata)
                         }
                         break;
                 case ACTION_WAIT_EVENT:  /* wait */
+                       pr_debug("W");
                         if (!wait_for_completion_timeout
                                         (&kim_gdata->kim_rcvd,
                                          msecs_to_jiffies(CMD_RESP_TIME))) {
@@ -434,11 +442,17 @@ long st_kim_start(void *kim_data)
  {
         long err = 0;
         long retry = POR_RETRY_COUNT;
+       struct ti_st_plat_data  *pdata;
         struct kim_data_s       *kim_gdata = (struct kim_data_s *)kim_data;
  
         pr_info(" %s", __func__);
+       pdata = kim_gdata->kim_pdev->dev.platform_data;
  
         do {
+               /* platform specific enabling code here */
+               if (pdata->chip_enable)
+                       pdata->chip_enable(kim_gdata);
+
                 /* Configure BT nShutdown to HIGH state */
                 gpio_set_value(kim_gdata->nshutdown, GPIO_LOW);
                 mdelay(5);      /* FIXME: a proper toggle */
@@ -460,6 +474,12 @@ long st_kim_start(void *kim_data)
                         pr_info("ldisc_install = 0");
                         sysfs_notify(&kim_gdata->kim_pdev->dev.kobj,
                                         NULL, "install");
+                       /* the following wait is never going to be completed,
+                        * since the ldisc was never installed, hence serving
+                        * as a mdelay of LDISC_TIME msecs */
+                       err = wait_for_completion_timeout
+                               (&kim_gdata->ldisc_installed,
+                                msecs_to_jiffies(LDISC_TIME));
                         err = -ETIMEDOUT;
                         continue;
                 } else {
@@ -472,6 +492,13 @@ long st_kim_start(void *kim_data)
                                 pr_info("ldisc_install = 0");
                                 sysfs_notify(&kim_gdata->kim_pdev->dev.kobj,
                                                 NULL, "install");
+                               /* this wait might be completed, though in the
+                                * tty_close() since the ldisc is already
+                                * installed */
+                               err = wait_for_completion_timeout
+                                       (&kim_gdata->ldisc_installed,
+                                        msecs_to_jiffies(LDISC_TIME));
+                               err = -EINVAL;
                                 continue;
                         } else {        /* on success don't retry */
                                 break;
@@ -489,6 +516,8 @@ long st_kim_stop(void *kim_data)
  {
         long err = 0;
         struct kim_data_s       *kim_gdata = (struct kim_data_s *)kim_data;
+       struct ti_st_plat_data  *pdata =
+               kim_gdata->kim_pdev->dev.platform_data;
  
         INIT_COMPLETION(kim_gdata->ldisc_installed);
  
@@ -515,6 +544,10 @@ long st_kim_stop(void *kim_data)
         gpio_set_value(kim_gdata->nshutdown, GPIO_HIGH);
         mdelay(1);
         gpio_set_value(kim_gdata->nshutdown, GPIO_LOW);
+
+       /* platform specific disable */
+       if (pdata->chip_disable)
+               pdata->chip_disable(kim_gdata);
         return err;
  }
  
diff --git a/drivers/misc/ti-st/st_ll.c b/drivers/misc/ti-st/st_ll.c

index 3f2495138855457ac6dfc6cfa5de554ed47ee071..1ff460a8e9c74f67e584d634016fa25332fd351c 100644 (file)
--- a/drivers/misc/ti-st/st_ll.c
+++ b/drivers/misc/ti-st/st_ll.c
@@ -22,6 +22,7 @@
  #define pr_fmt(fmt) "(stll) :" fmt
  #include <linux/skbuff.h>
  #include <linux/module.h>
+#include <linux/platform_device.h>
  #include <linux/ti_wilink_st.h>
  
  /**********************************************************************/
@@ -37,6 +38,9 @@ static void send_ll_cmd(struct st_data_s *st_data,
  
  static void ll_device_want_to_sleep(struct st_data_s *st_data)
  {
+       struct kim_data_s       *kim_data;
+       struct ti_st_plat_data  *pdata;
+
         pr_debug("%s", __func__);
         /* sanity check */
         if (st_data->ll_state != ST_LL_AWAKE)
@@ -46,10 +50,19 @@ static void ll_device_want_to_sleep(struct st_data_s *st_data)
         send_ll_cmd(st_data, LL_SLEEP_ACK);
         /* update state */
         st_data->ll_state = ST_LL_ASLEEP;
+
+       /* communicate to platform about chip asleep */
+       kim_data = st_data->kim_data;
+       pdata = kim_data->kim_pdev->dev.platform_data;
+       if (pdata->chip_asleep)
+               pdata->chip_asleep(NULL);
  }
  
  static void ll_device_want_to_wakeup(struct st_data_s *st_data)
  {
+       struct kim_data_s       *kim_data;
+       struct ti_st_plat_data  *pdata;
+
         /* diff actions in diff states */
         switch (st_data->ll_state) {
         case ST_LL_ASLEEP:
@@ -70,6 +83,12 @@ static void ll_device_want_to_wakeup(struct st_data_s *st_data)
         }
         /* update state */
         st_data->ll_state = ST_LL_AWAKE;
+
+       /* communicate to platform about chip wakeup */
+       kim_data = st_data->kim_data;
+       pdata = kim_data->kim_pdev->dev.platform_data;
+       if (pdata->chip_asleep)
+               pdata->chip_awake(NULL);
  }
  
  /**********************************************************************/
diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c

index 006a5e9f8ab83803229638a37683acdda6e1d520..2bf229acd3b8110c3a03bc3dbb9783e2451ef099 100644 (file)
--- a/drivers/mmc/card/mmc_test.c
+++ b/drivers/mmc/card/mmc_test.c
@@ -224,7 +224,7 @@ static void mmc_test_prepare_mrq(struct mmc_test_card *test,
  static int mmc_test_busy(struct mmc_command *cmd)
  {
         return !(cmd->resp[0] & R1_READY_FOR_DATA) ||
-               (R1_CURRENT_STATE(cmd->resp[0]) == 7);
+               (R1_CURRENT_STATE(cmd->resp[0]) == R1_STATE_PRG);
  }
  
  /*
@@ -2900,7 +2900,7 @@ static const struct file_operations mmc_test_fops_testlist = {
         .release        = single_release,
  };
  
-static void mmc_test_free_file_test(struct mmc_card *card)
+static void mmc_test_free_dbgfs_file(struct mmc_card *card)
  {
         struct mmc_test_dbgfs_file *df, *dfs;
  
@@ -2917,34 +2917,21 @@ static void mmc_test_free_file_test(struct mmc_card *card)
         mutex_unlock(&mmc_test_lock);
  }
  
-static int mmc_test_register_file_test(struct mmc_card *card)
+static int __mmc_test_register_dbgfs_file(struct mmc_card *card,
+       const char *name, mode_t mode, const struct file_operations *fops)
  {
         struct dentry *file = NULL;
         struct mmc_test_dbgfs_file *df;
-       int ret = 0;
-
-       mutex_lock(&mmc_test_lock);
-
-       if (card->debugfs_root)
-               file = debugfs_create_file("test", S_IWUSR | S_IRUGO,
-                       card->debugfs_root, card, &mmc_test_fops_test);
-
-       if (IS_ERR_OR_NULL(file)) {
-               dev_err(&card->dev,
-                       "Can't create test. Perhaps debugfs is disabled.\n");
-               ret = -ENODEV;
-               goto err;
-       }
  
         if (card->debugfs_root)
-               file = debugfs_create_file("testlist", S_IRUGO,
-                       card->debugfs_root, card, &mmc_test_fops_testlist);
+               file = debugfs_create_file(name, mode, card->debugfs_root,
+                       card, fops);
  
         if (IS_ERR_OR_NULL(file)) {
                 dev_err(&card->dev,
-                       "Can't create testlist. Perhaps debugfs is disabled.\n");
-               ret = -ENODEV;
-               goto err;
+                       "Can't create %s. Perhaps debugfs is disabled.\n",
+                       name);
+               return -ENODEV;
         }
  
         df = kmalloc(sizeof(struct mmc_test_dbgfs_file), GFP_KERNEL);
@@ -2952,14 +2939,31 @@ static int mmc_test_register_file_test(struct mmc_card *card)
                 debugfs_remove(file);
                 dev_err(&card->dev,
                         "Can't allocate memory for internal usage.\n");
-               ret = -ENOMEM;
-               goto err;
+               return -ENOMEM;
         }
  
         df->card = card;
         df->file = file;
  
         list_add(&df->link, &mmc_test_file_test);
+       return 0;
+}
+
+static int mmc_test_register_dbgfs_file(struct mmc_card *card)
+{
+       int ret;
+
+       mutex_lock(&mmc_test_lock);
+
+       ret = __mmc_test_register_dbgfs_file(card, "test", S_IWUSR | S_IRUGO,
+               &mmc_test_fops_test);
+       if (ret)
+               goto err;
+
+       ret = __mmc_test_register_dbgfs_file(card, "testlist", S_IRUGO,
+               &mmc_test_fops_testlist);
+       if (ret)
+               goto err;
  
  err:
         mutex_unlock(&mmc_test_lock);
@@ -2974,7 +2978,7 @@ static int mmc_test_probe(struct mmc_card *card)
         if (!mmc_card_mmc(card) && !mmc_card_sd(card))
                 return -ENODEV;
  
-       ret = mmc_test_register_file_test(card);
+       ret = mmc_test_register_dbgfs_file(card);
         if (ret)
                 return ret;
  
@@ -2986,7 +2990,7 @@ static int mmc_test_probe(struct mmc_card *card)
  static void mmc_test_remove(struct mmc_card *card)
  {
         mmc_test_free_result(card);
-       mmc_test_free_file_test(card);
+       mmc_test_free_dbgfs_file(card);
  }
  
  static struct mmc_driver mmc_driver = {
@@ -3006,7 +3010,7 @@ static void __exit mmc_test_exit(void)
  {
         /* Clear stalled data if card is still plugged */
         mmc_test_free_result(NULL);
-       mmc_test_free_file_test(NULL);
+       mmc_test_free_dbgfs_file(NULL);
  
         mmc_unregister_driver(&mmc_driver);
  }
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c

index 89bdeaec7182904f83a4e3810925bcefffc3c55b..b27b94078c21888bd472346e92e04fcb8a87464e 100644 (file)
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -133,7 +133,7 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
                 if (mrq->done)
                         mrq->done(mrq);
  
-               mmc_host_clk_gate(host);
+               mmc_host_clk_release(host);
         }
  }
  
@@ -192,7 +192,7 @@ mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
                         mrq->stop->mrq = mrq;
                 }
         }
-       mmc_host_clk_ungate(host);
+       mmc_host_clk_hold(host);
         led_trigger_event(host->led, LED_FULL);
         host->ops->request(host, mrq);
  }
@@ -728,15 +728,17 @@ static inline void mmc_set_ios(struct mmc_host *host)
   */
  void mmc_set_chip_select(struct mmc_host *host, int mode)
  {
+       mmc_host_clk_hold(host);
         host->ios.chip_select = mode;
         mmc_set_ios(host);
+       mmc_host_clk_release(host);
  }
  
  /*
   * Sets the host clock to the highest possible frequency that
   * is below "hz".
   */
-void mmc_set_clock(struct mmc_host *host, unsigned int hz)
+static void __mmc_set_clock(struct mmc_host *host, unsigned int hz)
  {
         WARN_ON(hz < host->f_min);
  
@@ -747,6 +749,13 @@ void mmc_set_clock(struct mmc_host *host, unsigned int hz)
         mmc_set_ios(host);
  }
  
+void mmc_set_clock(struct mmc_host *host, unsigned int hz)
+{
+       mmc_host_clk_hold(host);
+       __mmc_set_clock(host, hz);
+       mmc_host_clk_release(host);
+}
+
  #ifdef CONFIG_MMC_CLKGATE
  /*
   * This gates the clock by setting it to 0 Hz.
@@ -779,7 +788,7 @@ void mmc_ungate_clock(struct mmc_host *host)
         if (host->clk_old) {
                 BUG_ON(host->ios.clock);
                 /* This call will also set host->clk_gated to false */
-               mmc_set_clock(host, host->clk_old);
+               __mmc_set_clock(host, host->clk_old);
         }
  }
  
@@ -807,8 +816,10 @@ void mmc_set_ungated(struct mmc_host *host)
   */
  void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode)
  {
+       mmc_host_clk_hold(host);
         host->ios.bus_mode = mode;
         mmc_set_ios(host);
+       mmc_host_clk_release(host);
  }
  
  /*
@@ -816,8 +827,10 @@ void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode)
   */
  void mmc_set_bus_width(struct mmc_host *host, unsigned int width)
  {
+       mmc_host_clk_hold(host);
         host->ios.bus_width = width;
         mmc_set_ios(host);
+       mmc_host_clk_release(host);
  }
  
  /**
@@ -1015,8 +1028,10 @@ u32 mmc_select_voltage(struct mmc_host *host, u32 ocr)
  
                 ocr &= 3 << bit;
  
+               mmc_host_clk_hold(host);
                 host->ios.vdd = bit;
                 mmc_set_ios(host);
+               mmc_host_clk_release(host);
         } else {
                 pr_warning("%s: host doesn't support card's voltages\n",
                                 mmc_hostname(host));
@@ -1063,8 +1078,10 @@ int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage, bool cmd11
   */
  void mmc_set_timing(struct mmc_host *host, unsigned int timing)
  {
+       mmc_host_clk_hold(host);
         host->ios.timing = timing;
         mmc_set_ios(host);
+       mmc_host_clk_release(host);
  }
  
  /*
@@ -1072,8 +1089,10 @@ void mmc_set_timing(struct mmc_host *host, unsigned int timing)
   */
  void mmc_set_driver_type(struct mmc_host *host, unsigned int drv_type)
  {
+       mmc_host_clk_hold(host);
         host->ios.drv_type = drv_type;
         mmc_set_ios(host);
+       mmc_host_clk_release(host);
  }
  
  /*
@@ -1091,6 +1110,8 @@ static void mmc_power_up(struct mmc_host *host)
  {
         int bit;
  
+       mmc_host_clk_hold(host);
+
         /* If ocr is set, we use it */
         if (host->ocr)
                 bit = ffs(host->ocr) - 1;
@@ -1126,10 +1147,14 @@ static void mmc_power_up(struct mmc_host *host)
          * time required to reach a stable voltage.
          */
         mmc_delay(10);
+
+       mmc_host_clk_release(host);
  }
  
  static void mmc_power_off(struct mmc_host *host)
  {
+       mmc_host_clk_hold(host);
+
         host->ios.clock = 0;
         host->ios.vdd = 0;
  
@@ -1147,6 +1172,8 @@ static void mmc_power_off(struct mmc_host *host)
         host->ios.bus_width = MMC_BUS_WIDTH_1;
         host->ios.timing = MMC_TIMING_LEGACY;
         mmc_set_ios(host);
+
+       mmc_host_clk_release(host);
  }
  
  /*
@@ -1502,7 +1529,7 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from,
                         goto out;
                 }
         } while (!(cmd.resp[0] & R1_READY_FOR_DATA) ||
-                R1_CURRENT_STATE(cmd.resp[0]) == 7);
+                R1_CURRENT_STATE(cmd.resp[0]) == R1_STATE_PRG);
  out:
         return err;
  }
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c

index b29d3e8fd3a2ad713525c4be82c9b2f9699910ec..793d0a0dad8d74d85d239da2c3b107e47a5d1b4e 100644 (file)
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -119,14 +119,14 @@ static void mmc_host_clk_gate_work(struct work_struct *work)
  }
  
  /**
- *     mmc_host_clk_ungate - ungate hardware MCI clocks
+ *     mmc_host_clk_hold - ungate hardware MCI clocks
   *     @host: host to ungate.
   *
   *     Makes sure the host ios.clock is restored to a non-zero value
   *     past this call. Increase clock reference count and ungate clock
   *     if we're the first user.
   */
-void mmc_host_clk_ungate(struct mmc_host *host)
+void mmc_host_clk_hold(struct mmc_host *host)
  {
         unsigned long flags;
  
@@ -164,14 +164,14 @@ static bool mmc_host_may_gate_card(struct mmc_card *card)
  }
  
  /**
- *     mmc_host_clk_gate - gate off hardware MCI clocks
+ *     mmc_host_clk_release - gate off hardware MCI clocks
   *     @host: host to gate.
   *
   *     Calls the host driver with ios.clock set to zero as often as possible
   *     in order to gate off hardware MCI clocks. Decrease clock reference
   *     count and schedule disabling of clock.
   */
-void mmc_host_clk_gate(struct mmc_host *host)
+void mmc_host_clk_release(struct mmc_host *host)
  {
         unsigned long flags;
  
@@ -179,7 +179,7 @@ void mmc_host_clk_gate(struct mmc_host *host)
         host->clk_requests--;
         if (mmc_host_may_gate_card(host->card) &&
             !host->clk_requests)
-               schedule_work(&host->clk_gate_work);
+               queue_work(system_nrt_wq, &host->clk_gate_work);
         spin_unlock_irqrestore(&host->clk_lock, flags);
  }
  
@@ -231,7 +231,7 @@ static inline void mmc_host_clk_exit(struct mmc_host *host)
         if (cancel_work_sync(&host->clk_gate_work))
                 mmc_host_clk_gate_delayed(host);
         if (host->clk_gated)
-               mmc_host_clk_ungate(host);
+               mmc_host_clk_hold(host);
         /* There should be only one user now */
         WARN_ON(host->clk_requests > 1);
  }
diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h

index de199f91192851b0d4f3162a6dcbe78199fa9e59..fb8a5cd2e4a1e87bfab9439f8c9e44a3d75ee4b4 100644 (file)
--- a/drivers/mmc/core/host.h
+++ b/drivers/mmc/core/host.h
@@ -16,16 +16,16 @@ int mmc_register_host_class(void);
  void mmc_unregister_host_class(void);
  
  #ifdef CONFIG_MMC_CLKGATE
-void mmc_host_clk_ungate(struct mmc_host *host);
-void mmc_host_clk_gate(struct mmc_host *host);
+void mmc_host_clk_hold(struct mmc_host *host);
+void mmc_host_clk_release(struct mmc_host *host);
  unsigned int mmc_host_clk_rate(struct mmc_host *host);
  
  #else
-static inline void mmc_host_clk_ungate(struct mmc_host *host)
+static inline void mmc_host_clk_hold(struct mmc_host *host)
  {
  }
  
-static inline void mmc_host_clk_gate(struct mmc_host *host)
+static inline void mmc_host_clk_release(struct mmc_host *host)
  {
  }
  
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c

index aa7d1d79b8c554c143c004db5e3ef9565ca653dd..5700b1cbdfec05ffc606879d4da293d24f63484d 100644 (file)
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -259,7 +259,7 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
         }
  
         card->ext_csd.rev = ext_csd[EXT_CSD_REV];
-       if (card->ext_csd.rev > 5) {
+       if (card->ext_csd.rev > 6) {
                 printk(KERN_ERR "%s: unrecognised EXT_CSD revision %d\n",
                         mmc_hostname(card->host), card->ext_csd.rev);
                 err = -EINVAL;
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c

index 845ce7c533b901b0895a6bf4c5715c676c5cd902..770c3d06f5dcd1a01814065464d7c20a6f7d051f 100644 (file)
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -407,7 +407,7 @@ int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
                         break;
                 if (mmc_host_is_spi(card->host))
                         break;
-       } while (R1_CURRENT_STATE(status) == 7);
+       } while (R1_CURRENT_STATE(status) == R1_STATE_PRG);
  
         if (mmc_host_is_spi(card->host)) {
                 if (status & R1_SPI_ILLEGAL_COMMAND)
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c

index 633975ff2bb395f6e2e883c2e7ce05fb51ed8df2..0370e03e314253f027a36aa994591ea2817f0826 100644 (file)
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -469,56 +469,75 @@ static int sd_select_driver_type(struct mmc_card *card, u8 *status)
         return 0;
  }
  
-static int sd_set_bus_speed_mode(struct mmc_card *card, u8 *status)
+static void sd_update_bus_speed_mode(struct mmc_card *card)
  {
-       unsigned int bus_speed = 0, timing = 0;
-       int err;
-
         /*
          * If the host doesn't support any of the UHS-I modes, fallback on
          * default speed.
          */
         if (!(card->host->caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
-           MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50)))
-               return 0;
+           MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50))) {
+               card->sd_bus_speed = 0;
+               return;
+       }
  
         if ((card->host->caps & MMC_CAP_UHS_SDR104) &&
             (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)) {
-                       bus_speed = UHS_SDR104_BUS_SPEED;
-                       timing = MMC_TIMING_UHS_SDR104;
-                       card->sw_caps.uhs_max_dtr = UHS_SDR104_MAX_DTR;
+                       card->sd_bus_speed = UHS_SDR104_BUS_SPEED;
         } else if ((card->host->caps & MMC_CAP_UHS_DDR50) &&
                    (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_DDR50)) {
-                       bus_speed = UHS_DDR50_BUS_SPEED;
-                       timing = MMC_TIMING_UHS_DDR50;
-                       card->sw_caps.uhs_max_dtr = UHS_DDR50_MAX_DTR;
+                       card->sd_bus_speed = UHS_DDR50_BUS_SPEED;
         } else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
                     MMC_CAP_UHS_SDR50)) && (card->sw_caps.sd3_bus_mode &
                     SD_MODE_UHS_SDR50)) {
-                       bus_speed = UHS_SDR50_BUS_SPEED;
-                       timing = MMC_TIMING_UHS_SDR50;
-                       card->sw_caps.uhs_max_dtr = UHS_SDR50_MAX_DTR;
+                       card->sd_bus_speed = UHS_SDR50_BUS_SPEED;
         } else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
                     MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25)) &&
                    (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR25)) {
-                       bus_speed = UHS_SDR25_BUS_SPEED;
-                       timing = MMC_TIMING_UHS_SDR25;
-                       card->sw_caps.uhs_max_dtr = UHS_SDR25_MAX_DTR;
+                       card->sd_bus_speed = UHS_SDR25_BUS_SPEED;
         } else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
                     MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25 |
                     MMC_CAP_UHS_SDR12)) && (card->sw_caps.sd3_bus_mode &
                     SD_MODE_UHS_SDR12)) {
-                       bus_speed = UHS_SDR12_BUS_SPEED;
-                       timing = MMC_TIMING_UHS_SDR12;
-                       card->sw_caps.uhs_max_dtr = UHS_SDR12_MAX_DTR;
+                       card->sd_bus_speed = UHS_SDR12_BUS_SPEED;
+       }
+}
+
+static int sd_set_bus_speed_mode(struct mmc_card *card, u8 *status)
+{
+       int err;
+       unsigned int timing = 0;
+
+       switch (card->sd_bus_speed) {
+       case UHS_SDR104_BUS_SPEED:
+               timing = MMC_TIMING_UHS_SDR104;
+               card->sw_caps.uhs_max_dtr = UHS_SDR104_MAX_DTR;
+               break;
+       case UHS_DDR50_BUS_SPEED:
+               timing = MMC_TIMING_UHS_DDR50;
+               card->sw_caps.uhs_max_dtr = UHS_DDR50_MAX_DTR;
+               break;
+       case UHS_SDR50_BUS_SPEED:
+               timing = MMC_TIMING_UHS_SDR50;
+               card->sw_caps.uhs_max_dtr = UHS_SDR50_MAX_DTR;
+               break;
+       case UHS_SDR25_BUS_SPEED:
+               timing = MMC_TIMING_UHS_SDR25;
+               card->sw_caps.uhs_max_dtr = UHS_SDR25_MAX_DTR;
+               break;
+       case UHS_SDR12_BUS_SPEED:
+               timing = MMC_TIMING_UHS_SDR12;
+               card->sw_caps.uhs_max_dtr = UHS_SDR12_MAX_DTR;
+               break;
+       default:
+               return 0;
         }
  
-       card->sd_bus_speed = bus_speed;
-       err = mmc_sd_switch(card, 1, 0, bus_speed, status);
+       err = mmc_sd_switch(card, 1, 0, card->sd_bus_speed, status);
         if (err)
                 return err;
  
-       if ((status[16] & 0xF) != bus_speed)
+       if ((status[16] & 0xF) != card->sd_bus_speed)
                 printk(KERN_WARNING "%s: Problem setting bus speed mode!\n",
                         mmc_hostname(card->host));
         else {
@@ -618,18 +637,24 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card)
                 mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
         }
  
+       /*
+        * Select the bus speed mode depending on host
+        * and card capability.
+        */
+       sd_update_bus_speed_mode(card);
+
         /* Set the driver strength for the card */
         err = sd_select_driver_type(card, status);
         if (err)
                 goto out;
  
-       /* Set bus speed mode of the card */
-       err = sd_set_bus_speed_mode(card, status);
+       /* Set current limit for the card */
+       err = sd_set_current_limit(card, status);
         if (err)
                 goto out;
  
-       /* Set current limit for the card */
-       err = sd_set_current_limit(card, status);
+       /* Set bus speed mode of the card */
+       err = sd_set_bus_speed_mode(card, status);
         if (err)
                 goto out;
  
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c

index 77f0b6b1681ddc8ba74658a06a4aa8ad62276053..ff0f714b012c0babf2b9faddcc36fe4647c87892 100644 (file)
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -62,7 +62,7 @@ struct idmac_desc {
  
         u32             des1;   /* Buffer sizes */
  #define IDMAC_SET_BUFFER1_SIZE(d, s) \
-       ((d)->des1 = ((d)->des1 & 0x03ffc000) | ((s) & 0x3fff))
+       ((d)->des1 = ((d)->des1 & 0x03ffe000) | ((s) & 0x1fff))
  
         u32             des2;   /* buffer 1 physical address */
  
@@ -699,7 +699,7 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
         }
  
         /* DDR mode set */
-       if (ios->ddr) {
+       if (ios->timing == MMC_TIMING_UHS_DDR50) {
                 regs = mci_readl(slot->host, UHS_REG);
                 regs |= (0x1 << slot->id) << 16;
                 mci_writel(slot->host, UHS_REG, regs);
@@ -1646,7 +1646,7 @@ static int __init dw_mci_init_slot(struct dw_mci *host, unsigned int id)
                         mmc->caps |= MMC_CAP_4_BIT_DATA;
  
         if (host->pdata->quirks & DW_MCI_QUIRK_HIGHSPEED)
-               mmc->caps |= MMC_CAP_SD_HIGHSPEED;
+               mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED;
  
  #ifdef CONFIG_MMC_DW_IDMAC
         mmc->max_segs = host->ring_size;
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c

index 9ebfb4b482f5983870169b1b4f0662b1fd4c7e24..4dc0028086a34357ef5b5578678ce7ca4a1ccdd6 100644 (file)
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -16,6 +16,7 @@
  #include <linux/err.h>
  #include <linux/clk.h>
  #include <linux/gpio.h>
+#include <linux/module.h>
  #include <linux/slab.h>
  #include <linux/mmc/host.h>
  #include <linux/mmc/mmc.h>
@@ -27,6 +28,7 @@
  #include "sdhci-pltfm.h"
  #include "sdhci-esdhc.h"
  
+#define        SDHCI_CTRL_D3CD                 0x08
  /* VENDOR SPEC register */
  #define SDHCI_VENDOR_SPEC              0xC0
  #define  SDHCI_VENDOR_SPEC_SDIO_QUIRK  0x00000002
@@ -141,13 +143,32 @@ static void esdhc_writel_le(struct sdhci_host *host, u32 val, int reg)
         struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
         struct pltfm_imx_data *imx_data = pltfm_host->priv;
         struct esdhc_platform_data *boarddata = &imx_data->boarddata;
-
-       if (unlikely((reg == SDHCI_INT_ENABLE || reg == SDHCI_SIGNAL_ENABLE)
-                       && (boarddata->cd_type == ESDHC_CD_GPIO)))
-               /*
-                * these interrupts won't work with a custom card_detect gpio
-                */
-               val &= ~(SDHCI_INT_CARD_REMOVE | SDHCI_INT_CARD_INSERT);
+       u32 data;
+
+       if (unlikely(reg == SDHCI_INT_ENABLE || reg == SDHCI_SIGNAL_ENABLE)) {
+               if (boarddata->cd_type == ESDHC_CD_GPIO)
+                       /*
+                        * These interrupts won't work with a custom
+                        * card_detect gpio (only applied to mx25/35)
+                        */
+                       val &= ~(SDHCI_INT_CARD_REMOVE | SDHCI_INT_CARD_INSERT);
+
+               if (val & SDHCI_INT_CARD_INT) {
+                       /*
+                        * Clear and then set D3CD bit to avoid missing the
+                        * card interrupt.  This is a eSDHC controller problem
+                        * so we need to apply the following workaround: clear
+                        * and set D3CD bit will make eSDHC re-sample the card
+                        * interrupt. In case a card interrupt was lost,
+                        * re-sample it by the following steps.
+                        */
+                       data = readl(host->ioaddr + SDHCI_HOST_CONTROL);
+                       data &= ~SDHCI_CTRL_D3CD;
+                       writel(data, host->ioaddr + SDHCI_HOST_CONTROL);
+                       data |= SDHCI_CTRL_D3CD;
+                       writel(data, host->ioaddr + SDHCI_HOST_CONTROL);
+               }
+       }
  
         if (unlikely((imx_data->flags & ESDHC_FLAG_MULTIBLK_NO_INT)
                                 && (reg == SDHCI_INT_STATUS)
@@ -217,8 +238,10 @@ static void esdhc_writeb_le(struct sdhci_host *host, u8 val, int reg)
                  */
                 return;
         case SDHCI_HOST_CONTROL:
-               /* FSL messed up here, so we can just keep those two */
-               new_val = val & (SDHCI_CTRL_LED | SDHCI_CTRL_4BITBUS);
+               /* FSL messed up here, so we can just keep those three */
+               new_val = val & (SDHCI_CTRL_LED | \
+                               SDHCI_CTRL_4BITBUS | \
+                               SDHCI_CTRL_D3CD);
                 /* ensure the endianess */
                 new_val |= ESDHC_HOST_CONTROL_LE;
                 /* DMA mode bits are shifted */
diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c

index 4198dbbc5c205f76c54e4b96c31a71901cf7f213..fc7e4a51562934064bf4345bbe0851461d011362 100644 (file)
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -195,7 +195,8 @@ static int __devinit sdhci_pxav3_probe(struct platform_device *pdev)
         clk_enable(clk);
  
         host->quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL
-               | SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC;
+               | SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC
+               | SDHCI_QUIRK_32BIT_ADMA_SIZE;
  
         /* enable 1/8V DDR capable */
         host->mmc->caps |= MMC_CAP_1_8V_DDR;
diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c

index 460ffaf0f6d7ebaa86f64a205292504e97965d1d..fe886d6c474a41718fb4b388c90c14ad26bf823e 100644 (file)
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -19,6 +19,7 @@
  #include <linux/clk.h>
  #include <linux/io.h>
  #include <linux/gpio.h>
+#include <linux/module.h>
  
  #include <linux/mmc/host.h>
  
@@ -301,6 +302,8 @@ static int sdhci_s3c_platform_8bit_width(struct sdhci_host *host, int width)
                 ctrl &= ~SDHCI_CTRL_8BITBUS;
                 break;
         default:
+               ctrl &= ~SDHCI_CTRL_4BITBUS;
+               ctrl &= ~SDHCI_CTRL_8BITBUS;
                 break;
         }
  
@@ -502,6 +505,9 @@ static int __devinit sdhci_s3c_probe(struct platform_device *pdev)
         /* This host supports the Auto CMD12 */
         host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12;
  
+       /* Samsung SoCs need BROKEN_ADMA_ZEROLEN_DESC */
+       host->quirks |= SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC;
+
         if (pdata->cd_type == S3C_SDHCI_CD_NONE ||
             pdata->cd_type == S3C_SDHCI_CD_PERMANENT)
                 host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION;
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c

index c31a3343340d34779b77c4a00b280da47b942816..0e02cc1df12e40b66b4302ee9af6d69a5f9947fe 100644 (file)
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -628,12 +628,11 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd)
         /* timeout in us */
         if (!data)
                 target_timeout = cmd->cmd_timeout_ms * 1000;
-       else
-               target_timeout = data->timeout_ns / 1000 +
-                       data->timeout_clks / host->clock;
-
-       if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK)
-               host->timeout_clk = host->clock / 1000;
+       else {
+               target_timeout = data->timeout_ns / 1000;
+               if (host->clock)
+                       target_timeout += data->timeout_clks / host->clock;
+       }
  
         /*
          * Figure out needed cycles.
@@ -645,7 +644,6 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd)
          *     =>
          *     (1) / (2) > 2^6
          */
-       BUG_ON(!host->timeout_clk);
         count = 0;
         current_timeout = (1 << 13) * 1000 / host->timeout_clk;
         while (current_timeout < target_timeout) {
@@ -1867,9 +1865,6 @@ static void sdhci_tasklet_finish(unsigned long param)
  
         del_timer(&host->timer);
  
-       if (host->version >= SDHCI_SPEC_300)
-               del_timer(&host->tuning_timer);
-
         mrq = host->mrq;
  
         /*
@@ -2461,22 +2456,6 @@ int sdhci_add_host(struct sdhci_host *host)
                 host->max_clk = host->ops->get_max_clock(host);
         }
  
-       host->timeout_clk =
-               (caps[0] & SDHCI_TIMEOUT_CLK_MASK) >> SDHCI_TIMEOUT_CLK_SHIFT;
-       if (host->timeout_clk == 0) {
-               if (host->ops->get_timeout_clock) {
-                       host->timeout_clk = host->ops->get_timeout_clock(host);
-               } else if (!(host->quirks &
-                               SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK)) {
-                       printk(KERN_ERR
-                              "%s: Hardware doesn't specify timeout clock "
-                              "frequency.\n", mmc_hostname(mmc));
-                       return -ENODEV;
-               }
-       }
-       if (caps[0] & SDHCI_TIMEOUT_CLK_UNIT)
-               host->timeout_clk *= 1000;
-
         /*
          * In case of Host Controller v3.00, find out whether clock
          * multiplier is supported.
@@ -2509,10 +2488,26 @@ int sdhci_add_host(struct sdhci_host *host)
         } else
                 mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200;
  
+       host->timeout_clk =
+               (caps[0] & SDHCI_TIMEOUT_CLK_MASK) >> SDHCI_TIMEOUT_CLK_SHIFT;
+       if (host->timeout_clk == 0) {
+               if (host->ops->get_timeout_clock) {
+                       host->timeout_clk = host->ops->get_timeout_clock(host);
+               } else if (!(host->quirks &
+                               SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK)) {
+                       printk(KERN_ERR
+                              "%s: Hardware doesn't specify timeout clock "
+                              "frequency.\n", mmc_hostname(mmc));
+                       return -ENODEV;
+               }
+       }
+       if (caps[0] & SDHCI_TIMEOUT_CLK_UNIT)
+               host->timeout_clk *= 1000;
+
         if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK)
-               mmc->max_discard_to = (1 << 27) / (mmc->f_max / 1000);
-       else
-               mmc->max_discard_to = (1 << 27) / host->timeout_clk;
+               host->timeout_clk = mmc->f_max / 1000;
+
+       mmc->max_discard_to = (1 << 27) / host->timeout_clk;
  
         mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23;
  
diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c

index 774f6439d7ce06be72eee93ee1896e0e32fab9ea..0c4a672f5db618bd68c96b4bb7a6239bdf7f12aa 100644 (file)
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c
@@ -120,11 +120,11 @@ static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev)
         mmc_data->hclk = clk_get_rate(priv->clk);
         mmc_data->set_pwr = sh_mobile_sdhi_set_pwr;
         mmc_data->get_cd = sh_mobile_sdhi_get_cd;
-       if (mmc_data->flags & TMIO_MMC_HAS_IDLE_WAIT)
-               mmc_data->write16_hook = sh_mobile_sdhi_write16_hook;
         mmc_data->capabilities = MMC_CAP_MMC_HIGHSPEED;
         if (p) {
                 mmc_data->flags = p->tmio_flags;
+               if (mmc_data->flags & TMIO_MMC_HAS_IDLE_WAIT)
+                       mmc_data->write16_hook = sh_mobile_sdhi_write16_hook;
                 mmc_data->ocr_mask = p->tmio_ocr_mask;
                 mmc_data->capabilities |= p->tmio_caps;
  
diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c

index 8d185de90d207ded1a256b32cdd69ae11d6ec2c6..44a9668c4b7ad759483b286569470a105d7ecc53 100644 (file)
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -27,7 +27,6 @@
  static int tmio_mmc_suspend(struct platform_device *dev, pm_message_t state)
  {
         const struct mfd_cell *cell = mfd_get_cell(dev);
-       struct mmc_host *mmc = platform_get_drvdata(dev);
         int ret;
  
         ret = tmio_mmc_host_suspend(&dev->dev);
@@ -42,7 +41,6 @@ static int tmio_mmc_suspend(struct platform_device *dev, pm_message_t state)
  static int tmio_mmc_resume(struct platform_device *dev)
  {
         const struct mfd_cell *cell = mfd_get_cell(dev);
-       struct mmc_host *mmc = platform_get_drvdata(dev);
         int ret = 0;
  
         /* Tell the MFD core we are ready to be enabled */
diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h

index 65b5b76cc3794102af8229bd239499829705f61a..64fbb002182518310e56693cc88b294465cc2383 100644 (file)
--- a/drivers/mtd/ubi/debug.h
+++ b/drivers/mtd/ubi/debug.h
@@ -181,7 +181,7 @@ static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi)
  
  #define ubi_dbg_msg(fmt, ...) do {                                           \
         if (0)                                                               \
-               pr_debug(fmt "\n", ##__VA_ARGS__);                           \
+               printk(KERN_DEBUG fmt "\n", ##__VA_ARGS__);                  \
  } while (0)
  
  #define dbg_msg(fmt, ...)  ubi_dbg_msg(fmt, ##__VA_ARGS__)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig

index 8d0314dbd946c99141d0870542376c367bbf35cb..a44874e24f2a81a987426171b901a5e201257ae2 100644 (file)
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2535,7 +2535,7 @@ config S6GMAC
  source "drivers/net/stmmac/Kconfig"
  
  config PCH_GBE
-       tristate "Intel EG20T PCH / OKI SEMICONDUCTOR ML7223 IOH GbE"
+       tristate "Intel EG20T PCH/OKI SEMICONDUCTOR IOH(ML7223/ML7831) GbE"
         depends on PCI
         select MII
         ---help---
@@ -2548,10 +2548,11 @@ config PCH_GBE
           This driver enables Gigabit Ethernet function.
  
           This driver also can be used for OKI SEMICONDUCTOR IOH(Input/
-         Output Hub), ML7223.
-         ML7223 IOH is for MP(Media Phone) use.
-         ML7223 is companion chip for Intel Atom E6xx series.
-         ML7223 is completely compatible for Intel EG20T PCH.
+         Output Hub), ML7223/ML7831.
+         ML7223 IOH is for MP(Media Phone) use. ML7831 IOH is for general
+         purpose use.
+         ML7223/ML7831 is companion chip for Intel Atom E6xx series.
+         ML7223/ML7831 is completely compatible for Intel EG20T PCH.
  
  config FTGMAC100
         tristate "Faraday FTGMAC100 Gigabit Ethernet support"
diff --git a/drivers/net/arm/am79c961a.c b/drivers/net/arm/am79c961a.c

index 52fe21e1e2cd11b20f50d3c59c7672818ac485bc..3b1416e3d217a51ca0c53f05816994073a49ed33 100644 (file)
--- a/drivers/net/arm/am79c961a.c
+++ b/drivers/net/arm/am79c961a.c
@@ -308,8 +308,11 @@ static void am79c961_timer(unsigned long data)
         struct net_device *dev = (struct net_device *)data;
         struct dev_priv *priv = netdev_priv(dev);
         unsigned int lnkstat, carrier;
+       unsigned long flags;
  
+       spin_lock_irqsave(&priv->chip_lock, flags);
         lnkstat = read_ireg(dev->base_addr, ISALED0) & ISALED0_LNKST;
+       spin_unlock_irqrestore(&priv->chip_lock, flags);
         carrier = netif_carrier_ok(dev);
  
         if (lnkstat && !carrier) {
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h

index c423504a755f555703c88bbe303820eb532ea1a2..e46df5331c55f4b73d9981270c0aa61769bed784 100644 (file)
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -315,6 +315,14 @@ union db_prod {
         u32             raw;
  };
  
+/* dropless fc FW/HW related params */
+#define BRB_SIZE(bp)           (CHIP_IS_E3(bp) ? 1024 : 512)
+#define MAX_AGG_QS(bp)         (CHIP_IS_E1(bp) ? \
+                                       ETH_MAX_AGGREGATION_QUEUES_E1 :\
+                                       ETH_MAX_AGGREGATION_QUEUES_E1H_E2)
+#define FW_DROP_LEVEL(bp)      (3 + MAX_SPQ_PENDING + MAX_AGG_QS(bp))
+#define FW_PREFETCH_CNT                16
+#define DROPLESS_FC_HEADROOM   100
  
  /* MC hsi */
  #define BCM_PAGE_SHIFT         12
@@ -331,15 +339,35 @@ union db_prod {
  /* SGE ring related macros */
  #define NUM_RX_SGE_PAGES       2
  #define RX_SGE_CNT             (BCM_PAGE_SIZE / sizeof(struct eth_rx_sge))
-#define MAX_RX_SGE_CNT         (RX_SGE_CNT - 2)
+#define NEXT_PAGE_SGE_DESC_CNT 2
+#define MAX_RX_SGE_CNT         (RX_SGE_CNT - NEXT_PAGE_SGE_DESC_CNT)
  /* RX_SGE_CNT is promised to be a power of 2 */
  #define RX_SGE_MASK            (RX_SGE_CNT - 1)
  #define NUM_RX_SGE             (RX_SGE_CNT * NUM_RX_SGE_PAGES)
  #define MAX_RX_SGE             (NUM_RX_SGE - 1)
  #define NEXT_SGE_IDX(x)                ((((x) & RX_SGE_MASK) == \
-                                 (MAX_RX_SGE_CNT - 1)) ? (x) + 3 : (x) + 1)
+                                 (MAX_RX_SGE_CNT - 1)) ? \
+                                       (x) + 1 + NEXT_PAGE_SGE_DESC_CNT : \
+                                       (x) + 1)
  #define RX_SGE(x)              ((x) & MAX_RX_SGE)
  
+/*
+ * Number of required  SGEs is the sum of two:
+ * 1. Number of possible opened aggregations (next packet for
+ *    these aggregations will probably consume SGE immidiatelly)
+ * 2. Rest of BRB blocks divided by 2 (block will consume new SGE only
+ *    after placement on BD for new TPA aggregation)
+ *
+ * Takes into account NEXT_PAGE_SGE_DESC_CNT "next" elements on each page
+ */
+#define NUM_SGE_REQ            (MAX_AGG_QS(bp) + \
+                                       (BRB_SIZE(bp) - MAX_AGG_QS(bp)) / 2)
+#define NUM_SGE_PG_REQ         ((NUM_SGE_REQ + MAX_RX_SGE_CNT - 1) / \
+                                               MAX_RX_SGE_CNT)
+#define SGE_TH_LO(bp)          (NUM_SGE_REQ + \
+                                NUM_SGE_PG_REQ * NEXT_PAGE_SGE_DESC_CNT)
+#define SGE_TH_HI(bp)          (SGE_TH_LO(bp) + DROPLESS_FC_HEADROOM)
+
  /* Manipulate a bit vector defined as an array of u64 */
  
  /* Number of bits in one sge_mask array element */
@@ -551,24 +579,43 @@ struct bnx2x_fastpath {
  
  #define NUM_TX_RINGS           16
  #define TX_DESC_CNT            (BCM_PAGE_SIZE / sizeof(union eth_tx_bd_types))
-#define MAX_TX_DESC_CNT                (TX_DESC_CNT - 1)
+#define NEXT_PAGE_TX_DESC_CNT  1
+#define MAX_TX_DESC_CNT                (TX_DESC_CNT - NEXT_PAGE_TX_DESC_CNT)
  #define NUM_TX_BD              (TX_DESC_CNT * NUM_TX_RINGS)
  #define MAX_TX_BD              (NUM_TX_BD - 1)
  #define MAX_TX_AVAIL           (MAX_TX_DESC_CNT * NUM_TX_RINGS - 2)
  #define NEXT_TX_IDX(x)         ((((x) & MAX_TX_DESC_CNT) == \
-                                 (MAX_TX_DESC_CNT - 1)) ? (x) + 2 : (x) + 1)
+                                 (MAX_TX_DESC_CNT - 1)) ? \
+                                       (x) + 1 + NEXT_PAGE_TX_DESC_CNT : \
+                                       (x) + 1)
  #define TX_BD(x)               ((x) & MAX_TX_BD)
  #define TX_BD_POFF(x)          ((x) & MAX_TX_DESC_CNT)
  
  /* The RX BD ring is special, each bd is 8 bytes but the last one is 16 */
  #define NUM_RX_RINGS           8
  #define RX_DESC_CNT            (BCM_PAGE_SIZE / sizeof(struct eth_rx_bd))
-#define MAX_RX_DESC_CNT                (RX_DESC_CNT - 2)
+#define NEXT_PAGE_RX_DESC_CNT  2
+#define MAX_RX_DESC_CNT                (RX_DESC_CNT - NEXT_PAGE_RX_DESC_CNT)
  #define RX_DESC_MASK           (RX_DESC_CNT - 1)
  #define NUM_RX_BD              (RX_DESC_CNT * NUM_RX_RINGS)
  #define MAX_RX_BD              (NUM_RX_BD - 1)
  #define MAX_RX_AVAIL           (MAX_RX_DESC_CNT * NUM_RX_RINGS - 2)
-#define MIN_RX_AVAIL           128
+
+/* dropless fc calculations for BDs
+ *
+ * Number of BDs should as number of buffers in BRB:
+ * Low threshold takes into account NEXT_PAGE_RX_DESC_CNT
+ * "next" elements on each page
+ */
+#define NUM_BD_REQ             BRB_SIZE(bp)
+#define NUM_BD_PG_REQ          ((NUM_BD_REQ + MAX_RX_DESC_CNT - 1) / \
+                                             MAX_RX_DESC_CNT)
+#define BD_TH_LO(bp)           (NUM_BD_REQ + \
+                                NUM_BD_PG_REQ * NEXT_PAGE_RX_DESC_CNT + \
+                                FW_DROP_LEVEL(bp))
+#define BD_TH_HI(bp)           (BD_TH_LO(bp) + DROPLESS_FC_HEADROOM)
+
+#define MIN_RX_AVAIL           ((bp)->dropless_fc ? BD_TH_HI(bp) + 128 : 128)
  
  #define MIN_RX_SIZE_TPA_HW     (CHIP_IS_E1(bp) ? \
                                         ETH_MIN_RX_CQES_WITH_TPA_E1 : \
@@ -579,7 +626,9 @@ struct bnx2x_fastpath {
                                                                 MIN_RX_AVAIL))
  
  #define NEXT_RX_IDX(x)         ((((x) & RX_DESC_MASK) == \
-                                 (MAX_RX_DESC_CNT - 1)) ? (x) + 3 : (x) + 1)
+                                 (MAX_RX_DESC_CNT - 1)) ? \
+                                       (x) + 1 + NEXT_PAGE_RX_DESC_CNT : \
+                                       (x) + 1)
  #define RX_BD(x)               ((x) & MAX_RX_BD)
  
  /*
@@ -589,14 +638,31 @@ struct bnx2x_fastpath {
  #define CQE_BD_REL     (sizeof(union eth_rx_cqe) / sizeof(struct eth_rx_bd))
  #define NUM_RCQ_RINGS          (NUM_RX_RINGS * CQE_BD_REL)
  #define RCQ_DESC_CNT           (BCM_PAGE_SIZE / sizeof(union eth_rx_cqe))
-#define MAX_RCQ_DESC_CNT       (RCQ_DESC_CNT - 1)
+#define NEXT_PAGE_RCQ_DESC_CNT 1
+#define MAX_RCQ_DESC_CNT       (RCQ_DESC_CNT - NEXT_PAGE_RCQ_DESC_CNT)
  #define NUM_RCQ_BD             (RCQ_DESC_CNT * NUM_RCQ_RINGS)
  #define MAX_RCQ_BD             (NUM_RCQ_BD - 1)
  #define MAX_RCQ_AVAIL          (MAX_RCQ_DESC_CNT * NUM_RCQ_RINGS - 2)
  #define NEXT_RCQ_IDX(x)                ((((x) & MAX_RCQ_DESC_CNT) == \
-                                 (MAX_RCQ_DESC_CNT - 1)) ? (x) + 2 : (x) + 1)
+                                 (MAX_RCQ_DESC_CNT - 1)) ? \
+                                       (x) + 1 + NEXT_PAGE_RCQ_DESC_CNT : \
+                                       (x) + 1)
  #define RCQ_BD(x)              ((x) & MAX_RCQ_BD)
  
+/* dropless fc calculations for RCQs
+ *
+ * Number of RCQs should be as number of buffers in BRB:
+ * Low threshold takes into account NEXT_PAGE_RCQ_DESC_CNT
+ * "next" elements on each page
+ */
+#define NUM_RCQ_REQ            BRB_SIZE(bp)
+#define NUM_RCQ_PG_REQ         ((NUM_BD_REQ + MAX_RCQ_DESC_CNT - 1) / \
+                                             MAX_RCQ_DESC_CNT)
+#define RCQ_TH_LO(bp)          (NUM_RCQ_REQ + \
+                                NUM_RCQ_PG_REQ * NEXT_PAGE_RCQ_DESC_CNT + \
+                                FW_DROP_LEVEL(bp))
+#define RCQ_TH_HI(bp)          (RCQ_TH_LO(bp) + DROPLESS_FC_HEADROOM)
+
  
  /* This is needed for determining of last_max */
  #define SUB_S16(a, b)          (s16)((s16)(a) - (s16)(b))
@@ -685,24 +751,17 @@ struct bnx2x_fastpath {
  #define FP_CSB_FUNC_OFF        \
                         offsetof(struct cstorm_status_block_c, func)
  
-#define HC_INDEX_TOE_RX_CQ_CONS                0 /* Formerly Ustorm TOE CQ index */
-                                         /* (HC_INDEX_U_TOE_RX_CQ_CONS)  */
-#define HC_INDEX_ETH_RX_CQ_CONS                1 /* Formerly Ustorm ETH CQ index */
-                                         /* (HC_INDEX_U_ETH_RX_CQ_CONS)  */
-#define HC_INDEX_ETH_RX_BD_CONS                2 /* Formerly Ustorm ETH BD index */
-                                         /* (HC_INDEX_U_ETH_RX_BD_CONS)  */
-
-#define HC_INDEX_TOE_TX_CQ_CONS                4 /* Formerly Cstorm TOE CQ index   */
-                                         /* (HC_INDEX_C_TOE_TX_CQ_CONS)    */
-#define HC_INDEX_ETH_TX_CQ_CONS_COS0   5 /* Formerly Cstorm ETH CQ index   */
-                                         /* (HC_INDEX_C_ETH_TX_CQ_CONS)    */
-#define HC_INDEX_ETH_TX_CQ_CONS_COS1   6 /* Formerly Cstorm ETH CQ index   */
-                                         /* (HC_INDEX_C_ETH_TX_CQ_CONS)    */
-#define HC_INDEX_ETH_TX_CQ_CONS_COS2   7 /* Formerly Cstorm ETH CQ index   */
-                                         /* (HC_INDEX_C_ETH_TX_CQ_CONS)    */
+#define HC_INDEX_ETH_RX_CQ_CONS                1
  
-#define HC_INDEX_ETH_FIRST_TX_CQ_CONS  HC_INDEX_ETH_TX_CQ_CONS_COS0
+#define HC_INDEX_OOO_TX_CQ_CONS                4
  
+#define HC_INDEX_ETH_TX_CQ_CONS_COS0   5
+
+#define HC_INDEX_ETH_TX_CQ_CONS_COS1   6
+
+#define HC_INDEX_ETH_TX_CQ_CONS_COS2   7
+
+#define HC_INDEX_ETH_FIRST_TX_CQ_CONS  HC_INDEX_ETH_TX_CQ_CONS_COS0
  
  #define BNX2X_RX_SB_INDEX \
         (&fp->sb_index_values[HC_INDEX_ETH_RX_CQ_CONS])
@@ -1100,11 +1159,12 @@ struct bnx2x {
  #define BP_PORT(bp)                    (bp->pfid & 1)
  #define BP_FUNC(bp)                    (bp->pfid)
  #define BP_ABS_FUNC(bp)                        (bp->pf_num)
-#define BP_E1HVN(bp)                   (bp->pfid >> 1)
-#define BP_VN(bp)                      (BP_E1HVN(bp)) /*remove when approved*/
-#define BP_L_ID(bp)                    (BP_E1HVN(bp) << 2)
-#define BP_FW_MB_IDX(bp)               (BP_PORT(bp) +\
-         BP_VN(bp) * ((CHIP_IS_E1x(bp) || (CHIP_MODE_IS_4_PORT(bp))) ? 2  : 1))
+#define BP_VN(bp)                      ((bp)->pfid >> 1)
+#define BP_MAX_VN_NUM(bp)              (CHIP_MODE_IS_4_PORT(bp) ? 2 : 4)
+#define BP_L_ID(bp)                    (BP_VN(bp) << 2)
+#define BP_FW_MB_IDX_VN(bp, vn)                (BP_PORT(bp) +\
+         (vn) * ((CHIP_IS_E1x(bp) || (CHIP_MODE_IS_4_PORT(bp))) ? 2  : 1))
+#define BP_FW_MB_IDX(bp)               BP_FW_MB_IDX_VN(bp, BP_VN(bp))
  
         struct net_device       *dev;
         struct pci_dev          *pdev;
@@ -1767,7 +1827,7 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
  
  #define MAX_DMAE_C_PER_PORT            8
  #define INIT_DMAE_C(bp)                        (BP_PORT(bp) * MAX_DMAE_C_PER_PORT + \
-                                        BP_E1HVN(bp))
+                                        BP_VN(bp))
  #define PMF_DMAE_C(bp)                 (BP_PORT(bp) * MAX_DMAE_C_PER_PORT + \
                                          E1HVN_MAX)
  
@@ -1793,7 +1853,7 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
  
  /* must be used on a CID before placing it on a HW ring */
  #define HW_CID(bp, x)                  ((BP_PORT(bp) << 23) | \
-                                        (BP_E1HVN(bp) << BNX2X_SWCID_SHIFT) | \
+                                        (BP_VN(bp) << BNX2X_SWCID_SHIFT) | \
                                          (x))
  
  #define SP_DESC_CNT            (BCM_PAGE_SIZE / sizeof(struct eth_spe))
diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c

index d724a18b5285dd13e9caffdbd14f907fd81b5d08..c4cbf9736414830cde80218192066d2290b00e47 100644 (file)
--- a/drivers/net/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/bnx2x/bnx2x_cmn.c
@@ -63,8 +63,9 @@ static inline void bnx2x_bz_fp(struct bnx2x *bp, int index)
         fp->disable_tpa = ((bp->flags & TPA_ENABLE_FLAG) == 0);
  
  #ifdef BCM_CNIC
-       /* We don't want TPA on FCoE, FWD and OOO L2 rings */
-       bnx2x_fcoe(bp, disable_tpa) = 1;
+       /* We don't want TPA on an FCoE L2 ring */
+       if (IS_FCOE_FP(fp))
+               fp->disable_tpa = 1;
  #endif
  }
  
@@ -986,8 +987,6 @@ void __bnx2x_link_report(struct bnx2x *bp)
  void bnx2x_init_rx_rings(struct bnx2x *bp)
  {
         int func = BP_FUNC(bp);
-       int max_agg_queues = CHIP_IS_E1(bp) ? ETH_MAX_AGGREGATION_QUEUES_E1 :
-                                             ETH_MAX_AGGREGATION_QUEUES_E1H_E2;
         u16 ring_prod;
         int i, j;
  
@@ -1000,7 +999,7 @@ void bnx2x_init_rx_rings(struct bnx2x *bp)
  
                 if (!fp->disable_tpa) {
                         /* Fill the per-aggregtion pool */
-                       for (i = 0; i < max_agg_queues; i++) {
+                       for (i = 0; i < MAX_AGG_QS(bp); i++) {
                                 struct bnx2x_agg_info *tpa_info =
                                         &fp->tpa_info[i];
                                 struct sw_rx_bd *first_buf =
@@ -1040,7 +1039,7 @@ void bnx2x_init_rx_rings(struct bnx2x *bp)
                                         bnx2x_free_rx_sge_range(bp, fp,
                                                                 ring_prod);
                                         bnx2x_free_tpa_pool(bp, fp,
-                                                           max_agg_queues);
+                                                           MAX_AGG_QS(bp));
                                         fp->disable_tpa = 1;
                                         ring_prod = 0;
                                         break;
@@ -1136,9 +1135,7 @@ static void bnx2x_free_rx_skbs(struct bnx2x *bp)
                 bnx2x_free_rx_bds(fp);
  
                 if (!fp->disable_tpa)
-                       bnx2x_free_tpa_pool(bp, fp, CHIP_IS_E1(bp) ?
-                                           ETH_MAX_AGGREGATION_QUEUES_E1 :
-                                           ETH_MAX_AGGREGATION_QUEUES_E1H_E2);
+                       bnx2x_free_tpa_pool(bp, fp, MAX_AGG_QS(bp));
         }
  }
  
@@ -1404,10 +1401,9 @@ void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw)
  u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb)
  {
         struct bnx2x *bp = netdev_priv(dev);
+
  #ifdef BCM_CNIC
-       if (NO_FCOE(bp))
-               return skb_tx_hash(dev, skb);
-       else {
+       if (!NO_FCOE(bp)) {
                 struct ethhdr *hdr = (struct ethhdr *)skb->data;
                 u16 ether_type = ntohs(hdr->h_proto);
  
@@ -1424,8 +1420,7 @@ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb)
                         return bnx2x_fcoe_tx(bp, txq_index);
         }
  #endif
-       /* Select a none-FCoE queue:  if FCoE is enabled, exclude FCoE L2 ring
-        */
+       /* select a non-FCoE queue */
         return __skb_tx_hash(dev, skb, BNX2X_NUM_ETH_QUEUES(bp));
  }
  
@@ -1448,6 +1443,28 @@ void bnx2x_set_num_queues(struct bnx2x *bp)
         bp->num_queues += NON_ETH_CONTEXT_USE;
  }
  
+/**
+ * bnx2x_set_real_num_queues - configure netdev->real_num_[tx,rx]_queues
+ *
+ * @bp:                Driver handle
+ *
+ * We currently support for at most 16 Tx queues for each CoS thus we will
+ * allocate a multiple of 16 for ETH L2 rings according to the value of the
+ * bp->max_cos.
+ *
+ * If there is an FCoE L2 queue the appropriate Tx queue will have the next
+ * index after all ETH L2 indices.
+ *
+ * If the actual number of Tx queues (for each CoS) is less than 16 then there
+ * will be the holes at the end of each group of 16 ETh L2 indices (0..15,
+ * 16..31,...) with indicies that are not coupled with any real Tx queue.
+ *
+ * The proper configuration of skb->queue_mapping is handled by
+ * bnx2x_select_queue() and __skb_tx_hash().
+ *
+ * bnx2x_setup_tc() takes care of the proper TC mappings so that __skb_tx_hash()
+ * will return a proper Tx index if TC is enabled (netdev->num_tc > 0).
+ */
  static inline int bnx2x_set_real_num_queues(struct bnx2x *bp)
  {
         int rc, tx, rx;
@@ -3074,15 +3091,20 @@ static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index)
         struct bnx2x_fastpath *fp = &bp->fp[index];
         int ring_size = 0;
         u8 cos;
+       int rx_ring_size = 0;
  
         /* if rx_ring_size specified - use it */
-       int rx_ring_size = bp->rx_ring_size ? bp->rx_ring_size :
-                          MAX_RX_AVAIL/BNX2X_NUM_RX_QUEUES(bp);
+       if (!bp->rx_ring_size) {
  
-       /* allocate at least number of buffers required by FW */
-       rx_ring_size = max_t(int, bp->disable_tpa ? MIN_RX_SIZE_NONTPA :
-                                                   MIN_RX_SIZE_TPA,
-                                 rx_ring_size);
+               rx_ring_size = MAX_RX_AVAIL/BNX2X_NUM_RX_QUEUES(bp);
+
+               /* allocate at least number of buffers required by FW */
+               rx_ring_size = max_t(int, bp->disable_tpa ? MIN_RX_SIZE_NONTPA :
+                                    MIN_RX_SIZE_TPA, rx_ring_size);
+
+               bp->rx_ring_size = rx_ring_size;
+       } else
+               rx_ring_size = bp->rx_ring_size;
  
         /* Common */
         sb = &bnx2x_fp(bp, index, status_blk);
diff --git a/drivers/net/bnx2x/bnx2x_dcb.c b/drivers/net/bnx2x/bnx2x_dcb.c

index a4ea35f6a4568cb1dd018a93ab8097f4d2f2ecec..a1e004a82f7ae92ae8250684352132892a4ced02 100644 (file)
--- a/drivers/net/bnx2x/bnx2x_dcb.c
+++ b/drivers/net/bnx2x/bnx2x_dcb.c
@@ -920,7 +920,7 @@ static void bnx2x_dcbx_admin_mib_updated_params(struct bnx2x *bp,
  
  void bnx2x_dcbx_set_state(struct bnx2x *bp, bool dcb_on, u32 dcbx_enabled)
  {
-       if (!CHIP_IS_E1x(bp)) {
+       if (!CHIP_IS_E1x(bp) && !CHIP_IS_E3(bp)) {
                 bp->dcb_state = dcb_on;
                 bp->dcbx_enabled = dcbx_enabled;
         } else {
diff --git a/drivers/net/bnx2x/bnx2x_ethtool.c b/drivers/net/bnx2x/bnx2x_ethtool.c

index 221863059dae850e96d824e8eb4994ea2a93923d..cf3e47914dd7250063a5245ac2a9779738a3a60d 100644 (file)
--- a/drivers/net/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/bnx2x/bnx2x_ethtool.c
@@ -363,13 +363,50 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
                 }
  
                 /* advertise the requested speed and duplex if supported */
-               cmd->advertising &= bp->port.supported[cfg_idx];
+               if (cmd->advertising & ~(bp->port.supported[cfg_idx])) {
+                       DP(NETIF_MSG_LINK, "Advertisement parameters "
+                                          "are not supported\n");
+                       return -EINVAL;
+               }
  
                 bp->link_params.req_line_speed[cfg_idx] = SPEED_AUTO_NEG;
-               bp->link_params.req_duplex[cfg_idx] = DUPLEX_FULL;
-               bp->port.advertising[cfg_idx] |= (ADVERTISED_Autoneg |
+               bp->link_params.req_duplex[cfg_idx] = cmd->duplex;
+               bp->port.advertising[cfg_idx] = (ADVERTISED_Autoneg |
                                          cmd->advertising);
+               if (cmd->advertising) {
+
+                       bp->link_params.speed_cap_mask[cfg_idx] = 0;
+                       if (cmd->advertising & ADVERTISED_10baseT_Half) {
+                               bp->link_params.speed_cap_mask[cfg_idx] |=
+                               PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF;
+                       }
+                       if (cmd->advertising & ADVERTISED_10baseT_Full)
+                               bp->link_params.speed_cap_mask[cfg_idx] |=
+                               PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_FULL;
  
+                       if (cmd->advertising & ADVERTISED_100baseT_Full)
+                               bp->link_params.speed_cap_mask[cfg_idx] |=
+                               PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_FULL;
+
+                       if (cmd->advertising & ADVERTISED_100baseT_Half) {
+                               bp->link_params.speed_cap_mask[cfg_idx] |=
+                                    PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_HALF;
+                       }
+                       if (cmd->advertising & ADVERTISED_1000baseT_Half) {
+                               bp->link_params.speed_cap_mask[cfg_idx] |=
+                                       PORT_HW_CFG_SPEED_CAPABILITY_D0_1G;
+                       }
+                       if (cmd->advertising & (ADVERTISED_1000baseT_Full |
+                                               ADVERTISED_1000baseKX_Full))
+                               bp->link_params.speed_cap_mask[cfg_idx] |=
+                                       PORT_HW_CFG_SPEED_CAPABILITY_D0_1G;
+
+                       if (cmd->advertising & (ADVERTISED_10000baseT_Full |
+                                               ADVERTISED_10000baseKX4_Full |
+                                               ADVERTISED_10000baseKR_Full))
+                               bp->link_params.speed_cap_mask[cfg_idx] |=
+                                       PORT_HW_CFG_SPEED_CAPABILITY_D0_10G;
+               }
         } else { /* forced speed */
                 /* advertise the requested speed and duplex if supported */
                 switch (speed) {
@@ -1310,10 +1347,7 @@ static void bnx2x_get_ringparam(struct net_device *dev,
         if (bp->rx_ring_size)
                 ering->rx_pending = bp->rx_ring_size;
         else
-               if (bp->state == BNX2X_STATE_OPEN && bp->num_queues)
-                       ering->rx_pending = MAX_RX_AVAIL/bp->num_queues;
-               else
-                       ering->rx_pending = MAX_RX_AVAIL;
+               ering->rx_pending = MAX_RX_AVAIL;
  
         ering->rx_mini_pending = 0;
         ering->rx_jumbo_pending = 0;
diff --git a/drivers/net/bnx2x/bnx2x_link.c b/drivers/net/bnx2x/bnx2x_link.c

index d45b1555a6022a39d10270ed53393755b382af3a..ba15bdc5a1a9fbb6c860acf6d6a272e07382d3b7 100644 (file)
--- a/drivers/net/bnx2x/bnx2x_link.c
+++ b/drivers/net/bnx2x/bnx2x_link.c
@@ -778,9 +778,9 @@ static int bnx2x_ets_e3b0_set_cos_bw(struct bnx2x *bp,
  {
         u32 nig_reg_adress_crd_weight = 0;
         u32 pbf_reg_adress_crd_weight = 0;
-       /* Calculate and set BW for this COS*/
-       const u32 cos_bw_nig = (bw * min_w_val_nig) / total_bw;
-       const u32 cos_bw_pbf = (bw * min_w_val_pbf) / total_bw;
+       /* Calculate and set BW for this COS - use 1 instead of 0 for BW */
+       const u32 cos_bw_nig = ((bw ? bw : 1) * min_w_val_nig) / total_bw;
+       const u32 cos_bw_pbf = ((bw ? bw : 1) * min_w_val_pbf) / total_bw;
  
         switch (cos_entry) {
         case 0:
@@ -852,18 +852,12 @@ static int bnx2x_ets_e3b0_get_total_bw(
         /* Calculate total BW requested */
         for (cos_idx = 0; cos_idx < ets_params->num_of_cos; cos_idx++) {
                 if (bnx2x_cos_state_bw == ets_params->cos[cos_idx].state) {
-
-                       if (0 == ets_params->cos[cos_idx].params.bw_params.bw) {
-                               DP(NETIF_MSG_LINK, "bnx2x_ets_E3B0_config BW"
-                                                  "was set to 0\n");
-                       return -EINVAL;
+                       *total_bw +=
+                               ets_params->cos[cos_idx].params.bw_params.bw;
                 }
-               *total_bw +=
-                   ets_params->cos[cos_idx].params.bw_params.bw;
-           }
         }
  
-       /*Check taotl BW is valid */
+       /* Check total BW is valid */
         if ((100 != *total_bw) || (0 == *total_bw)) {
                 if (0 == *total_bw) {
                         DP(NETIF_MSG_LINK, "bnx2x_ets_E3B0_config toatl BW"
@@ -1726,7 +1720,7 @@ static int bnx2x_xmac_enable(struct link_params *params,
  
         /* Check loopback mode */
         if (lb)
-               val |= XMAC_CTRL_REG_CORE_LOCAL_LPBK;
+               val |= XMAC_CTRL_REG_LINE_LOCAL_LPBK;
         REG_WR(bp, xmac_base + XMAC_REG_CTRL, val);
         bnx2x_set_xumac_nig(params,
                             ((vars->flow_ctrl & BNX2X_FLOW_CTRL_TX) != 0), 1);
@@ -3630,6 +3624,12 @@ static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy,
         bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
                          MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT1, val16);
  
+       /* Advertised and set FEC (Forward Error Correction) */
+       bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD,
+                        MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT2,
+                        (MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_ABILITY |
+                         MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_REQ));
+
         /* Enable CL37 BAM */
         if (REG_RD(bp, params->shmem_base +
                    offsetof(struct shmem_region, dev_info.
@@ -5924,7 +5924,7 @@ int bnx2x_set_led(struct link_params *params,
                                         (tmp | EMAC_LED_OVERRIDE));
                                 /*
                                  * return here without enabling traffic
-                                * LED blink andsetting rate in ON mode.
+                                * LED blink and setting rate in ON mode.
                                  * In oper mode, enabling LED blink
                                  * and setting rate is needed.
                                  */
@@ -5936,7 +5936,11 @@ int bnx2x_set_led(struct link_params *params,
                          * This is a work-around for HW issue found when link
                          * is up in CL73
                          */
-                       REG_WR(bp, NIG_REG_LED_10G_P0 + port*4, 1);
+                       if ((!CHIP_IS_E3(bp)) ||
+                           (CHIP_IS_E3(bp) &&
+                            mode == LED_MODE_ON))
+                               REG_WR(bp, NIG_REG_LED_10G_P0 + port*4, 1);
+
                         if (CHIP_IS_E1x(bp) ||
                             CHIP_IS_E2(bp) ||
                             (mode == LED_MODE_ON))
@@ -10638,8 +10642,7 @@ static struct bnx2x_phy phy_warpcore = {
         .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT,
         .addr           = 0xff,
         .def_md_devad   = 0,
-       .flags          = (FLAGS_HW_LOCK_REQUIRED |
-                          FLAGS_TX_ERROR_CHECK),
+       .flags          = FLAGS_HW_LOCK_REQUIRED,
         .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
         .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
         .mdio_ctrl      = 0,
@@ -10765,8 +10768,7 @@ static struct bnx2x_phy phy_8706 = {
         .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8706,
         .addr           = 0xff,
         .def_md_devad   = 0,
-       .flags          = (FLAGS_INIT_XGXS_FIRST |
-                          FLAGS_TX_ERROR_CHECK),
+       .flags          = FLAGS_INIT_XGXS_FIRST,
         .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
         .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
         .mdio_ctrl      = 0,
@@ -10797,8 +10799,7 @@ static struct bnx2x_phy phy_8726 = {
         .addr           = 0xff,
         .def_md_devad   = 0,
         .flags          = (FLAGS_HW_LOCK_REQUIRED |
-                          FLAGS_INIT_XGXS_FIRST |
-                          FLAGS_TX_ERROR_CHECK),
+                          FLAGS_INIT_XGXS_FIRST),
         .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
         .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
         .mdio_ctrl      = 0,
@@ -10829,8 +10830,7 @@ static struct bnx2x_phy phy_8727 = {
         .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8727,
         .addr           = 0xff,
         .def_md_devad   = 0,
-       .flags          = (FLAGS_FAN_FAILURE_DET_REQ |
-                          FLAGS_TX_ERROR_CHECK),
+       .flags          = FLAGS_FAN_FAILURE_DET_REQ,
         .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
         .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
         .mdio_ctrl      = 0,
diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c

index 15070911154833d4e83582fe68dba67bd9e32f73..c027e9341a1af44965dd37d99236c5c6d0d410b0 100644 (file)
--- a/drivers/net/bnx2x/bnx2x_main.c
+++ b/drivers/net/bnx2x/bnx2x_main.c
@@ -407,8 +407,8 @@ u32 bnx2x_dmae_opcode(struct bnx2x *bp, u8 src_type, u8 dst_type,
         opcode |= (DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET);
  
         opcode |= (BP_PORT(bp) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0);
-       opcode |= ((BP_E1HVN(bp) << DMAE_CMD_E1HVN_SHIFT) |
-                  (BP_E1HVN(bp) << DMAE_COMMAND_DST_VN_SHIFT));
+       opcode |= ((BP_VN(bp) << DMAE_CMD_E1HVN_SHIFT) |
+                  (BP_VN(bp) << DMAE_COMMAND_DST_VN_SHIFT));
         opcode |= (DMAE_COM_SET_ERR << DMAE_COMMAND_ERR_POLICY_SHIFT);
  
  #ifdef __BIG_ENDIAN
@@ -1419,7 +1419,7 @@ static void bnx2x_hc_int_enable(struct bnx2x *bp)
         if (!CHIP_IS_E1(bp)) {
                 /* init leading/trailing edge */
                 if (IS_MF(bp)) {
-                       val = (0xee0f | (1 << (BP_E1HVN(bp) + 4)));
+                       val = (0xee0f | (1 << (BP_VN(bp) + 4)));
                         if (bp->port.pmf)
                                 /* enable nig and gpio3 attention */
                                 val |= 0x1100;
@@ -1471,7 +1471,7 @@ static void bnx2x_igu_int_enable(struct bnx2x *bp)
  
         /* init leading/trailing edge */
         if (IS_MF(bp)) {
-               val = (0xee0f | (1 << (BP_E1HVN(bp) + 4)));
+               val = (0xee0f | (1 << (BP_VN(bp) + 4)));
                 if (bp->port.pmf)
                         /* enable nig and gpio3 attention */
                         val |= 0x1100;
@@ -2287,7 +2287,7 @@ static void bnx2x_calc_vn_weight_sum(struct bnx2x *bp)
         int vn;
  
         bp->vn_weight_sum = 0;
-       for (vn = VN_0; vn < E1HVN_MAX; vn++) {
+       for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) {
                 u32 vn_cfg = bp->mf_config[vn];
                 u32 vn_min_rate = ((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >>
                                    FUNC_MF_CFG_MIN_BW_SHIFT) * 100;
@@ -2320,12 +2320,18 @@ static void bnx2x_calc_vn_weight_sum(struct bnx2x *bp)
                                         CMNG_FLAGS_PER_PORT_FAIRNESS_VN;
  }
  
+/* returns func by VN for current port */
+static inline int func_by_vn(struct bnx2x *bp, int vn)
+{
+       return 2 * vn + BP_PORT(bp);
+}
+
  static void bnx2x_init_vn_minmax(struct bnx2x *bp, int vn)
  {
         struct rate_shaping_vars_per_vn m_rs_vn;
         struct fairness_vars_per_vn m_fair_vn;
         u32 vn_cfg = bp->mf_config[vn];
-       int func = 2*vn + BP_PORT(bp);
+       int func = func_by_vn(bp, vn);
         u16 vn_min_rate, vn_max_rate;
         int i;
  
@@ -2422,7 +2428,7 @@ void bnx2x_read_mf_cfg(struct bnx2x *bp)
          *
          *      and there are 2 functions per port
          */
-       for (vn = VN_0; vn < E1HVN_MAX; vn++) {
+       for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) {
                 int /*abs*/func = n * (2 * vn + BP_PORT(bp)) + BP_PATH(bp);
  
                 if (func >= E1H_FUNC_MAX)
@@ -2454,7 +2460,7 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type)
  
                 /* calculate and set min-max rate for each vn */
                 if (bp->port.pmf)
-                       for (vn = VN_0; vn < E1HVN_MAX; vn++)
+                       for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++)
                                 bnx2x_init_vn_minmax(bp, vn);
  
                 /* always enable rate shaping and fairness */
@@ -2473,16 +2479,15 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type)
  
  static inline void bnx2x_link_sync_notify(struct bnx2x *bp)
  {
-       int port = BP_PORT(bp);
         int func;
         int vn;
  
         /* Set the attention towards other drivers on the same port */
-       for (vn = VN_0; vn < E1HVN_MAX; vn++) {
-               if (vn == BP_E1HVN(bp))
+       for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) {
+               if (vn == BP_VN(bp))
                         continue;
  
-               func = ((vn << 1) | port);
+               func = func_by_vn(bp, vn);
                 REG_WR(bp, MISC_REG_AEU_GENERAL_ATTN_0 +
                        (LINK_SYNC_ATTENTION_BIT_FUNC_0 + func)*4, 1);
         }
@@ -2577,7 +2582,7 @@ static void bnx2x_pmf_update(struct bnx2x *bp)
         bnx2x_dcbx_pmf_update(bp);
  
         /* enable nig attention */
-       val = (0xff0f | (1 << (BP_E1HVN(bp) + 4)));
+       val = (0xff0f | (1 << (BP_VN(bp) + 4)));
         if (bp->common.int_block == INT_BLOCK_HC) {
                 REG_WR(bp, HC_REG_TRAILING_EDGE_0 + port*8, val);
                 REG_WR(bp, HC_REG_LEADING_EDGE_0 + port*8, val);
@@ -2756,8 +2761,14 @@ static void bnx2x_pf_rx_q_prep(struct bnx2x *bp,
         u16 tpa_agg_size = 0;
  
         if (!fp->disable_tpa) {
-               pause->sge_th_hi = 250;
-               pause->sge_th_lo = 150;
+               pause->sge_th_lo = SGE_TH_LO(bp);
+               pause->sge_th_hi = SGE_TH_HI(bp);
+
+               /* validate SGE ring has enough to cross high threshold */
+               WARN_ON(bp->dropless_fc &&
+                               pause->sge_th_hi + FW_PREFETCH_CNT >
+                               MAX_RX_SGE_CNT * NUM_RX_SGE_PAGES);
+
                 tpa_agg_size = min_t(u32,
                         (min_t(u32, 8, MAX_SKB_FRAGS) *
                         SGE_PAGE_SIZE * PAGES_PER_SGE), 0xffff);
@@ -2771,10 +2782,21 @@ static void bnx2x_pf_rx_q_prep(struct bnx2x *bp,
  
         /* pause - not for e1 */
         if (!CHIP_IS_E1(bp)) {
-               pause->bd_th_hi = 350;
-               pause->bd_th_lo = 250;
-               pause->rcq_th_hi = 350;
-               pause->rcq_th_lo = 250;
+               pause->bd_th_lo = BD_TH_LO(bp);
+               pause->bd_th_hi = BD_TH_HI(bp);
+
+               pause->rcq_th_lo = RCQ_TH_LO(bp);
+               pause->rcq_th_hi = RCQ_TH_HI(bp);
+               /*
+                * validate that rings have enough entries to cross
+                * high thresholds
+                */
+               WARN_ON(bp->dropless_fc &&
+                               pause->bd_th_hi + FW_PREFETCH_CNT >
+                               bp->rx_ring_size);
+               WARN_ON(bp->dropless_fc &&
+                               pause->rcq_th_hi + FW_PREFETCH_CNT >
+                               NUM_RCQ_RINGS * MAX_RCQ_DESC_CNT);
  
                 pause->pri_map = 1;
         }
@@ -2802,9 +2824,7 @@ static void bnx2x_pf_rx_q_prep(struct bnx2x *bp,
          * For PF Clients it should be the maximum avaliable number.
          * VF driver(s) may want to define it to a smaller value.
          */
-       rxq_init->max_tpa_queues =
-               (CHIP_IS_E1(bp) ? ETH_MAX_AGGREGATION_QUEUES_E1 :
-               ETH_MAX_AGGREGATION_QUEUES_E1H_E2);
+       rxq_init->max_tpa_queues = MAX_AGG_QS(bp);
  
         rxq_init->cache_line_log = BNX2X_RX_ALIGN_SHIFT;
         rxq_init->fw_sb_id = fp->fw_sb_id;
@@ -4808,6 +4828,37 @@ void bnx2x_setup_ndsb_state_machine(struct hc_status_block_sm *hc_sm,
         hc_sm->time_to_expire = 0xFFFFFFFF;
  }
  
+
+/* allocates state machine ids. */
+static inline
+void bnx2x_map_sb_state_machines(struct hc_index_data *index_data)
+{
+       /* zero out state machine indices */
+       /* rx indices */
+       index_data[HC_INDEX_ETH_RX_CQ_CONS].flags &= ~HC_INDEX_DATA_SM_ID;
+
+       /* tx indices */
+       index_data[HC_INDEX_OOO_TX_CQ_CONS].flags &= ~HC_INDEX_DATA_SM_ID;
+       index_data[HC_INDEX_ETH_TX_CQ_CONS_COS0].flags &= ~HC_INDEX_DATA_SM_ID;
+       index_data[HC_INDEX_ETH_TX_CQ_CONS_COS1].flags &= ~HC_INDEX_DATA_SM_ID;
+       index_data[HC_INDEX_ETH_TX_CQ_CONS_COS2].flags &= ~HC_INDEX_DATA_SM_ID;
+
+       /* map indices */
+       /* rx indices */
+       index_data[HC_INDEX_ETH_RX_CQ_CONS].flags |=
+               SM_RX_ID << HC_INDEX_DATA_SM_ID_SHIFT;
+
+       /* tx indices */
+       index_data[HC_INDEX_OOO_TX_CQ_CONS].flags |=
+               SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT;
+       index_data[HC_INDEX_ETH_TX_CQ_CONS_COS0].flags |=
+               SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT;
+       index_data[HC_INDEX_ETH_TX_CQ_CONS_COS1].flags |=
+               SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT;
+       index_data[HC_INDEX_ETH_TX_CQ_CONS_COS2].flags |=
+               SM_TX_ID << HC_INDEX_DATA_SM_ID_SHIFT;
+}
+
  static void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid,
                           u8 vf_valid, int fw_sb_id, int igu_sb_id)
  {
@@ -4839,6 +4890,7 @@ static void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid,
                 hc_sm_p = sb_data_e2.common.state_machine;
                 sb_data_p = (u32 *)&sb_data_e2;
                 data_size = sizeof(struct hc_status_block_data_e2)/sizeof(u32);
+               bnx2x_map_sb_state_machines(sb_data_e2.index_data);
         } else {
                 memset(&sb_data_e1x, 0,
                        sizeof(struct hc_status_block_data_e1x));
@@ -4853,6 +4905,7 @@ static void bnx2x_init_sb(struct bnx2x *bp, dma_addr_t mapping, int vfid,
                 hc_sm_p = sb_data_e1x.common.state_machine;
                 sb_data_p = (u32 *)&sb_data_e1x;
                 data_size = sizeof(struct hc_status_block_data_e1x)/sizeof(u32);
+               bnx2x_map_sb_state_machines(sb_data_e1x.index_data);
         }
  
         bnx2x_setup_ndsb_state_machine(&hc_sm_p[SM_RX_ID],
@@ -5798,6 +5851,12 @@ static int bnx2x_init_hw_common(struct bnx2x *bp)
  
         DP(BNX2X_MSG_MCP, "starting common init  func %d\n", BP_ABS_FUNC(bp));
  
+       /*
+        * take the UNDI lock to protect undi_unload flow from accessing
+        * registers while we're resetting the chip
+        */
+       bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
+
         bnx2x_reset_common(bp);
         REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, 0xffffffff);
  
@@ -5808,6 +5867,8 @@ static int bnx2x_init_hw_common(struct bnx2x *bp)
         }
         REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, val);
  
+       bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
+
         bnx2x_init_block(bp, BLOCK_MISC, PHASE_COMMON);
  
         if (!CHIP_IS_E1x(bp)) {
@@ -6663,12 +6724,16 @@ static int bnx2x_init_hw_func(struct bnx2x *bp)
                         if (CHIP_MODE_IS_4_PORT(bp))
                                 dsb_idx = BP_FUNC(bp);
                         else
-                               dsb_idx = BP_E1HVN(bp);
+                               dsb_idx = BP_VN(bp);
  
                         prod_offset = (CHIP_INT_MODE_IS_BC(bp) ?
                                        IGU_BC_BASE_DSB_PROD + dsb_idx :
                                        IGU_NORM_BASE_DSB_PROD + dsb_idx);
  
+                       /*
+                        * igu prods come in chunks of E1HVN_MAX (4) -
+                        * does not matters what is the current chip mode
+                        */
                         for (i = 0; i < (num_segs * E1HVN_MAX);
                              i += E1HVN_MAX) {
                                 addr = IGU_REG_PROD_CONS_MEMORY +
@@ -7562,7 +7627,7 @@ u32 bnx2x_send_unload_req(struct bnx2x *bp, int unload_mode)
                 u32 val;
                 /* The mac address is written to entries 1-4 to
                    preserve entry 0 which is used by the PMF */
-               u8 entry = (BP_E1HVN(bp) + 1)*8;
+               u8 entry = (BP_VN(bp) + 1)*8;
  
                 val = (mac_addr[0] << 8) | mac_addr[1];
                 EMAC_WR(bp, EMAC_REG_EMAC_MAC_MATCH + entry, val);
@@ -8538,10 +8603,12 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp)
         /* Check if there is any driver already loaded */
         val = REG_RD(bp, MISC_REG_UNPREPARED);
         if (val == 0x1) {
-               /* Check if it is the UNDI driver
+
+               bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
+               /*
+                * Check if it is the UNDI driver
                  * UNDI driver initializes CID offset for normal bell to 0x7
                  */
-               bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
                 val = REG_RD(bp, DORQ_REG_NORM_CID_OFST);
                 if (val == 0x7) {
                         u32 reset_code = DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS;
@@ -8579,9 +8646,6 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp)
                                 bnx2x_fw_command(bp, reset_code, 0);
                         }
  
-                       /* now it's safe to release the lock */
-                       bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
-
                         bnx2x_undi_int_disable(bp);
                         port = BP_PORT(bp);
  
@@ -8631,8 +8695,10 @@ static void __devinit bnx2x_undi_unload(struct bnx2x *bp)
                         bp->fw_seq =
                               (SHMEM_RD(bp, func_mb[bp->pf_num].drv_mb_header) &
                                 DRV_MSG_SEQ_NUMBER_MASK);
-               } else
-                       bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_UNDI);
+               }
+
+               /* now it's safe to release the lock */
+               bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
         }
  }
  
@@ -8769,13 +8835,13 @@ static void __devinit bnx2x_get_common_hwinfo(struct bnx2x *bp)
  static void __devinit bnx2x_get_igu_cam_info(struct bnx2x *bp)
  {
         int pfid = BP_FUNC(bp);
-       int vn = BP_E1HVN(bp);
         int igu_sb_id;
         u32 val;
         u8 fid, igu_sb_cnt = 0;
  
         bp->igu_base_sb = 0xff;
         if (CHIP_INT_MODE_IS_BC(bp)) {
+               int vn = BP_VN(bp);
                 igu_sb_cnt = bp->igu_sb_cnt;
                 bp->igu_base_sb = (CHIP_MODE_IS_4_PORT(bp) ? pfid : vn) *
                         FP_SB_MAX_E1x;
@@ -9408,6 +9474,10 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
                 bp->igu_base_sb = 0;
         } else {
                 bp->common.int_block = INT_BLOCK_IGU;
+
+               /* do not allow device reset during IGU info preocessing */
+               bnx2x_acquire_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
+
                 val = REG_RD(bp, IGU_REG_BLOCK_CONFIGURATION);
  
                 if (val & IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN) {
@@ -9439,6 +9509,7 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
  
                 bnx2x_get_igu_cam_info(bp);
  
+               bnx2x_release_hw_lock(bp, HW_LOCK_RESOURCE_RESET);
         }
  
         /*
@@ -9465,7 +9536,7 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
  
         bp->mf_ov = 0;
         bp->mf_mode = 0;
-       vn = BP_E1HVN(bp);
+       vn = BP_VN(bp);
  
         if (!CHIP_IS_E1(bp) && !BP_NOMCP(bp)) {
                 BNX2X_DEV_INFO("shmem2base 0x%x, size %d, mfcfg offset %d\n",
@@ -9585,13 +9656,6 @@ static int __devinit bnx2x_get_hwinfo(struct bnx2x *bp)
         /* port info */
         bnx2x_get_port_hwinfo(bp);
  
-       if (!BP_NOMCP(bp)) {
-               bp->fw_seq =
-                       (SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) &
-                        DRV_MSG_SEQ_NUMBER_MASK);
-               BNX2X_DEV_INFO("fw_seq 0x%08x\n", bp->fw_seq);
-       }
-
         /* Get MAC addresses */
         bnx2x_get_mac_hwinfo(bp);
  
@@ -9757,6 +9821,14 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp)
         if (!BP_NOMCP(bp))
                 bnx2x_undi_unload(bp);
  
+       /* init fw_seq after undi_unload! */
+       if (!BP_NOMCP(bp)) {
+               bp->fw_seq =
+                       (SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) &
+                        DRV_MSG_SEQ_NUMBER_MASK);
+               BNX2X_DEV_INFO("fw_seq 0x%08x\n", bp->fw_seq);
+       }
+
         if (CHIP_REV_IS_FPGA(bp))
                 dev_err(&bp->pdev->dev, "FPGA detected\n");
  
@@ -10251,10 +10323,21 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev,
         /* clean indirect addresses */
         pci_write_config_dword(bp->pdev, PCICFG_GRC_ADDRESS,
                                PCICFG_VENDOR_ID_OFFSET);
-       REG_WR(bp, PXP2_REG_PGL_ADDR_88_F0 + BP_PORT(bp)*16, 0);
-       REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F0 + BP_PORT(bp)*16, 0);
-       REG_WR(bp, PXP2_REG_PGL_ADDR_90_F0 + BP_PORT(bp)*16, 0);
-       REG_WR(bp, PXP2_REG_PGL_ADDR_94_F0 + BP_PORT(bp)*16, 0);
+       /*
+        * Clean the following indirect addresses for all functions since it
+        * is not used by the driver.
+        */
+       REG_WR(bp, PXP2_REG_PGL_ADDR_88_F0, 0);
+       REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F0, 0);
+       REG_WR(bp, PXP2_REG_PGL_ADDR_90_F0, 0);
+       REG_WR(bp, PXP2_REG_PGL_ADDR_94_F0, 0);
+
+       if (CHIP_IS_E1x(bp)) {
+               REG_WR(bp, PXP2_REG_PGL_ADDR_88_F1, 0);
+               REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F1, 0);
+               REG_WR(bp, PXP2_REG_PGL_ADDR_90_F1, 0);
+               REG_WR(bp, PXP2_REG_PGL_ADDR_94_F1, 0);
+       }
  
         /*
          * Enable internal target-read (in case we are probed after PF FLR).
diff --git a/drivers/net/bnx2x/bnx2x_reg.h b/drivers/net/bnx2x/bnx2x_reg.h

index 27b5ecb11830c383f7b17e1545aa52b9c19600cc..750e8445dac4a0dc2de6e90f344d43f5c83a0b9a 100644 (file)
--- a/drivers/net/bnx2x/bnx2x_reg.h
+++ b/drivers/net/bnx2x/bnx2x_reg.h
@@ -3007,11 +3007,27 @@
  /* [R 6] Debug only: Number of used entries in the data FIFO */
  #define PXP2_REG_HST_DATA_FIFO_STATUS                           0x12047c
  /* [R 7] Debug only: Number of used entries in the header FIFO */
-#define PXP2_REG_HST_HEADER_FIFO_STATUS                         0x120478
-#define PXP2_REG_PGL_ADDR_88_F0                                 0x120534
-#define PXP2_REG_PGL_ADDR_8C_F0                                 0x120538
-#define PXP2_REG_PGL_ADDR_90_F0                                 0x12053c
-#define PXP2_REG_PGL_ADDR_94_F0                                 0x120540
+#define PXP2_REG_HST_HEADER_FIFO_STATUS                                 0x120478
+#define PXP2_REG_PGL_ADDR_88_F0                                         0x120534
+/* [R 32] GRC address for configuration access to PCIE config address 0x88.
+ * any write to this PCIE address will cause a GRC write access to the
+ * address that's in t this register */
+#define PXP2_REG_PGL_ADDR_88_F1                                         0x120544
+#define PXP2_REG_PGL_ADDR_8C_F0                                         0x120538
+/* [R 32] GRC address for configuration access to PCIE config address 0x8c.
+ * any write to this PCIE address will cause a GRC write access to the
+ * address that's in t this register */
+#define PXP2_REG_PGL_ADDR_8C_F1                                         0x120548
+#define PXP2_REG_PGL_ADDR_90_F0                                         0x12053c
+/* [R 32] GRC address for configuration access to PCIE config address 0x90.
+ * any write to this PCIE address will cause a GRC write access to the
+ * address that's in t this register */
+#define PXP2_REG_PGL_ADDR_90_F1                                         0x12054c
+#define PXP2_REG_PGL_ADDR_94_F0                                         0x120540
+/* [R 32] GRC address for configuration access to PCIE config address 0x94.
+ * any write to this PCIE address will cause a GRC write access to the
+ * address that's in t this register */
+#define PXP2_REG_PGL_ADDR_94_F1                                         0x120550
  #define PXP2_REG_PGL_CONTROL0                                   0x120490
  #define PXP2_REG_PGL_CONTROL1                                   0x120514
  #define PXP2_REG_PGL_DEBUG                                      0x120520
@@ -5304,7 +5320,7 @@
  #define XCM_REG_XX_OVFL_EVNT_ID                                 0x20058
  #define XMAC_CLEAR_RX_LSS_STATUS_REG_CLEAR_LOCAL_FAULT_STATUS   (0x1<<0)
  #define XMAC_CLEAR_RX_LSS_STATUS_REG_CLEAR_REMOTE_FAULT_STATUS  (0x1<<1)
-#define XMAC_CTRL_REG_CORE_LOCAL_LPBK                           (0x1<<3)
+#define XMAC_CTRL_REG_LINE_LOCAL_LPBK                           (0x1<<2)
  #define XMAC_CTRL_REG_RX_EN                                     (0x1<<1)
  #define XMAC_CTRL_REG_SOFT_RESET                                (0x1<<6)
  #define XMAC_CTRL_REG_TX_EN                                     (0x1<<0)
@@ -5750,7 +5766,7 @@
  #define HW_LOCK_RESOURCE_RECOVERY_LEADER_0                      8
  #define HW_LOCK_RESOURCE_RECOVERY_LEADER_1                      9
  #define HW_LOCK_RESOURCE_SPIO                                   2
-#define HW_LOCK_RESOURCE_UNDI                                   5
+#define HW_LOCK_RESOURCE_RESET                                  5
  #define AEU_INPUTS_ATTN_BITS_ATC_HW_INTERRUPT                   (0x1<<4)
  #define AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR                   (0x1<<5)
  #define AEU_INPUTS_ATTN_BITS_BRB_PARITY_ERROR                   (0x1<<18)
@@ -6837,6 +6853,9 @@ Theotherbitsarereservedandshouldbezero*/
  #define MDIO_WC_REG_IEEE0BLK_AUTONEGNP                 0x7
  #define MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT0      0x10
  #define MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT1      0x11
+#define MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT2      0x12
+#define MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_ABILITY    0x4000
+#define MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_REQ                0x8000
  #define MDIO_WC_REG_PMD_IEEE9BLK_TENGBASE_KR_PMD_CONTROL_REGISTER_150  0x96
  #define MDIO_WC_REG_XGXSBLK0_XGXSCONTROL               0x8000
  #define MDIO_WC_REG_XGXSBLK0_MISCCONTROL1              0x800e
diff --git a/drivers/net/bnx2x/bnx2x_stats.c b/drivers/net/bnx2x/bnx2x_stats.c

index 771f6803b23856f7ffe928a4d23545d6ac4be896..9908f2bbcf734b4eaf662f68f420b7741717434f 100644 (file)
--- a/drivers/net/bnx2x/bnx2x_stats.c
+++ b/drivers/net/bnx2x/bnx2x_stats.c
@@ -710,7 +710,8 @@ static int bnx2x_hw_stats_update(struct bnx2x *bp)
                 break;
  
         case MAC_TYPE_NONE: /* unreached */
-               BNX2X_ERR("stats updated by DMAE but no MAC active\n");
+               DP(BNX2X_MSG_STATS,
+                  "stats updated by DMAE but no MAC active\n");
                 return -1;
  
         default: /* unreached */
@@ -1391,7 +1392,7 @@ static void bnx2x_port_stats_base_init(struct bnx2x *bp)
  
  static void bnx2x_func_stats_base_init(struct bnx2x *bp)
  {
-       int vn, vn_max = IS_MF(bp) ? E1HVN_MAX : E1VN_MAX;
+       int vn, vn_max = IS_MF(bp) ? BP_MAX_VN_NUM(bp) : E1VN_MAX;
         u32 func_stx;
  
         /* sanity */
@@ -1404,7 +1405,7 @@ static void bnx2x_func_stats_base_init(struct bnx2x *bp)
         func_stx = bp->func_stx;
  
         for (vn = VN_0; vn < vn_max; vn++) {
-               int mb_idx = CHIP_IS_E1x(bp) ? 2*vn + BP_PORT(bp) : vn;
+               int mb_idx = BP_FW_MB_IDX_VN(bp, vn);
  
                 bp->func_stx = SHMEM_RD(bp, func_mb[mb_idx].fw_mb_param);
                 bnx2x_func_stats_init(bp);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c

index 38a83acd502e6395f06257cff0fa39cde757fa40..43f2ea5410884ae34e5dd8db0fa2a25caea6d81c 100644 (file)
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3419,9 +3419,27 @@ static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
  static int bond_open(struct net_device *bond_dev)
  {
         struct bonding *bond = netdev_priv(bond_dev);
+       struct slave *slave;
+       int i;
  
         bond->kill_timers = 0;
  
+       /* reset slave->backup and slave->inactive */
+       read_lock(&bond->lock);
+       if (bond->slave_cnt > 0) {
+               read_lock(&bond->curr_slave_lock);
+               bond_for_each_slave(bond, slave, i) {
+                       if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+                               && (slave != bond->curr_active_slave)) {
+                               bond_set_slave_inactive_flags(slave);
+                       } else {
+                               bond_set_slave_active_flags(slave);
+                       }
+               }
+               read_unlock(&bond->curr_slave_lock);
+       }
+       read_unlock(&bond->lock);
+
         INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed);
  
         if (bond_is_lb(bond)) {
diff --git a/drivers/net/can/sja1000/plx_pci.c b/drivers/net/can/sja1000/plx_pci.c

index 231385b8e08faa38bfab0850080109d256cf6c2d..c7f3d4ea11672761cf7c9241db738e463cbb00ca 100644 (file)
--- a/drivers/net/can/sja1000/plx_pci.c
+++ b/drivers/net/can/sja1000/plx_pci.c
@@ -408,7 +408,7 @@ static void plx_pci_del_card(struct pci_dev *pdev)
         struct sja1000_priv *priv;
         int i = 0;
  
-       for (i = 0; i < card->channels; i++) {
+       for (i = 0; i < PLX_PCI_MAX_CHAN; i++) {
                 dev = card->net_dev[i];
                 if (!dev)
                         continue;
@@ -536,7 +536,6 @@ static int __devinit plx_pci_add_card(struct pci_dev *pdev,
                         if (err) {
                                 dev_err(&pdev->dev, "Registering device failed "
                                         "(err=%d)\n", err);
-                               free_sja1000dev(dev);
                                 goto failure_cleanup;
                         }
  
@@ -549,6 +548,7 @@ static int __devinit plx_pci_add_card(struct pci_dev *pdev,
                         dev_err(&pdev->dev, "Channel #%d not detected\n",
                                 i + 1);
                         free_sja1000dev(dev);
+                       card->net_dev[i] = NULL;
                 }
         }
  
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c

index f523f1cc5142f05e29a73aaeb2e91b5125627d1b..4b70b7e8bdeb7690a12dd3e9bb9f2a172767f088 100644 (file)
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -197,7 +197,7 @@ static void slc_bump(struct slcan *sl)
         skb->ip_summed = CHECKSUM_UNNECESSARY;
         memcpy(skb_put(skb, sizeof(struct can_frame)),
                &cf, sizeof(struct can_frame));
-       netif_rx(skb);
+       netif_rx_ni(skb);
  
         sl->dev->stats.rx_packets++;
         sl->dev->stats.rx_bytes += cf.can_dlc;
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c

index f7bbde9eb2cba34db7c27207648fc000320dc690..2adc294f512a8c0add1ba99c49660b0a48a5bd7d 100644 (file)
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -46,6 +46,7 @@
  #include <linux/skbuff.h>
  #include <linux/platform_device.h>
  #include <linux/clk.h>
+#include <linux/io.h>
  
  #include <linux/can/dev.h>
  #include <linux/can/error.h>
@@ -503,9 +504,9 @@ static netdev_tx_t ti_hecc_xmit(struct sk_buff *skb, struct net_device *ndev)
         spin_unlock_irqrestore(&priv->mbx_lock, flags);
  
         /* Prepare mailbox for transmission */
+       data = cf->can_dlc | (get_tx_head_prio(priv) << 8);
         if (cf->can_id & CAN_RTR_FLAG) /* Remote transmission request */
                 data |= HECC_CANMCF_RTR;
-       data |= get_tx_head_prio(priv) << 8;
         hecc_write_mbx(priv, mbxno, HECC_CANMCF, data);
  
         if (cf->can_id & CAN_EFF_FLAG) /* Extended frame format */
@@ -923,6 +924,7 @@ static int ti_hecc_probe(struct platform_device *pdev)
         priv->can.do_get_state = ti_hecc_get_state;
         priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
  
+       spin_lock_init(&priv->mbx_lock);
         ndev->irq = irq->start;
         ndev->flags |= IFF_ECHO;
         platform_set_drvdata(pdev, ndev);
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c

index 646c86bcc545e08835cc07b07ca913b1ba64dbcc..fdb7a175640981186b5e891dc7f7d34fedddb223 100644 (file)
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -2452,14 +2452,13 @@ static irqreturn_t cas_interruptN(int irq, void *dev_id)
         struct net_device *dev = dev_id;
         struct cas *cp = netdev_priv(dev);
         unsigned long flags;
-       int ring;
+       int ring = (irq == cp->pci_irq_INTC) ? 2 : 3;
         u32 status = readl(cp->regs + REG_PLUS_INTRN_STATUS(ring));
  
         /* check for shared irq */
         if (status == 0)
                 return IRQ_NONE;
  
-       ring = (irq == cp->pci_irq_INTC) ? 2 : 3;
         spin_lock_irqsave(&cp->lock, flags);
         if (status & INTR_RX_DONE_ALT) { /* handle rx separately */
  #ifdef USE_NAPI
diff --git a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c

index 8545c7aa93eb674639b6d68dff2e77cb9a7e7d75..a5a89ecb6f3654c730d834a99f090248a6053b21 100644 (file)
--- a/drivers/net/e1000/e1000_hw.c
+++ b/drivers/net/e1000/e1000_hw.c
@@ -4026,6 +4026,12 @@ s32 e1000_validate_eeprom_checksum(struct e1000_hw *hw)
                 checksum += eeprom_data;
         }
  
+#ifdef CONFIG_PARISC
+       /* This is a signature and not a checksum on HP c8000 */
+       if ((hw->subsystem_vendor_id == 0x103C) && (eeprom_data == 0x16d6))
+               return E1000_SUCCESS;
+
+#endif
         if (checksum == (u16) EEPROM_SUM)
                 return E1000_SUCCESS;
         else {
diff --git a/drivers/net/e1000e/82571.c b/drivers/net/e1000e/82571.c

index 480f2592f8a513584afb2b68e973bbc39e326fd4..536b3a55c45f9ca7cf2bc55171477a26477187c4 100644 (file)
--- a/drivers/net/e1000e/82571.c
+++ b/drivers/net/e1000e/82571.c
@@ -2085,7 +2085,8 @@ struct e1000_info e1000_82574_info = {
                                   | FLAG_HAS_AMT
                                   | FLAG_HAS_CTRLEXT_ON_LOAD,
         .flags2                   = FLAG2_CHECK_PHY_HANG
-                                 | FLAG2_DISABLE_ASPM_L0S,
+                                 | FLAG2_DISABLE_ASPM_L0S
+                                 | FLAG2_NO_DISABLE_RX,
         .pba                    = 32,
         .max_hw_frame_size      = DEFAULT_JUMBO,
         .get_variants           = e1000_get_variants_82571,
@@ -2104,7 +2105,8 @@ struct e1000_info e1000_82583_info = {
                                   | FLAG_HAS_AMT
                                   | FLAG_HAS_JUMBO_FRAMES
                                   | FLAG_HAS_CTRLEXT_ON_LOAD,
-       .flags2                 = FLAG2_DISABLE_ASPM_L0S,
+       .flags2                 = FLAG2_DISABLE_ASPM_L0S
+                                 | FLAG2_NO_DISABLE_RX,
         .pba                    = 32,
         .max_hw_frame_size      = DEFAULT_JUMBO,
         .get_variants           = e1000_get_variants_82571,
diff --git a/drivers/net/e1000e/e1000.h b/drivers/net/e1000e/e1000.h

index 638d175792cf368c71aa4a7c55563330734478e7..8533ad7f3559030444d962d82ce3e1f43439e48e 100644 (file)
--- a/drivers/net/e1000e/e1000.h
+++ b/drivers/net/e1000e/e1000.h
@@ -155,6 +155,9 @@ struct e1000_info;
  #define HV_M_STATUS_SPEED_1000            0x0200
  #define HV_M_STATUS_LINK_UP               0x0040
  
+#define E1000_ICH_FWSM_PCIM2PCI                0x01000000 /* ME PCIm-to-PCI active */
+#define E1000_ICH_FWSM_PCIM2PCI_COUNT  2000
+
  /* Time to wait before putting the device into D3 if there's no link (in ms). */
  #define LINK_TIMEOUT           100
  
@@ -453,6 +456,8 @@ struct e1000_info {
  #define FLAG2_DISABLE_ASPM_L0S            (1 << 7)
  #define FLAG2_DISABLE_AIM                 (1 << 8)
  #define FLAG2_CHECK_PHY_HANG              (1 << 9)
+#define FLAG2_NO_DISABLE_RX               (1 << 10)
+#define FLAG2_PCIM2PCI_ARBITER_WA         (1 << 11)
  
  #define E1000_RX_DESC_PS(R, i)     \
         (&(((union e1000_rx_desc_packet_split *)((R).desc))[i]))
diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c

index 06d88f316dce4de827975cc5abdfbf4d2700eb41..6a0526a59a8a38ce00e8c0aaeb9795db02d11d97 100644 (file)
--- a/drivers/net/e1000e/ethtool.c
+++ b/drivers/net/e1000e/ethtool.c
@@ -1206,7 +1206,8 @@ static int e1000_setup_desc_rings(struct e1000_adapter *adapter)
         rx_ring->next_to_clean = 0;
  
         rctl = er32(RCTL);
-       ew32(RCTL, rctl & ~E1000_RCTL_EN);
+       if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX))
+               ew32(RCTL, rctl & ~E1000_RCTL_EN);
         ew32(RDBAL, ((u64) rx_ring->dma & 0xFFFFFFFF));
         ew32(RDBAH, ((u64) rx_ring->dma >> 32));
         ew32(RDLEN, rx_ring->size);
diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c

index 4e36978b8fd8f11c2042d611165a28b119111ad1..54add27c8f760ae871858f837d6d956ee2387f03 100644 (file)
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -137,8 +137,9 @@
  #define HV_PM_CTRL             PHY_REG(770, 17)
  
  /* PHY Low Power Idle Control */
-#define I82579_LPI_CTRL                        PHY_REG(772, 20)
-#define I82579_LPI_CTRL_ENABLE_MASK    0x6000
+#define I82579_LPI_CTRL                                PHY_REG(772, 20)
+#define I82579_LPI_CTRL_ENABLE_MASK            0x6000
+#define I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT   0x80
  
  /* EMI Registers */
  #define I82579_EMI_ADDR         0x10
@@ -163,6 +164,11 @@
  #define HV_KMRN_MODE_CTRL      PHY_REG(769, 16)
  #define HV_KMRN_MDIO_SLOW      0x0400
  
+/* KMRN FIFO Control and Status */
+#define HV_KMRN_FIFO_CTRLSTA                  PHY_REG(770, 16)
+#define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK    0x7000
+#define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT   12
+
  /* ICH GbE Flash Hardware Sequencing Flash Status Register bit breakdown */
  /* Offset 04h HSFSTS */
  union ich8_hws_flash_status {
@@ -657,6 +663,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
         struct e1000_mac_info *mac = &hw->mac;
         s32 ret_val;
         bool link;
+       u16 phy_reg;
  
         /*
          * We only want to go out to the PHY registers to see if Auto-Neg
@@ -689,16 +696,35 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
  
         mac->get_link_status = false;
  
-       if (hw->phy.type == e1000_phy_82578) {
-               ret_val = e1000_link_stall_workaround_hv(hw);
-               if (ret_val)
-                       goto out;
-       }
-
-       if (hw->mac.type == e1000_pch2lan) {
+       switch (hw->mac.type) {
+       case e1000_pch2lan:
                 ret_val = e1000_k1_workaround_lv(hw);
                 if (ret_val)
                         goto out;
+               /* fall-thru */
+       case e1000_pchlan:
+               if (hw->phy.type == e1000_phy_82578) {
+                       ret_val = e1000_link_stall_workaround_hv(hw);
+                       if (ret_val)
+                               goto out;
+               }
+
+               /*
+                * Workaround for PCHx parts in half-duplex:
+                * Set the number of preambles removed from the packet
+                * when it is passed from the PHY to the MAC to prevent
+                * the MAC from misinterpreting the packet type.
+                */
+               e1e_rphy(hw, HV_KMRN_FIFO_CTRLSTA, &phy_reg);
+               phy_reg &= ~HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK;
+
+               if ((er32(STATUS) & E1000_STATUS_FD) != E1000_STATUS_FD)
+                       phy_reg |= (1 << HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT);
+
+               e1e_wphy(hw, HV_KMRN_FIFO_CTRLSTA, phy_reg);
+               break;
+       default:
+               break;
         }
  
         /*
@@ -788,6 +814,11 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
             (adapter->hw.phy.type == e1000_phy_igp_3))
                 adapter->flags |= FLAG_LSC_GIG_SPEED_DROP;
  
+       /* Enable workaround for 82579 w/ ME enabled */
+       if ((adapter->hw.mac.type == e1000_pch2lan) &&
+           (er32(FWSM) & E1000_ICH_FWSM_FW_VALID))
+               adapter->flags2 |= FLAG2_PCIM2PCI_ARBITER_WA;
+
         /* Disable EEE by default until IEEE802.3az spec is finalized */
         if (adapter->flags2 & FLAG2_HAS_EEE)
                 adapter->hw.dev_spec.ich8lan.eee_disable = true;
@@ -1355,7 +1386,7 @@ static s32 e1000_hv_phy_workarounds_ich8lan(struct e1000_hw *hw)
                         return ret_val;
  
                 /* Preamble tuning for SSC */
-               ret_val = e1e_wphy(hw, PHY_REG(770, 16), 0xA204);
+               ret_val = e1e_wphy(hw, HV_KMRN_FIFO_CTRLSTA, 0xA204);
                 if (ret_val)
                         return ret_val;
         }
@@ -1645,6 +1676,7 @@ static s32 e1000_k1_workaround_lv(struct e1000_hw *hw)
         s32 ret_val = 0;
         u16 status_reg = 0;
         u32 mac_reg;
+       u16 phy_reg;
  
         if (hw->mac.type != e1000_pch2lan)
                 goto out;
@@ -1659,12 +1691,19 @@ static s32 e1000_k1_workaround_lv(struct e1000_hw *hw)
                 mac_reg = er32(FEXTNVM4);
                 mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK;
  
-               if (status_reg & HV_M_STATUS_SPEED_1000)
+               ret_val = e1e_rphy(hw, I82579_LPI_CTRL, &phy_reg);
+               if (ret_val)
+                       goto out;
+
+               if (status_reg & HV_M_STATUS_SPEED_1000) {
                         mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC;
-               else
+                       phy_reg &= ~I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT;
+               } else {
                         mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_16USEC;
-
+                       phy_reg |= I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT;
+               }
                 ew32(FEXTNVM4, mac_reg);
+               ret_val = e1e_wphy(hw, I82579_LPI_CTRL, phy_reg);
         }
  
  out:
diff --git a/drivers/net/e1000e/lib.c b/drivers/net/e1000e/lib.c

index 7898a67d6505478036e19e790ebeb1a1f2295445..0893ab107adf320a5b07293fb64596e6e2794315 100644 (file)
--- a/drivers/net/e1000e/lib.c
+++ b/drivers/net/e1000e/lib.c
@@ -190,7 +190,8 @@ s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw)
         /* Check for LOM (vs. NIC) or one of two valid mezzanine cards */
         if (!((nvm_data & NVM_COMPAT_LOM) ||
               (hw->adapter->pdev->device == E1000_DEV_ID_82571EB_SERDES_DUAL) ||
-             (hw->adapter->pdev->device == E1000_DEV_ID_82571EB_SERDES_QUAD)))
+             (hw->adapter->pdev->device == E1000_DEV_ID_82571EB_SERDES_QUAD) ||
+             (hw->adapter->pdev->device == E1000_DEV_ID_82571EB_SERDES)))
                 goto out;
  
         ret_val = e1000_read_nvm(hw, NVM_ALT_MAC_ADDR_PTR, 1,
@@ -200,10 +201,10 @@ s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw)
                 goto out;
         }
  
-       if (nvm_alt_mac_addr_offset == 0xFFFF) {
+       if ((nvm_alt_mac_addr_offset == 0xFFFF) ||
+           (nvm_alt_mac_addr_offset == 0x0000))
                 /* There is no Alternate MAC Address */
                 goto out;
-       }
  
         if (hw->bus.func == E1000_FUNC_1)
                 nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1;
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c

index ab4be80f7ab53bd01a28dd911a0dbabd26c27235..2198e615f241c5fc7e640f7c7da078be16ece7f3 100644 (file)
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -56,7 +56,7 @@
  
  #define DRV_EXTRAVERSION "-k"
  
-#define DRV_VERSION "1.3.16" DRV_EXTRAVERSION
+#define DRV_VERSION "1.4.4" DRV_EXTRAVERSION
  char e1000e_driver_name[] = "e1000e";
  const char e1000e_driver_version[] = DRV_VERSION;
  
@@ -518,6 +518,63 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err,
         adapter->hw_csum_good++;
  }
  
+/**
+ * e1000e_update_tail_wa - helper function for e1000e_update_[rt]dt_wa()
+ * @hw: pointer to the HW structure
+ * @tail: address of tail descriptor register
+ * @i: value to write to tail descriptor register
+ *
+ * When updating the tail register, the ME could be accessing Host CSR
+ * registers at the same time.  Normally, this is handled in h/w by an
+ * arbiter but on some parts there is a bug that acknowledges Host accesses
+ * later than it should which could result in the descriptor register to
+ * have an incorrect value.  Workaround this by checking the FWSM register
+ * which has bit 24 set while ME is accessing Host CSR registers, wait
+ * if it is set and try again a number of times.
+ **/
+static inline s32 e1000e_update_tail_wa(struct e1000_hw *hw, u8 __iomem * tail,
+                                       unsigned int i)
+{
+       unsigned int j = 0;
+
+       while ((j++ < E1000_ICH_FWSM_PCIM2PCI_COUNT) &&
+              (er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI))
+               udelay(50);
+
+       writel(i, tail);
+
+       if ((j == E1000_ICH_FWSM_PCIM2PCI_COUNT) && (i != readl(tail)))
+               return E1000_ERR_SWFW_SYNC;
+
+       return 0;
+}
+
+static void e1000e_update_rdt_wa(struct e1000_adapter *adapter, unsigned int i)
+{
+       u8 __iomem *tail = (adapter->hw.hw_addr + adapter->rx_ring->tail);
+       struct e1000_hw *hw = &adapter->hw;
+
+       if (e1000e_update_tail_wa(hw, tail, i)) {
+               u32 rctl = er32(RCTL);
+               ew32(RCTL, rctl & ~E1000_RCTL_EN);
+               e_err("ME firmware caused invalid RDT - resetting\n");
+               schedule_work(&adapter->reset_task);
+       }
+}
+
+static void e1000e_update_tdt_wa(struct e1000_adapter *adapter, unsigned int i)
+{
+       u8 __iomem *tail = (adapter->hw.hw_addr + adapter->tx_ring->tail);
+       struct e1000_hw *hw = &adapter->hw;
+
+       if (e1000e_update_tail_wa(hw, tail, i)) {
+               u32 tctl = er32(TCTL);
+               ew32(TCTL, tctl & ~E1000_TCTL_EN);
+               e_err("ME firmware caused invalid TDT - resetting\n");
+               schedule_work(&adapter->reset_task);
+       }
+}
+
  /**
   * e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended
   * @adapter: address of board private structure
@@ -573,7 +630,10 @@ map_skb:
                          * such as IA-64).
                          */
                         wmb();
-                       writel(i, adapter->hw.hw_addr + rx_ring->tail);
+                       if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+                               e1000e_update_rdt_wa(adapter, i);
+                       else
+                               writel(i, adapter->hw.hw_addr + rx_ring->tail);
                 }
                 i++;
                 if (i == rx_ring->count)
@@ -673,7 +733,11 @@ static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
                          * such as IA-64).
                          */
                         wmb();
-                       writel(i << 1, adapter->hw.hw_addr + rx_ring->tail);
+                       if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+                               e1000e_update_rdt_wa(adapter, i << 1);
+                       else
+                               writel(i << 1,
+                                      adapter->hw.hw_addr + rx_ring->tail);
                 }
  
                 i++;
@@ -756,7 +820,10 @@ check_page:
                  * applicable for weak-ordered memory model archs,
                  * such as IA-64). */
                 wmb();
-               writel(i, adapter->hw.hw_addr + rx_ring->tail);
+               if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+                       e1000e_update_rdt_wa(adapter, i);
+               else
+                       writel(i, adapter->hw.hw_addr + rx_ring->tail);
         }
  }
  
@@ -2915,7 +2982,8 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
  
         /* disable receives while setting up the descriptors */
         rctl = er32(RCTL);
-       ew32(RCTL, rctl & ~E1000_RCTL_EN);
+       if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX))
+               ew32(RCTL, rctl & ~E1000_RCTL_EN);
         e1e_flush();
         usleep_range(10000, 20000);
  
@@ -3394,7 +3462,8 @@ void e1000e_down(struct e1000_adapter *adapter)
  
         /* disable receives in the hardware */
         rctl = er32(RCTL);
-       ew32(RCTL, rctl & ~E1000_RCTL_EN);
+       if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX))
+               ew32(RCTL, rctl & ~E1000_RCTL_EN);
         /* flush and sleep below */
  
         netif_stop_queue(netdev);
@@ -3403,6 +3472,7 @@ void e1000e_down(struct e1000_adapter *adapter)
         tctl = er32(TCTL);
         tctl &= ~E1000_TCTL_EN;
         ew32(TCTL, tctl);
+
         /* flush both disables and wait for them to finish */
         e1e_flush();
         usleep_range(10000, 20000);
@@ -4686,7 +4756,12 @@ static void e1000_tx_queue(struct e1000_adapter *adapter,
         wmb();
  
         tx_ring->next_to_use = i;
-       writel(i, adapter->hw.hw_addr + tx_ring->tail);
+
+       if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+               e1000e_update_tdt_wa(adapter, i);
+       else
+               writel(i, adapter->hw.hw_addr + tx_ring->tail);
+
         /*
          * we need this if more than one processor can write to our tail
          * at a time, it synchronizes IO on IA64/Altix systems
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c

index e55df308a3af219a54beaadc7dfdcd42c9508de8..6d5fbd4d4256ba527ea295030e91c6dd5b7373f6 100644 (file)
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -5615,7 +5615,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
                 goto out_error;
         }
  
-       nv_vlan_mode(dev, dev->features);
+       if (id->driver_data & DEV_HAS_VLAN)
+               nv_vlan_mode(dev, dev->features);
  
         netif_carrier_off(dev);
  
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c

index 2659daad783ddb857f5e9597bbb2a2ea95c05f04..31d5c574e5a9aaf923d23c124186dbdfc7defa70 100644 (file)
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -2710,8 +2710,13 @@ static int gfar_process_frame(struct net_device *dev, struct sk_buff *skb,
         /* Tell the skb what kind of packet this is */
         skb->protocol = eth_type_trans(skb, dev);
  
-       /* Set vlan tag */
-       if (fcb->flags & RXFCB_VLN)
+       /*
+        * There's need to check for NETIF_F_HW_VLAN_RX here.
+        * Even if vlan rx accel is disabled, on some chips
+        * RXFCB_VLN is pseudo randomly set.
+        */
+       if (dev->features & NETIF_F_HW_VLAN_RX &&
+           fcb->flags & RXFCB_VLN)
                 __vlan_hwaccel_put_tag(skb, fcb->vlctl);
  
         /* Send the packet up the stack */
diff --git a/drivers/net/gianfar_ethtool.c b/drivers/net/gianfar_ethtool.c

index 6e350692d1184147b0989e19d8be3b8436938d29..25a8c2adb001892acc1c2531de8ed8b62b30d8e0 100644 (file)
--- a/drivers/net/gianfar_ethtool.c
+++ b/drivers/net/gianfar_ethtool.c
@@ -686,10 +686,21 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u
  {
         unsigned int last_rule_idx = priv->cur_filer_idx;
         unsigned int cmp_rqfpr;
-       unsigned int local_rqfpr[MAX_FILER_IDX + 1];
-       unsigned int local_rqfcr[MAX_FILER_IDX + 1];
+       unsigned int *local_rqfpr;
+       unsigned int *local_rqfcr;
         int i = 0x0, k = 0x0;
         int j = MAX_FILER_IDX, l = 0x0;
+       int ret = 1;
+
+       local_rqfpr = kmalloc(sizeof(unsigned int) * (MAX_FILER_IDX + 1),
+               GFP_KERNEL);
+       local_rqfcr = kmalloc(sizeof(unsigned int) * (MAX_FILER_IDX + 1),
+               GFP_KERNEL);
+       if (!local_rqfpr || !local_rqfcr) {
+               pr_err("Out of memory\n");
+               ret = 0;
+               goto err;
+       }
  
         switch (class) {
         case TCP_V4_FLOW:
@@ -706,7 +717,8 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u
                 break;
         default:
                 pr_err("Right now this class is not supported\n");
-               return 0;
+               ret = 0;
+               goto err;
         }
  
         for (i = 0; i < MAX_FILER_IDX + 1; i++) {
@@ -721,7 +733,8 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u
  
         if (i == MAX_FILER_IDX + 1) {
                 pr_err("No parse rule found, can't create hash rules\n");
-               return 0;
+               ret = 0;
+               goto err;
         }
  
         /* If a match was found, then it begins the starting of a cluster rule
@@ -765,7 +778,10 @@ static int gfar_ethflow_to_filer_table(struct gfar_private *priv, u64 ethflow, u
                 priv->cur_filer_idx = priv->cur_filer_idx - 1;
         }
  
-       return 1;
+err:
+       kfree(local_rqfcr);
+       kfree(local_rqfpr);
+       return ret;
  }
  
  static int gfar_set_hash_opts(struct gfar_private *priv, struct ethtool_rxnfc *cmd)
diff --git a/drivers/net/gianfar_ptp.c b/drivers/net/gianfar_ptp.c

index 1c97861596f03cbcdcfa1aa30daac23360f90f4e..f67b8aebc89c3d71fe30c41bd1dff1265a7c1fb1 100644 (file)
--- a/drivers/net/gianfar_ptp.c
+++ b/drivers/net/gianfar_ptp.c
@@ -193,14 +193,9 @@ static void set_alarm(struct etsects *etsects)
  /* Caller must hold etsects->lock. */
  static void set_fipers(struct etsects *etsects)
  {
-       u32 tmr_ctrl = gfar_read(&etsects->regs->tmr_ctrl);
-
-       gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl & (~TE));
-       gfar_write(&etsects->regs->tmr_prsc,   etsects->tmr_prsc);
+       set_alarm(etsects);
         gfar_write(&etsects->regs->tmr_fiper1, etsects->tmr_fiper1);
         gfar_write(&etsects->regs->tmr_fiper2, etsects->tmr_fiper2);
-       set_alarm(etsects);
-       gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl|TE);
  }
  
  /*
@@ -511,7 +506,7 @@ static int gianfar_ptp_probe(struct platform_device *dev)
         gfar_write(&etsects->regs->tmr_fiper1, etsects->tmr_fiper1);
         gfar_write(&etsects->regs->tmr_fiper2, etsects->tmr_fiper2);
         set_alarm(etsects);
-       gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl|FS|RTPE|TE);
+       gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl|FS|RTPE|TE|FRD);
  
         spin_unlock_irqrestore(&etsects->lock, flags);
  
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c

index ba99af05bf62744148a4d33d008c04ddece97a98..8dd5fccef7252933429d474418fdd712d841dac7 100644 (file)
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -395,7 +395,7 @@ static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *ada
  }
  
  /* recycle the current buffer on the rx queue */
-static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter)
+static int ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter)
  {
         u32 q_index = adapter->rx_queue.index;
         u64 correlator = adapter->rx_queue.queue_addr[q_index].correlator;
@@ -403,6 +403,7 @@ static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter)
         unsigned int index = correlator & 0xffffffffUL;
         union ibmveth_buf_desc desc;
         unsigned long lpar_rc;
+       int ret = 1;
  
         BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS);
         BUG_ON(index >= adapter->rx_buff_pool[pool].size);
@@ -410,7 +411,7 @@ static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter)
         if (!adapter->rx_buff_pool[pool].active) {
                 ibmveth_rxq_harvest_buffer(adapter);
                 ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[pool]);
-               return;
+               goto out;
         }
  
         desc.fields.flags_len = IBMVETH_BUF_VALID |
@@ -423,12 +424,16 @@ static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter)
                 netdev_dbg(adapter->netdev, "h_add_logical_lan_buffer failed "
                            "during recycle rc=%ld", lpar_rc);
                 ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator);
+               ret = 0;
         }
  
         if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) {
                 adapter->rx_queue.index = 0;
                 adapter->rx_queue.toggle = !adapter->rx_queue.toggle;
         }
+
+out:
+       return ret;
  }
  
  static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter)
@@ -752,7 +757,7 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
         struct ibmveth_adapter *adapter = netdev_priv(dev);
         unsigned long set_attr, clr_attr, ret_attr;
         unsigned long set_attr6, clr_attr6;
-       long ret, ret6;
+       long ret, ret4, ret6;
         int rc1 = 0, rc2 = 0;
         int restart = 0;
  
@@ -765,6 +770,8 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
  
         set_attr = 0;
         clr_attr = 0;
+       set_attr6 = 0;
+       clr_attr6 = 0;
  
         if (data) {
                 set_attr = IBMVETH_ILLAN_IPV4_TCP_CSUM;
@@ -779,16 +786,20 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
         if (ret == H_SUCCESS && !(ret_attr & IBMVETH_ILLAN_ACTIVE_TRUNK) &&
             !(ret_attr & IBMVETH_ILLAN_TRUNK_PRI_MASK) &&
             (ret_attr & IBMVETH_ILLAN_PADDED_PKT_CSUM)) {
-               ret = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
+               ret4 = h_illan_attributes(adapter->vdev->unit_address, clr_attr,
                                          set_attr, &ret_attr);
  
-               if (ret != H_SUCCESS) {
+               if (ret4 != H_SUCCESS) {
                         netdev_err(dev, "unable to change IPv4 checksum "
                                         "offload settings. %d rc=%ld\n",
-                                       data, ret);
+                                       data, ret4);
+
+                       h_illan_attributes(adapter->vdev->unit_address,
+                                          set_attr, clr_attr, &ret_attr);
+
+                       if (data == 1)
+                               dev->features &= ~NETIF_F_IP_CSUM;
  
-                       ret = h_illan_attributes(adapter->vdev->unit_address,
-                                                set_attr, clr_attr, &ret_attr);
                 } else {
                         adapter->fw_ipv4_csum_support = data;
                 }
@@ -799,15 +810,18 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
                 if (ret6 != H_SUCCESS) {
                         netdev_err(dev, "unable to change IPv6 checksum "
                                         "offload settings. %d rc=%ld\n",
-                                       data, ret);
+                                       data, ret6);
+
+                       h_illan_attributes(adapter->vdev->unit_address,
+                                          set_attr6, clr_attr6, &ret_attr);
+
+                       if (data == 1)
+                               dev->features &= ~NETIF_F_IPV6_CSUM;
  
-                       ret = h_illan_attributes(adapter->vdev->unit_address,
-                                                set_attr6, clr_attr6,
-                                                &ret_attr);
                 } else
                         adapter->fw_ipv6_csum_support = data;
  
-               if (ret != H_SUCCESS || ret6 != H_SUCCESS)
+               if (ret4 == H_SUCCESS || ret6 == H_SUCCESS)
                         adapter->rx_csum = data;
                 else
                         rc1 = -EIO;
@@ -925,6 +939,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
         union ibmveth_buf_desc descs[6];
         int last, i;
         int force_bounce = 0;
+       dma_addr_t dma_addr;
  
         /*
          * veth handles a maximum of 6 segments including the header, so
@@ -989,17 +1004,16 @@ retry_bounce:
         }
  
         /* Map the header */
-       descs[0].fields.address = dma_map_single(&adapter->vdev->dev, skb->data,
-                                                skb_headlen(skb),
-                                                DMA_TO_DEVICE);
-       if (dma_mapping_error(&adapter->vdev->dev, descs[0].fields.address))
+       dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
+                                 skb_headlen(skb), DMA_TO_DEVICE);
+       if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
                 goto map_failed;
  
         descs[0].fields.flags_len = desc_flags | skb_headlen(skb);
+       descs[0].fields.address = dma_addr;
  
         /* Map the frags */
         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-               unsigned long dma_addr;
                 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  
                 dma_addr = dma_map_page(&adapter->vdev->dev, frag->page,
@@ -1021,7 +1035,12 @@ retry_bounce:
                 netdev->stats.tx_bytes += skb->len;
         }
  
-       for (i = 0; i < skb_shinfo(skb)->nr_frags + 1; i++)
+       dma_unmap_single(&adapter->vdev->dev,
+                        descs[0].fields.address,
+                        descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK,
+                        DMA_TO_DEVICE);
+
+       for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++)
                 dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
                                descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
                                DMA_TO_DEVICE);
@@ -1084,8 +1103,9 @@ restart_poll:
                                 if (rx_flush)
                                         ibmveth_flush_buffer(skb->data,
                                                 length + offset);
+                               if (!ibmveth_rxq_recycle_buffer(adapter))
+                                       kfree_skb(skb);
                                 skb = new_skb;
-                               ibmveth_rxq_recycle_buffer(adapter);
                         } else {
                                 ibmveth_rxq_harvest_buffer(adapter);
                                 skb_reserve(skb, offset);
diff --git a/drivers/net/irda/sh_irda.c b/drivers/net/irda/sh_irda.c

index 4488bd581eca013be6b2cba3d362ce98ba505444..82660672dcd951da07971cff2ce5b0b396bbc823 100644 (file)
--- a/drivers/net/irda/sh_irda.c
+++ b/drivers/net/irda/sh_irda.c
@@ -22,6 +22,8 @@
   *  - DMA transfer support
   *  - FIFO mode support
   */
+#include <linux/io.h>
+#include <linux/interrupt.h>
  #include <linux/module.h>
  #include <linux/platform_device.h>
  #include <linux/clk.h>
diff --git a/drivers/net/irda/sh_sir.c b/drivers/net/irda/sh_sir.c

index 52a7c86af66347c388d24d29b043fe52e6a2ca5f..ed7d7d62bf68ddff78d657fa3d93d264434264a0 100644 (file)
--- a/drivers/net/irda/sh_sir.c
+++ b/drivers/net/irda/sh_sir.c
@@ -12,6 +12,8 @@
   * published by the Free Software Foundation.
   */
  
+#include <linux/io.h>
+#include <linux/interrupt.h>
  #include <linux/module.h>
  #include <linux/platform_device.h>
  #include <linux/slab.h>
@@ -511,7 +513,7 @@ static void sh_sir_tx(struct sh_sir_self *self, int phase)
  
  static int sh_sir_read_data(struct sh_sir_self *self)
  {
-       u16 val;
+       u16 val = 0;
         int timeout = 1024;
  
         while (timeout--) {
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c

index e86297b32733e8a0af34a329a3f839ce2c57205b..22790394318a06085290e179f88647e97480271b 100644 (file)
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -1459,8 +1459,10 @@ static void ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
                 if (ixgbe_rx_is_fcoe(adapter, rx_desc)) {
                         ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb,
                                                    staterr);
-                       if (!ddp_bytes)
+                       if (!ddp_bytes) {
+                               dev_kfree_skb_any(skb);
                                 goto next_desc;
+                       }
                 }
  #endif /* IXGBE_FCOE */
                 ixgbe_receive_skb(q_vector, skb, staterr, rx_ring, rx_desc);
diff --git a/drivers/net/pch_gbe/pch_gbe.h b/drivers/net/pch_gbe/pch_gbe.h

index 59fac77d0dbb68a58f95cce5046a243d723e12a1..a09a07197eb58f65122296800959accab87b0f21 100644 (file)
--- a/drivers/net/pch_gbe/pch_gbe.h
+++ b/drivers/net/pch_gbe/pch_gbe.h
@@ -127,8 +127,8 @@ struct pch_gbe_regs {
  
  /* Reset */
  #define PCH_GBE_ALL_RST         0x80000000  /* All reset */
-#define PCH_GBE_TX_RST          0x40000000  /* TX MAC, TX FIFO, TX DMA reset */
-#define PCH_GBE_RX_RST          0x04000000  /* RX MAC, RX FIFO, RX DMA reset */
+#define PCH_GBE_TX_RST          0x00008000  /* TX MAC, TX FIFO, TX DMA reset */
+#define PCH_GBE_RX_RST          0x00004000  /* RX MAC, RX FIFO, RX DMA reset */
  
  /* TCP/IP Accelerator Control */
  #define PCH_GBE_EX_LIST_EN      0x00000008  /* External List Enable */
@@ -276,6 +276,9 @@ struct pch_gbe_regs {
  #define PCH_GBE_RX_DMA_EN       0x00000002   /* Enables Receive DMA */
  #define PCH_GBE_TX_DMA_EN       0x00000001   /* Enables Transmission DMA */
  
+/* RX DMA STATUS */
+#define PCH_GBE_IDLE_CHECK       0xFFFFFFFE
+
  /* Wake On LAN Status */
  #define PCH_GBE_WLS_BR          0x00000008 /* Broadcas Address */
  #define PCH_GBE_WLS_MLT         0x00000004 /* Multicast Address */
@@ -471,6 +474,7 @@ struct pch_gbe_tx_desc {
  struct pch_gbe_buffer {
         struct sk_buff *skb;
         dma_addr_t dma;
+       unsigned char *rx_buffer;
         unsigned long time_stamp;
         u16 length;
         bool mapped;
@@ -511,6 +515,9 @@ struct pch_gbe_tx_ring {
  struct pch_gbe_rx_ring {
         struct pch_gbe_rx_desc *desc;
         dma_addr_t dma;
+       unsigned char *rx_buff_pool;
+       dma_addr_t rx_buff_pool_logic;
+       unsigned int rx_buff_pool_size;
         unsigned int size;
         unsigned int count;
         unsigned int next_to_use;
@@ -622,6 +629,7 @@ struct pch_gbe_adapter {
         unsigned long rx_buffer_len;
         unsigned long tx_queue_len;
         bool have_msi;
+       bool rx_stop_flag;
  };
  
  extern const char pch_driver_version[];
diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c

index eac3c5ca9731b8d9f15819a801654a8e76fd4917..567ff10889be62e670eb352684b7bfdf7802af71 100644 (file)
--- a/drivers/net/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/pch_gbe/pch_gbe_main.c
@@ -20,7 +20,6 @@
  
  #include "pch_gbe.h"
  #include "pch_gbe_api.h"
-#include <linux/prefetch.h>
  
  #define DRV_VERSION     "1.00"
  const char pch_driver_version[] = DRV_VERSION;
@@ -34,11 +33,15 @@ const char pch_driver_version[] = DRV_VERSION;
  #define PCH_GBE_WATCHDOG_PERIOD                (1 * HZ)        /* watchdog time */
  #define PCH_GBE_COPYBREAK_DEFAULT      256
  #define PCH_GBE_PCI_BAR                        1
+#define PCH_GBE_RESERVE_MEMORY         0x200000        /* 2MB */
  
  /* Macros for ML7223 */
  #define PCI_VENDOR_ID_ROHM                     0x10db
  #define PCI_DEVICE_ID_ROHM_ML7223_GBE          0x8013
  
+/* Macros for ML7831 */
+#define PCI_DEVICE_ID_ROHM_ML7831_GBE          0x8802
+
  #define PCH_GBE_TX_WEIGHT         64
  #define PCH_GBE_RX_WEIGHT         64
  #define PCH_GBE_RX_BUFFER_WRITE   16
@@ -52,6 +55,7 @@ const char pch_driver_version[] = DRV_VERSION;
         )
  
  /* Ethertype field values */
+#define PCH_GBE_MAX_RX_BUFFER_SIZE      0x2880
  #define PCH_GBE_MAX_JUMBO_FRAME_SIZE    10318
  #define PCH_GBE_FRAME_SIZE_2048         2048
  #define PCH_GBE_FRAME_SIZE_4096         4096
@@ -83,10 +87,12 @@ const char pch_driver_version[] = DRV_VERSION;
  #define PCH_GBE_INT_ENABLE_MASK ( \
         PCH_GBE_INT_RX_DMA_CMPLT |    \
         PCH_GBE_INT_RX_DSC_EMP   |    \
+       PCH_GBE_INT_RX_FIFO_ERR  |    \
         PCH_GBE_INT_WOL_DET      |    \
         PCH_GBE_INT_TX_CMPLT          \
         )
  
+#define PCH_GBE_INT_DISABLE_ALL                0
  
  static unsigned int copybreak __read_mostly = PCH_GBE_COPYBREAK_DEFAULT;
  
@@ -138,6 +144,27 @@ static void pch_gbe_wait_clr_bit(void *reg, u32 bit)
         if (!tmp)
                 pr_err("Error: busy bit is not cleared\n");
  }
+
+/**
+ * pch_gbe_wait_clr_bit_irq - Wait to clear a bit for interrupt context
+ * @reg:       Pointer of register
+ * @busy:      Busy bit
+ */
+static int pch_gbe_wait_clr_bit_irq(void *reg, u32 bit)
+{
+       u32 tmp;
+       int ret = -1;
+       /* wait busy */
+       tmp = 20;
+       while ((ioread32(reg) & bit) && --tmp)
+               udelay(5);
+       if (!tmp)
+               pr_err("Error: busy bit is not cleared\n");
+       else
+               ret = 0;
+       return ret;
+}
+
  /**
   * pch_gbe_mac_mar_set - Set MAC address register
   * @hw:            Pointer to the HW structure
@@ -189,6 +216,17 @@ static void pch_gbe_mac_reset_hw(struct pch_gbe_hw *hw)
         return;
  }
  
+static void pch_gbe_mac_reset_rx(struct pch_gbe_hw *hw)
+{
+       /* Read the MAC address. and store to the private data */
+       pch_gbe_mac_read_mac_addr(hw);
+       iowrite32(PCH_GBE_RX_RST, &hw->reg->RESET);
+       pch_gbe_wait_clr_bit_irq(&hw->reg->RESET, PCH_GBE_RX_RST);
+       /* Setup the MAC address */
+       pch_gbe_mac_mar_set(hw, hw->mac.addr, 0);
+       return;
+}
+
  /**
   * pch_gbe_mac_init_rx_addrs - Initialize receive address's
   * @hw:        Pointer to the HW structure
@@ -671,13 +709,8 @@ static void pch_gbe_setup_rctl(struct pch_gbe_adapter *adapter)
  
         tcpip = ioread32(&hw->reg->TCPIP_ACC);
  
-       if (netdev->features & NETIF_F_RXCSUM) {
-               tcpip &= ~PCH_GBE_RX_TCPIPACC_OFF;
-               tcpip |= PCH_GBE_RX_TCPIPACC_EN;
-       } else {
-               tcpip |= PCH_GBE_RX_TCPIPACC_OFF;
-               tcpip &= ~PCH_GBE_RX_TCPIPACC_EN;
-       }
+       tcpip |= PCH_GBE_RX_TCPIPACC_OFF;
+       tcpip &= ~PCH_GBE_RX_TCPIPACC_EN;
         iowrite32(tcpip, &hw->reg->TCPIP_ACC);
         return;
  }
@@ -717,13 +750,6 @@ static void pch_gbe_configure_rx(struct pch_gbe_adapter *adapter)
         iowrite32(rdba, &hw->reg->RX_DSC_BASE);
         iowrite32(rdlen, &hw->reg->RX_DSC_SIZE);
         iowrite32((rdba + rdlen), &hw->reg->RX_DSC_SW_P);
-
-       /* Enables Receive DMA */
-       rxdma = ioread32(&hw->reg->DMA_CTRL);
-       rxdma |= PCH_GBE_RX_DMA_EN;
-       iowrite32(rxdma, &hw->reg->DMA_CTRL);
-       /* Enables Receive */
-       iowrite32(PCH_GBE_MRE_MAC_RX_EN, &hw->reg->MAC_RX_EN);
  }
  
  /**
@@ -1097,6 +1123,48 @@ void pch_gbe_update_stats(struct pch_gbe_adapter *adapter)
         spin_unlock_irqrestore(&adapter->stats_lock, flags);
  }
  
+static void pch_gbe_stop_receive(struct pch_gbe_adapter *adapter)
+{
+       struct pch_gbe_hw *hw = &adapter->hw;
+       u32 rxdma;
+       u16 value;
+       int ret;
+
+       /* Disable Receive DMA */
+       rxdma = ioread32(&hw->reg->DMA_CTRL);
+       rxdma &= ~PCH_GBE_RX_DMA_EN;
+       iowrite32(rxdma, &hw->reg->DMA_CTRL);
+       /* Wait Rx DMA BUS is IDLE */
+       ret = pch_gbe_wait_clr_bit_irq(&hw->reg->RX_DMA_ST, PCH_GBE_IDLE_CHECK);
+       if (ret) {
+               /* Disable Bus master */
+               pci_read_config_word(adapter->pdev, PCI_COMMAND, &value);
+               value &= ~PCI_COMMAND_MASTER;
+               pci_write_config_word(adapter->pdev, PCI_COMMAND, value);
+               /* Stop Receive */
+               pch_gbe_mac_reset_rx(hw);
+               /* Enable Bus master */
+               value |= PCI_COMMAND_MASTER;
+               pci_write_config_word(adapter->pdev, PCI_COMMAND, value);
+       } else {
+               /* Stop Receive */
+               pch_gbe_mac_reset_rx(hw);
+       }
+}
+
+static void pch_gbe_start_receive(struct pch_gbe_hw *hw)
+{
+       u32 rxdma;
+
+       /* Enables Receive DMA */
+       rxdma = ioread32(&hw->reg->DMA_CTRL);
+       rxdma |= PCH_GBE_RX_DMA_EN;
+       iowrite32(rxdma, &hw->reg->DMA_CTRL);
+       /* Enables Receive */
+       iowrite32(PCH_GBE_MRE_MAC_RX_EN, &hw->reg->MAC_RX_EN);
+       return;
+}
+
  /**
   * pch_gbe_intr - Interrupt Handler
   * @irq:   Interrupt number
@@ -1123,7 +1191,15 @@ static irqreturn_t pch_gbe_intr(int irq, void *data)
         if (int_st & PCH_GBE_INT_RX_FRAME_ERR)
                 adapter->stats.intr_rx_frame_err_count++;
         if (int_st & PCH_GBE_INT_RX_FIFO_ERR)
-               adapter->stats.intr_rx_fifo_err_count++;
+               if (!adapter->rx_stop_flag) {
+                       adapter->stats.intr_rx_fifo_err_count++;
+                       pr_debug("Rx fifo over run\n");
+                       adapter->rx_stop_flag = true;
+                       int_en = ioread32(&hw->reg->INT_EN);
+                       iowrite32((int_en & ~PCH_GBE_INT_RX_FIFO_ERR),
+                                 &hw->reg->INT_EN);
+                       pch_gbe_stop_receive(adapter);
+               }
         if (int_st & PCH_GBE_INT_RX_DMA_ERR)
                 adapter->stats.intr_rx_dma_err_count++;
         if (int_st & PCH_GBE_INT_TX_FIFO_ERR)
@@ -1135,7 +1211,7 @@ static irqreturn_t pch_gbe_intr(int irq, void *data)
         /* When Rx descriptor is empty  */
         if ((int_st & PCH_GBE_INT_RX_DSC_EMP)) {
                 adapter->stats.intr_rx_dsc_empty_count++;
-               pr_err("Rx descriptor is empty\n");
+               pr_debug("Rx descriptor is empty\n");
                 int_en = ioread32(&hw->reg->INT_EN);
                 iowrite32((int_en & ~PCH_GBE_INT_RX_DSC_EMP), &hw->reg->INT_EN);
                 if (hw->mac.tx_fc_enable) {
@@ -1185,29 +1261,23 @@ pch_gbe_alloc_rx_buffers(struct pch_gbe_adapter *adapter,
         unsigned int i;
         unsigned int bufsz;
  
-       bufsz = adapter->rx_buffer_len + PCH_GBE_DMA_ALIGN;
+       bufsz = adapter->rx_buffer_len + NET_IP_ALIGN;
         i = rx_ring->next_to_use;
  
         while ((cleaned_count--)) {
                 buffer_info = &rx_ring->buffer_info[i];
-               skb = buffer_info->skb;
-               if (skb) {
-                       skb_trim(skb, 0);
-               } else {
-                       skb = netdev_alloc_skb(netdev, bufsz);
-                       if (unlikely(!skb)) {
-                               /* Better luck next round */
-                               adapter->stats.rx_alloc_buff_failed++;
-                               break;
-                       }
-                       /* 64byte align */
-                       skb_reserve(skb, PCH_GBE_DMA_ALIGN);
-
-                       buffer_info->skb = skb;
-                       buffer_info->length = adapter->rx_buffer_len;
+               skb = netdev_alloc_skb(netdev, bufsz);
+               if (unlikely(!skb)) {
+                       /* Better luck next round */
+                       adapter->stats.rx_alloc_buff_failed++;
+                       break;
                 }
+               /* align */
+               skb_reserve(skb, NET_IP_ALIGN);
+               buffer_info->skb = skb;
+
                 buffer_info->dma = dma_map_single(&pdev->dev,
-                                                 skb->data,
+                                                 buffer_info->rx_buffer,
                                                   buffer_info->length,
                                                   DMA_FROM_DEVICE);
                 if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
@@ -1240,6 +1310,36 @@ pch_gbe_alloc_rx_buffers(struct pch_gbe_adapter *adapter,
         return;
  }
  
+static int
+pch_gbe_alloc_rx_buffers_pool(struct pch_gbe_adapter *adapter,
+                        struct pch_gbe_rx_ring *rx_ring, int cleaned_count)
+{
+       struct pci_dev *pdev = adapter->pdev;
+       struct pch_gbe_buffer *buffer_info;
+       unsigned int i;
+       unsigned int bufsz;
+       unsigned int size;
+
+       bufsz = adapter->rx_buffer_len;
+
+       size = rx_ring->count * bufsz + PCH_GBE_RESERVE_MEMORY;
+       rx_ring->rx_buff_pool = dma_alloc_coherent(&pdev->dev, size,
+                                               &rx_ring->rx_buff_pool_logic,
+                                               GFP_KERNEL);
+       if (!rx_ring->rx_buff_pool) {
+               pr_err("Unable to allocate memory for the receive poll buffer\n");
+               return -ENOMEM;
+       }
+       memset(rx_ring->rx_buff_pool, 0, size);
+       rx_ring->rx_buff_pool_size = size;
+       for (i = 0; i < rx_ring->count; i++) {
+               buffer_info = &rx_ring->buffer_info[i];
+               buffer_info->rx_buffer = rx_ring->rx_buff_pool + bufsz * i;
+               buffer_info->length = bufsz;
+       }
+       return 0;
+}
+
  /**
   * pch_gbe_alloc_tx_buffers - Allocate transmit buffers
   * @adapter:   Board private structure
@@ -1380,7 +1480,7 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter,
         unsigned int i;
         unsigned int cleaned_count = 0;
         bool cleaned = false;
-       struct sk_buff *skb, *new_skb;
+       struct sk_buff *skb;
         u8 dma_status;
         u16 gbec_status;
         u32 tcp_ip_status;
@@ -1401,13 +1501,12 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter,
                 rx_desc->gbec_status = DSC_INIT16;
                 buffer_info = &rx_ring->buffer_info[i];
                 skb = buffer_info->skb;
+               buffer_info->skb = NULL;
  
                 /* unmap dma */
                 dma_unmap_single(&pdev->dev, buffer_info->dma,
                                    buffer_info->length, DMA_FROM_DEVICE);
                 buffer_info->mapped = false;
-               /* Prefetch the packet */
-               prefetch(skb->data);
  
                 pr_debug("RxDecNo = 0x%04x  Status[DMA:0x%02x GBE:0x%04x "
                          "TCP:0x%08x]  BufInf = 0x%p\n",
@@ -1427,70 +1526,16 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter,
                         pr_err("Receive CRC Error\n");
                 } else {
                         /* get receive length */
-                       /* length convert[-3] */
-                       length = (rx_desc->rx_words_eob) - 3;
-
-                       /* Decide the data conversion method */
-                       if (!(netdev->features & NETIF_F_RXCSUM)) {
-                               /* [Header:14][payload] */
-                               if (NET_IP_ALIGN) {
-                                       /* Because alignment differs,
-                                        * the new_skb is newly allocated,
-                                        * and data is copied to new_skb.*/
-                                       new_skb = netdev_alloc_skb(netdev,
-                                                        length + NET_IP_ALIGN);
-                                       if (!new_skb) {
-                                               /* dorrop error */
-                                               pr_err("New skb allocation "
-                                                       "Error\n");
-                                               goto dorrop;
-                                       }
-                                       skb_reserve(new_skb, NET_IP_ALIGN);
-                                       memcpy(new_skb->data, skb->data,
-                                              length);
-                                       skb = new_skb;
-                               } else {
-                                       /* DMA buffer is used as SKB as it is.*/
-                                       buffer_info->skb = NULL;
-                               }
-                       } else {
-                               /* [Header:14][padding:2][payload] */
-                               /* The length includes padding length */
-                               length = length - PCH_GBE_DMA_PADDING;
-                               if ((length < copybreak) ||
-                                   (NET_IP_ALIGN != PCH_GBE_DMA_PADDING)) {
-                                       /* Because alignment differs,
-                                        * the new_skb is newly allocated,
-                                        * and data is copied to new_skb.
-                                        * Padding data is deleted
-                                        * at the time of a copy.*/
-                                       new_skb = netdev_alloc_skb(netdev,
-                                                        length + NET_IP_ALIGN);
-                                       if (!new_skb) {
-                                               /* dorrop error */
-                                               pr_err("New skb allocation "
-                                                       "Error\n");
-                                               goto dorrop;
-                                       }
-                                       skb_reserve(new_skb, NET_IP_ALIGN);
-                                       memcpy(new_skb->data, skb->data,
-                                              ETH_HLEN);
-                                       memcpy(&new_skb->data[ETH_HLEN],
-                                              &skb->data[ETH_HLEN +
-                                              PCH_GBE_DMA_PADDING],
-                                              length - ETH_HLEN);
-                                       skb = new_skb;
-                               } else {
-                                       /* Padding data is deleted
-                                        * by moving header data.*/
-                                       memmove(&skb->data[PCH_GBE_DMA_PADDING],
-                                               &skb->data[0], ETH_HLEN);
-                                       skb_reserve(skb, NET_IP_ALIGN);
-                                       buffer_info->skb = NULL;
-                               }
-                       }
-                       /* The length includes FCS length */
-                       length = length - ETH_FCS_LEN;
+                       /* length convert[-3], length includes FCS length */
+                       length = (rx_desc->rx_words_eob) - 3 - ETH_FCS_LEN;
+                       if (rx_desc->rx_words_eob & 0x02)
+                               length = length - 4;
+                       /*
+                        * buffer_info->rx_buffer: [Header:14][payload]
+                        * skb->data: [Reserve:2][Header:14][payload]
+                        */
+                       memcpy(skb->data, buffer_info->rx_buffer, length);
+
                         /* update status of driver */
                         adapter->stats.rx_bytes += length;
                         adapter->stats.rx_packets++;
@@ -1509,7 +1554,6 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter,
                         pr_debug("Receive skb->ip_summed: %d length: %d\n",
                                  skb->ip_summed, length);
                 }
-dorrop:
                 /* return some buffers to hardware, one at a time is too slow */
                 if (unlikely(cleaned_count >= PCH_GBE_RX_BUFFER_WRITE)) {
                         pch_gbe_alloc_rx_buffers(adapter, rx_ring,
@@ -1714,9 +1758,15 @@ int pch_gbe_up(struct pch_gbe_adapter *adapter)
                 pr_err("Error: can't bring device up\n");
                 return err;
         }
+       err = pch_gbe_alloc_rx_buffers_pool(adapter, rx_ring, rx_ring->count);
+       if (err) {
+               pr_err("Error: can't bring device up\n");
+               return err;
+       }
         pch_gbe_alloc_tx_buffers(adapter, tx_ring);
         pch_gbe_alloc_rx_buffers(adapter, rx_ring, rx_ring->count);
         adapter->tx_queue_len = netdev->tx_queue_len;
+       pch_gbe_start_receive(&adapter->hw);
  
         mod_timer(&adapter->watchdog_timer, jiffies);
  
@@ -1734,6 +1784,7 @@ int pch_gbe_up(struct pch_gbe_adapter *adapter)
  void pch_gbe_down(struct pch_gbe_adapter *adapter)
  {
         struct net_device *netdev = adapter->netdev;
+       struct pch_gbe_rx_ring *rx_ring = adapter->rx_ring;
  
         /* signal that we're down so the interrupt handler does not
          * reschedule our watchdog timer */
@@ -1752,6 +1803,12 @@ void pch_gbe_down(struct pch_gbe_adapter *adapter)
         pch_gbe_reset(adapter);
         pch_gbe_clean_tx_ring(adapter, adapter->tx_ring);
         pch_gbe_clean_rx_ring(adapter, adapter->rx_ring);
+
+       pci_free_consistent(adapter->pdev, rx_ring->rx_buff_pool_size,
+                           rx_ring->rx_buff_pool, rx_ring->rx_buff_pool_logic);
+       rx_ring->rx_buff_pool_logic = 0;
+       rx_ring->rx_buff_pool_size = 0;
+       rx_ring->rx_buff_pool = NULL;
  }
  
  /**
@@ -2004,6 +2061,8 @@ static int pch_gbe_change_mtu(struct net_device *netdev, int new_mtu)
  {
         struct pch_gbe_adapter *adapter = netdev_priv(netdev);
         int max_frame;
+       unsigned long old_rx_buffer_len = adapter->rx_buffer_len;
+       int err;
  
         max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
         if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) ||
@@ -2018,14 +2077,24 @@ static int pch_gbe_change_mtu(struct net_device *netdev, int new_mtu)
         else if (max_frame <= PCH_GBE_FRAME_SIZE_8192)
                 adapter->rx_buffer_len = PCH_GBE_FRAME_SIZE_8192;
         else
-               adapter->rx_buffer_len = PCH_GBE_MAX_JUMBO_FRAME_SIZE;
-       netdev->mtu = new_mtu;
-       adapter->hw.mac.max_frame_size = max_frame;
+               adapter->rx_buffer_len = PCH_GBE_MAX_RX_BUFFER_SIZE;
  
-       if (netif_running(netdev))
-               pch_gbe_reinit_locked(adapter);
-       else
+       if (netif_running(netdev)) {
+               pch_gbe_down(adapter);
+               err = pch_gbe_up(adapter);
+               if (err) {
+                       adapter->rx_buffer_len = old_rx_buffer_len;
+                       pch_gbe_up(adapter);
+                       return -ENOMEM;
+               } else {
+                       netdev->mtu = new_mtu;
+                       adapter->hw.mac.max_frame_size = max_frame;
+               }
+       } else {
                 pch_gbe_reset(adapter);
+               netdev->mtu = new_mtu;
+               adapter->hw.mac.max_frame_size = max_frame;
+       }
  
         pr_debug("max_frame : %d  rx_buffer_len : %d  mtu : %d  max_frame_size : %d\n",
                  max_frame, (u32) adapter->rx_buffer_len, netdev->mtu,
@@ -2103,6 +2172,7 @@ static int pch_gbe_napi_poll(struct napi_struct *napi, int budget)
         int work_done = 0;
         bool poll_end_flag = false;
         bool cleaned = false;
+       u32 int_en;
  
         pr_debug("budget : %d\n", budget);
  
@@ -2110,8 +2180,15 @@ static int pch_gbe_napi_poll(struct napi_struct *napi, int budget)
         if (!netif_carrier_ok(netdev)) {
                 poll_end_flag = true;
         } else {
-               cleaned = pch_gbe_clean_tx(adapter, adapter->tx_ring);
                 pch_gbe_clean_rx(adapter, adapter->rx_ring, &work_done, budget);
+               if (adapter->rx_stop_flag) {
+                       adapter->rx_stop_flag = false;
+                       pch_gbe_start_receive(&adapter->hw);
+                       int_en = ioread32(&adapter->hw.reg->INT_EN);
+                       iowrite32((int_en | PCH_GBE_INT_RX_FIFO_ERR),
+                                       &adapter->hw.reg->INT_EN);
+               }
+               cleaned = pch_gbe_clean_tx(adapter, adapter->tx_ring);
  
                 if (cleaned)
                         work_done = budget;
@@ -2452,6 +2529,13 @@ static DEFINE_PCI_DEVICE_TABLE(pch_gbe_pcidev_id) = {
          .class = (PCI_CLASS_NETWORK_ETHERNET << 8),
          .class_mask = (0xFFFF00)
          },
+       {.vendor = PCI_VENDOR_ID_ROHM,
+        .device = PCI_DEVICE_ID_ROHM_ML7831_GBE,
+        .subvendor = PCI_ANY_ID,
+        .subdevice = PCI_ANY_ID,
+        .class = (PCI_CLASS_NETWORK_ETHERNET << 8),
+        .class_mask = (0xFFFF00)
+        },
         /* required last entry */
         {0}
  };
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c

index 8b3090dc4bcdf6c3b674d401b89a3a8aa3f5f5be..80b6f36a807406ef759b01df1c5de4902cb0b77f 100644 (file)
--- a/drivers/net/pcnet32.c
+++ b/drivers/net/pcnet32.c
@@ -82,7 +82,7 @@ static int cards_found;
  /*
   * VLB I/O addresses
   */
-static unsigned int pcnet32_portlist[] __initdata =
+static unsigned int pcnet32_portlist[] =
      { 0x300, 0x320, 0x340, 0x360, 0 };
  
  static int pcnet32_debug;
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c

index 2cd8dc5847b4432f5d0567e0cb7bc083e7a63fa3..cb6e0b486b1e220864ba72c60decf9f53b0a05c2 100644 (file)
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -34,8 +34,7 @@
  #define PAGESEL                0x13
  #define LAYER4         0x02
  #define LAYER2         0x01
-#define MAX_RXTS       4
-#define MAX_TXTS       4
+#define MAX_RXTS       64
  #define N_EXT_TS       1
  #define PSF_PTPVER     2
  #define PSF_EVNT       0x4000
@@ -218,7 +217,7 @@ static void phy2rxts(struct phy_rxts *p, struct rxts *rxts)
         rxts->seqid = p->seqid;
         rxts->msgtype = (p->msgtype >> 12) & 0xf;
         rxts->hash = p->msgtype & 0x0fff;
-       rxts->tmo = jiffies + HZ;
+       rxts->tmo = jiffies + 2;
  }
  
  static u64 phy2txts(struct phy_txts *p)
diff --git a/drivers/net/phy/national.c b/drivers/net/phy/national.c

index 0620ba963508e17096dab555ed79495749c68e70..04bb8fcc0cb5dd2c069518270af8e9ca1f7c74e1 100644 (file)
--- a/drivers/net/phy/national.c
+++ b/drivers/net/phy/national.c
@@ -25,8 +25,9 @@
  /* DP83865 phy identifier values */
  #define DP83865_PHY_ID 0x20005c7a
  
-#define DP83865_INT_MASK_REG 0x15
-#define DP83865_INT_MASK_STATUS 0x14
+#define DP83865_INT_STATUS     0x14
+#define DP83865_INT_MASK       0x15
+#define DP83865_INT_CLEAR      0x17
  
  #define DP83865_INT_REMOTE_FAULT 0x0008
  #define DP83865_INT_ANE_COMPLETED 0x0010
@@ -68,21 +69,25 @@ static int ns_config_intr(struct phy_device *phydev)
         int err;
  
         if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
-               err = phy_write(phydev, DP83865_INT_MASK_REG,
+               err = phy_write(phydev, DP83865_INT_MASK,
                                 DP83865_INT_MASK_DEFAULT);
         else
-               err = phy_write(phydev, DP83865_INT_MASK_REG, 0);
+               err = phy_write(phydev, DP83865_INT_MASK, 0);
  
         return err;
  }
  
  static int ns_ack_interrupt(struct phy_device *phydev)
  {
-       int ret = phy_read(phydev, DP83865_INT_MASK_STATUS);
+       int ret = phy_read(phydev, DP83865_INT_STATUS);
         if (ret < 0)
                 return ret;
  
-       return 0;
+       /* Clear the interrupt status bit by writing a “1”
+        * to the corresponding bit in INT_CLEAR (2:0 are reserved) */
+       ret = phy_write(phydev, DP83865_INT_CLEAR, ret & ~0x7);
+
+       return ret;
  }
  
  static void ns_giga_speed_fallback(struct phy_device *phydev, int mode)
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c

index 86ac38c96bcf040b59027332b614925aeee71293..3bb131137033cb9d9e53daaf0ad0ee89310fae65 100644 (file)
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -80,13 +80,13 @@ static int rionet_capable = 1;
   */
  static struct rio_dev **rionet_active;
  
-#define is_rionet_capable(pef, src_ops, dst_ops)               \
-                       ((pef & RIO_PEF_INB_MBOX) &&            \
-                        (pef & RIO_PEF_INB_DOORBELL) &&        \
+#define is_rionet_capable(src_ops, dst_ops)                    \
+                       ((src_ops & RIO_SRC_OPS_DATA_MSG) &&    \
+                        (dst_ops & RIO_DST_OPS_DATA_MSG) &&    \
                          (src_ops & RIO_SRC_OPS_DOORBELL) &&    \
                          (dst_ops & RIO_DST_OPS_DOORBELL))
  #define dev_rionet_capable(dev) \
-       is_rionet_capable(dev->pef, dev->src_ops, dev->dst_ops)
+       is_rionet_capable(dev->src_ops, dev->dst_ops)
  
  #define RIONET_MAC_MATCH(x)    (*(u32 *)x == 0x00010001)
  #define RIONET_GET_DESTID(x)   (*(u16 *)(x + 4))
@@ -282,7 +282,6 @@ static int rionet_open(struct net_device *ndev)
  {
         int i, rc = 0;
         struct rionet_peer *peer, *tmp;
-       u32 pwdcsr;
         struct rionet_private *rnet = netdev_priv(ndev);
  
         if (netif_msg_ifup(rnet))
@@ -332,13 +331,8 @@ static int rionet_open(struct net_device *ndev)
                         continue;
                 }
  
-               /*
-                * If device has initialized inbound doorbells,
-                * send a join message
-                */
-               rio_read_config_32(peer->rdev, RIO_WRITE_PORT_CSR, &pwdcsr);
-               if (pwdcsr & RIO_DOORBELL_AVAIL)
-                       rio_send_doorbell(peer->rdev, RIONET_DOORBELL_JOIN);
+               /* Send a join message */
+               rio_send_doorbell(peer->rdev, RIONET_DOORBELL_JOIN);
         }
  
        out:
@@ -492,7 +486,7 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev)
  static int rionet_probe(struct rio_dev *rdev, const struct rio_device_id *id)
  {
         int rc = -ENODEV;
-       u32 lpef, lsrc_ops, ldst_ops;
+       u32 lsrc_ops, ldst_ops;
         struct rionet_peer *peer;
         struct net_device *ndev = NULL;
  
@@ -515,12 +509,11 @@ static int rionet_probe(struct rio_dev *rdev, const struct rio_device_id *id)
          * on later probes
          */
         if (!rionet_check) {
-               rio_local_read_config_32(rdev->net->hport, RIO_PEF_CAR, &lpef);
                 rio_local_read_config_32(rdev->net->hport, RIO_SRC_OPS_CAR,
                                          &lsrc_ops);
                 rio_local_read_config_32(rdev->net->hport, RIO_DST_OPS_CAR,
                                          &ldst_ops);
-               if (!is_rionet_capable(lpef, lsrc_ops, ldst_ops)) {
+               if (!is_rionet_capable(lsrc_ops, ldst_ops)) {
                         printk(KERN_ERR
                                "%s: local device is not network capable\n",
                                DRV_NAME);
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c

index faca764aa21bef971677c3fc3dc027e3b5dd0fa6..b59abc706d9301e7b44fcdd504d5f95aa852f01e 100644 (file)
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -1050,7 +1050,6 @@ static int efx_init_io(struct efx_nic *efx)
  {
         struct pci_dev *pci_dev = efx->pci_dev;
         dma_addr_t dma_mask = efx->type->max_dma_mask;
-       bool use_wc;
         int rc;
  
         netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");
@@ -1101,21 +1100,8 @@ static int efx_init_io(struct efx_nic *efx)
                 rc = -EIO;
                 goto fail3;
         }
-
-       /* bug22643: If SR-IOV is enabled then tx push over a write combined
-        * mapping is unsafe. We need to disable write combining in this case.
-        * MSI is unsupported when SR-IOV is enabled, and the firmware will
-        * have removed the MSI capability. So write combining is safe if
-        * there is an MSI capability.
-        */
-       use_wc = (!EFX_WORKAROUND_22643(efx) ||
-                 pci_find_capability(pci_dev, PCI_CAP_ID_MSI));
-       if (use_wc)
-               efx->membase = ioremap_wc(efx->membase_phys,
-                                         efx->type->mem_map_size);
-       else
-               efx->membase = ioremap_nocache(efx->membase_phys,
-                                              efx->type->mem_map_size);
+       efx->membase = ioremap_nocache(efx->membase_phys,
+                                      efx->type->mem_map_size);
         if (!efx->membase) {
                 netif_err(efx, probe, efx->net_dev,
                           "could not map memory BAR at %llx+%x\n",
diff --git a/drivers/net/sfc/io.h b/drivers/net/sfc/io.h

index cc978803d484bf84510ca5beb24ca41dd83a93ba..751d1ec112cc5d5e96f3755dec9482c8956457c8 100644 (file)
--- a/drivers/net/sfc/io.h
+++ b/drivers/net/sfc/io.h
@@ -103,7 +103,6 @@ static inline void efx_writeo(struct efx_nic *efx, efx_oword_t *value,
         _efx_writed(efx, value->u32[2], reg + 8);
         _efx_writed(efx, value->u32[3], reg + 12);
  #endif
-       wmb();
         mmiowb();
         spin_unlock_irqrestore(&efx->biu_lock, flags);
  }
@@ -126,7 +125,6 @@ static inline void efx_sram_writeq(struct efx_nic *efx, void __iomem *membase,
         __raw_writel((__force u32)value->u32[0], membase + addr);
         __raw_writel((__force u32)value->u32[1], membase + addr + 4);
  #endif
-       wmb();
         mmiowb();
         spin_unlock_irqrestore(&efx->biu_lock, flags);
  }
@@ -141,7 +139,6 @@ static inline void efx_writed(struct efx_nic *efx, efx_dword_t *value,
  
         /* No lock required */
         _efx_writed(efx, value->u32[0], reg);
-       wmb();
  }
  
  /* Read a 128-bit CSR, locking as appropriate. */
@@ -152,7 +149,6 @@ static inline void efx_reado(struct efx_nic *efx, efx_oword_t *value,
  
         spin_lock_irqsave(&efx->biu_lock, flags);
         value->u32[0] = _efx_readd(efx, reg + 0);
-       rmb();
         value->u32[1] = _efx_readd(efx, reg + 4);
         value->u32[2] = _efx_readd(efx, reg + 8);
         value->u32[3] = _efx_readd(efx, reg + 12);
@@ -175,7 +171,6 @@ static inline void efx_sram_readq(struct efx_nic *efx, void __iomem *membase,
         value->u64[0] = (__force __le64)__raw_readq(membase + addr);
  #else
         value->u32[0] = (__force __le32)__raw_readl(membase + addr);
-       rmb();
         value->u32[1] = (__force __le32)__raw_readl(membase + addr + 4);
  #endif
         spin_unlock_irqrestore(&efx->biu_lock, flags);
@@ -249,7 +244,6 @@ static inline void _efx_writeo_page(struct efx_nic *efx, efx_oword_t *value,
         _efx_writed(efx, value->u32[2], reg + 8);
         _efx_writed(efx, value->u32[3], reg + 12);
  #endif
-       wmb();
  }
  #define efx_writeo_page(efx, value, reg, page)                         \
         _efx_writeo_page(efx, value,                                    \
diff --git a/drivers/net/sfc/mcdi.c b/drivers/net/sfc/mcdi.c

index 3dd45ed61f0a33f5368c56df8347e1f3f5a7f1af..81a425397468a3c90e7b3542b8d4c72081cde84e 100644 (file)
--- a/drivers/net/sfc/mcdi.c
+++ b/drivers/net/sfc/mcdi.c
@@ -50,20 +50,6 @@ static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx)
         return &nic_data->mcdi;
  }
  
-static inline void
-efx_mcdi_readd(struct efx_nic *efx, efx_dword_t *value, unsigned reg)
-{
-       struct siena_nic_data *nic_data = efx->nic_data;
-       value->u32[0] = (__force __le32)__raw_readl(nic_data->mcdi_smem + reg);
-}
-
-static inline void
-efx_mcdi_writed(struct efx_nic *efx, const efx_dword_t *value, unsigned reg)
-{
-       struct siena_nic_data *nic_data = efx->nic_data;
-       __raw_writel((__force u32)value->u32[0], nic_data->mcdi_smem + reg);
-}
-
  void efx_mcdi_init(struct efx_nic *efx)
  {
         struct efx_mcdi_iface *mcdi;
@@ -84,8 +70,8 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd,
                             const u8 *inbuf, size_t inlen)
  {
         struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
-       unsigned pdu = MCDI_PDU(efx);
-       unsigned doorbell = MCDI_DOORBELL(efx);
+       unsigned pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx);
+       unsigned doorbell = FR_CZ_MC_TREG_SMEM + MCDI_DOORBELL(efx);
         unsigned int i;
         efx_dword_t hdr;
         u32 xflags, seqno;
@@ -106,28 +92,29 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd,
                              MCDI_HEADER_SEQ, seqno,
                              MCDI_HEADER_XFLAGS, xflags);
  
-       efx_mcdi_writed(efx, &hdr, pdu);
+       efx_writed(efx, &hdr, pdu);
  
         for (i = 0; i < inlen; i += 4)
-               efx_mcdi_writed(efx, (const efx_dword_t *)(inbuf + i),
-                               pdu + 4 + i);
+               _efx_writed(efx, *((__le32 *)(inbuf + i)), pdu + 4 + i);
+
+       /* Ensure the payload is written out before the header */
+       wmb();
  
         /* ring the doorbell with a distinctive value */
-       EFX_POPULATE_DWORD_1(hdr, EFX_DWORD_0, 0x45789abc);
-       efx_mcdi_writed(efx, &hdr, doorbell);
+       _efx_writed(efx, (__force __le32) 0x45789abc, doorbell);
  }
  
  static void efx_mcdi_copyout(struct efx_nic *efx, u8 *outbuf, size_t outlen)
  {
         struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
-       unsigned int pdu = MCDI_PDU(efx);
+       unsigned int pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx);
         int i;
  
         BUG_ON(atomic_read(&mcdi->state) == MCDI_STATE_QUIESCENT);
         BUG_ON(outlen & 3 || outlen >= 0x100);
  
         for (i = 0; i < outlen; i += 4)
-               efx_mcdi_readd(efx, (efx_dword_t *)(outbuf + i), pdu + 4 + i);
+               *((__le32 *)(outbuf + i)) = _efx_readd(efx, pdu + 4 + i);
  }
  
  static int efx_mcdi_poll(struct efx_nic *efx)
@@ -135,7 +122,7 @@ static int efx_mcdi_poll(struct efx_nic *efx)
         struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
         unsigned int time, finish;
         unsigned int respseq, respcmd, error;
-       unsigned int pdu = MCDI_PDU(efx);
+       unsigned int pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx);
         unsigned int rc, spins;
         efx_dword_t reg;
  
@@ -161,7 +148,8 @@ static int efx_mcdi_poll(struct efx_nic *efx)
  
                 time = get_seconds();
  
-               efx_mcdi_readd(efx, &reg, pdu);
+               rmb();
+               efx_readd(efx, &reg, pdu);
  
                 /* All 1's indicates that shared memory is in reset (and is
                  * not a valid header). Wait for it to come out reset before
@@ -188,7 +176,7 @@ static int efx_mcdi_poll(struct efx_nic *efx)
                           respseq, mcdi->seqno);
                 rc = EIO;
         } else if (error) {
-               efx_mcdi_readd(efx, &reg, pdu + 4);
+               efx_readd(efx, &reg, pdu + 4);
                 switch (EFX_DWORD_FIELD(reg, EFX_DWORD_0)) {
  #define TRANSLATE_ERROR(name)                                  \
                 case MC_CMD_ERR_ ## name:                       \
@@ -222,21 +210,21 @@ out:
  /* Test and clear MC-rebooted flag for this port/function */
  int efx_mcdi_poll_reboot(struct efx_nic *efx)
  {
-       unsigned int addr = MCDI_REBOOT_FLAG(efx);
+       unsigned int addr = FR_CZ_MC_TREG_SMEM + MCDI_REBOOT_FLAG(efx);
         efx_dword_t reg;
         uint32_t value;
  
         if (efx_nic_rev(efx) < EFX_REV_SIENA_A0)
                 return false;
  
-       efx_mcdi_readd(efx, &reg, addr);
+       efx_readd(efx, &reg, addr);
         value = EFX_DWORD_FIELD(reg, EFX_DWORD_0);
  
         if (value == 0)
                 return 0;
  
         EFX_ZERO_DWORD(reg);
-       efx_mcdi_writed(efx, &reg, addr);
+       efx_writed(efx, &reg, addr);
  
         if (value == MC_STATUS_DWORD_ASSERT)
                 return -EINTR;
diff --git a/drivers/net/sfc/nic.c b/drivers/net/sfc/nic.c

index bafa23a6874c1a20a5fbbd57e5167bc78bf63b98..3edfbaf5f0229b2190e9e40ec554b4b9c6a84c86 100644 (file)
--- a/drivers/net/sfc/nic.c
+++ b/drivers/net/sfc/nic.c
@@ -1936,13 +1936,6 @@ void efx_nic_get_regs(struct efx_nic *efx, void *buf)
  
                 size = min_t(size_t, table->step, 16);
  
-               if (table->offset >= efx->type->mem_map_size) {
-                       /* No longer mapped; return dummy data */
-                       memcpy(buf, "\xde\xc0\xad\xde", 4);
-                       buf += table->rows * size;
-                       continue;
-               }
-
                 for (i = 0; i < table->rows; i++) {
                         switch (table->step) {
                         case 4: /* 32-bit register or SRAM */
diff --git a/drivers/net/sfc/nic.h b/drivers/net/sfc/nic.h

index 4bd1f2839dfe89e67107b864e93ca48a2bf0d447..7443f99c977f0cf0757f1296560aa611e0ade47f 100644 (file)
--- a/drivers/net/sfc/nic.h
+++ b/drivers/net/sfc/nic.h
@@ -143,12 +143,10 @@ static inline struct falcon_board *falcon_board(struct efx_nic *efx)
  /**
   * struct siena_nic_data - Siena NIC state
   * @mcdi: Management-Controller-to-Driver Interface
- * @mcdi_smem: MCDI shared memory mapping. The mapping is always uncacheable.
   * @wol_filter_id: Wake-on-LAN packet filter id
   */
  struct siena_nic_data {
         struct efx_mcdi_iface mcdi;
-       void __iomem *mcdi_smem;
         int wol_filter_id;
  };
  
diff --git a/drivers/net/sfc/siena.c b/drivers/net/sfc/siena.c

index 5735e84c69de6eba4a5589a3b10e23173e7a5fa7..2c3bd93fab542d797c7769e438ee8269b283bb88 100644 (file)
--- a/drivers/net/sfc/siena.c
+++ b/drivers/net/sfc/siena.c
@@ -250,26 +250,12 @@ static int siena_probe_nic(struct efx_nic *efx)
         efx_reado(efx, &reg, FR_AZ_CS_DEBUG);
         efx->net_dev->dev_id = EFX_OWORD_FIELD(reg, FRF_CZ_CS_PORT_NUM) - 1;
  
-       /* Initialise MCDI */
-       nic_data->mcdi_smem = ioremap_nocache(efx->membase_phys +
-                                             FR_CZ_MC_TREG_SMEM,
-                                             FR_CZ_MC_TREG_SMEM_STEP *
-                                             FR_CZ_MC_TREG_SMEM_ROWS);
-       if (!nic_data->mcdi_smem) {
-               netif_err(efx, probe, efx->net_dev,
-                         "could not map MCDI at %llx+%x\n",
-                         (unsigned long long)efx->membase_phys +
-                         FR_CZ_MC_TREG_SMEM,
-                         FR_CZ_MC_TREG_SMEM_STEP * FR_CZ_MC_TREG_SMEM_ROWS);
-               rc = -ENOMEM;
-               goto fail1;
-       }
         efx_mcdi_init(efx);
  
         /* Recover from a failed assertion before probing */
         rc = efx_mcdi_handle_assertion(efx);
         if (rc)
-               goto fail2;
+               goto fail1;
  
         /* Let the BMC know that the driver is now in charge of link and
          * filter settings. We must do this before we reset the NIC */
@@ -324,7 +310,6 @@ fail4:
  fail3:
         efx_mcdi_drv_attach(efx, false, NULL);
  fail2:
-       iounmap(nic_data->mcdi_smem);
  fail1:
         kfree(efx->nic_data);
         return rc;
@@ -404,8 +389,6 @@ static int siena_init_nic(struct efx_nic *efx)
  
  static void siena_remove_nic(struct efx_nic *efx)
  {
-       struct siena_nic_data *nic_data = efx->nic_data;
-
         efx_nic_free_buffer(efx, &efx->irq_status);
  
         siena_reset_hw(efx, RESET_TYPE_ALL);
@@ -415,8 +398,7 @@ static void siena_remove_nic(struct efx_nic *efx)
                 efx_mcdi_drv_attach(efx, false, NULL);
  
         /* Tear down the private nic state */
-       iounmap(nic_data->mcdi_smem);
-       kfree(nic_data);
+       kfree(efx->nic_data);
         efx->nic_data = NULL;
  }
  
@@ -656,7 +638,8 @@ const struct efx_nic_type siena_a0_nic_type = {
         .default_mac_ops = &efx_mcdi_mac_operations,
  
         .revision = EFX_REV_SIENA_A0,
-       .mem_map_size = FR_CZ_MC_TREG_SMEM, /* MC_TREG_SMEM mapped separately */
+       .mem_map_size = (FR_CZ_MC_TREG_SMEM +
+                        FR_CZ_MC_TREG_SMEM_STEP * FR_CZ_MC_TREG_SMEM_ROWS),
         .txd_ptr_tbl_base = FR_BZ_TX_DESC_PTR_TBL,
         .rxd_ptr_tbl_base = FR_BZ_RX_DESC_PTR_TBL,
         .buf_tbl_base = FR_BZ_BUF_FULL_TBL,
diff --git a/drivers/net/sfc/workarounds.h b/drivers/net/sfc/workarounds.h

index 99ff11400cef646458279500b3b230b804c4be8d..e4dd3a7f304b5b70e874530fda18d251cc4515b1 100644 (file)
--- a/drivers/net/sfc/workarounds.h
+++ b/drivers/net/sfc/workarounds.h
@@ -38,8 +38,6 @@
  #define EFX_WORKAROUND_15783 EFX_WORKAROUND_ALWAYS
  /* Legacy interrupt storm when interrupt fifo fills */
  #define EFX_WORKAROUND_17213 EFX_WORKAROUND_SIENA
-/* Write combining and sriov=enabled are incompatible */
-#define EFX_WORKAROUND_22643 EFX_WORKAROUND_SIENA
  
  /* Spurious parity errors in TSORT buffers */
  #define EFX_WORKAROUND_5129 EFX_WORKAROUND_FALCON_A
diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c

index ad35c210b839b950726960144ac1da72a43833cc..1c1666e99106eb8ed09ab7e0f8290abd74cb8727 100644 (file)
--- a/drivers/net/sh_eth.c
+++ b/drivers/net/sh_eth.c
@@ -21,6 +21,7 @@
   */
  
  #include <linux/init.h>
+#include <linux/interrupt.h>
  #include <linux/dma-mapping.h>
  #include <linux/etherdevice.h>
  #include <linux/delay.h>
@@ -30,6 +31,7 @@
  #include <linux/phy.h>
  #include <linux/cache.h>
  #include <linux/io.h>
+#include <linux/interrupt.h>
  #include <linux/pm_runtime.h>
  #include <linux/slab.h>
  #include <linux/ethtool.h>
diff --git a/drivers/net/slip.c b/drivers/net/slip.c

index f11b3f3df24fa813a2a1b967be619057c12c0f0f..4c617534f9377a40da2755b77c54dbc7da2cf978 100644 (file)
--- a/drivers/net/slip.c
+++ b/drivers/net/slip.c
@@ -367,7 +367,7 @@ static void sl_bump(struct slip *sl)
         memcpy(skb_put(skb, count), sl->rbuff, count);
         skb_reset_mac_header(skb);
         skb->protocol = htons(ETH_P_IP);
-       netif_rx(skb);
+       netif_rx_ni(skb);
         dev->stats.rx_packets++;
  }
  
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c

index a03336e086d5ec11423a62b492304c2f00769fc3..f06fb78383a1b865b22c989661a1795f4d7f0368 100644 (file)
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -228,23 +228,40 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
         if (ctx->rx_max != le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)) {
  
                 if (flags & USB_CDC_NCM_NCAP_NTB_INPUT_SIZE) {
-                       struct usb_cdc_ncm_ndp_input_size ndp_in_sz;
+                       struct usb_cdc_ncm_ndp_input_size *ndp_in_sz;
+
+                       ndp_in_sz = kzalloc(sizeof(*ndp_in_sz), GFP_KERNEL);
+                       if (!ndp_in_sz) {
+                               err = -ENOMEM;
+                               goto size_err;
+                       }
+
                         err = usb_control_msg(ctx->udev,
                                         usb_sndctrlpipe(ctx->udev, 0),
                                         USB_CDC_SET_NTB_INPUT_SIZE,
                                         USB_TYPE_CLASS | USB_DIR_OUT
                                          | USB_RECIP_INTERFACE,
-                                       0, iface_no, &ndp_in_sz, 8, 1000);
+                                       0, iface_no, ndp_in_sz, 8, 1000);
+                       kfree(ndp_in_sz);
                 } else {
-                       __le32 dwNtbInMaxSize = cpu_to_le32(ctx->rx_max);
+                       __le32 *dwNtbInMaxSize;
+                       dwNtbInMaxSize = kzalloc(sizeof(*dwNtbInMaxSize),
+                                       GFP_KERNEL);
+                       if (!dwNtbInMaxSize) {
+                               err = -ENOMEM;
+                               goto size_err;
+                       }
+                       *dwNtbInMaxSize = cpu_to_le32(ctx->rx_max);
+
                         err = usb_control_msg(ctx->udev,
                                         usb_sndctrlpipe(ctx->udev, 0),
                                         USB_CDC_SET_NTB_INPUT_SIZE,
                                         USB_TYPE_CLASS | USB_DIR_OUT
                                          | USB_RECIP_INTERFACE,
-                                       0, iface_no, &dwNtbInMaxSize, 4, 1000);
+                                       0, iface_no, dwNtbInMaxSize, 4, 1000);
+                       kfree(dwNtbInMaxSize);
                 }
-
+size_err:
                 if (err < 0)
                         pr_debug("Setting NTB Input Size failed\n");
         }
@@ -325,19 +342,29 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
  
         /* set Max Datagram Size (MTU) */
         if (flags & USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE) {
-               __le16 max_datagram_size;
+               __le16 *max_datagram_size;
                 u16 eth_max_sz = le16_to_cpu(ctx->ether_desc->wMaxSegmentSize);
+
+               max_datagram_size = kzalloc(sizeof(*max_datagram_size),
+                               GFP_KERNEL);
+               if (!max_datagram_size) {
+                       err = -ENOMEM;
+                       goto max_dgram_err;
+               }
+
                 err = usb_control_msg(ctx->udev, usb_rcvctrlpipe(ctx->udev, 0),
                                 USB_CDC_GET_MAX_DATAGRAM_SIZE,
                                 USB_TYPE_CLASS | USB_DIR_IN
                                  | USB_RECIP_INTERFACE,
-                               0, iface_no, &max_datagram_size,
+                               0, iface_no, max_datagram_size,
                                 2, 1000);
                 if (err < 0) {
                         pr_debug("GET_MAX_DATAGRAM_SIZE failed, use size=%u\n",
                                                 CDC_NCM_MIN_DATAGRAM_SIZE);
+                       kfree(max_datagram_size);
                 } else {
-                       ctx->max_datagram_size = le16_to_cpu(max_datagram_size);
+                       ctx->max_datagram_size =
+                               le16_to_cpu(*max_datagram_size);
                         /* Check Eth descriptor value */
                         if (eth_max_sz < CDC_NCM_MAX_DATAGRAM_SIZE) {
                                 if (ctx->max_datagram_size > eth_max_sz)
@@ -360,8 +387,10 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx)
                                                 USB_TYPE_CLASS | USB_DIR_OUT
                                                  | USB_RECIP_INTERFACE,
                                                 0,
-                                               iface_no, &max_datagram_size,
+                                               iface_no, max_datagram_size,
                                                 2, 1000);
+                       kfree(max_datagram_size);
+max_dgram_err:
                         if (err < 0)
                                 pr_debug("SET_MAX_DATAGRAM_SIZE failed\n");
                 }
diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c

index 15772b1b6a91c71a847b45e9ecf8f854cba461dc..13c1f044b40d2a966215e340f65163edc8e6e771 100644 (file)
--- a/drivers/net/usb/ipheth.c
+++ b/drivers/net/usb/ipheth.c
@@ -59,6 +59,7 @@
  #define USB_PRODUCT_IPHONE_3G   0x1292
  #define USB_PRODUCT_IPHONE_3GS  0x1294
  #define USB_PRODUCT_IPHONE_4   0x1297
+#define USB_PRODUCT_IPHONE_4_VZW 0x129c
  
  #define IPHETH_USBINTF_CLASS    255
  #define IPHETH_USBINTF_SUBCLASS 253
@@ -98,6 +99,10 @@ static struct usb_device_id ipheth_table[] = {
                 USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_4,
                 IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
                 IPHETH_USBINTF_PROTO) },
+       { USB_DEVICE_AND_INTERFACE_INFO(
+               USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_4_VZW,
+               IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
+               IPHETH_USBINTF_PROTO) },
         { }
  };
  MODULE_DEVICE_TABLE(usb, ipheth_table);
diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c

index 041fb7d43c4f7d077c8175c5d395901a1d92061c..ef3b236b51457d49bbd59ddef6a4dbd648c24dd1 100644 (file)
--- a/drivers/net/usb/rtl8150.c
+++ b/drivers/net/usb/rtl8150.c
@@ -977,7 +977,6 @@ static void rtl8150_disconnect(struct usb_interface *intf)
         usb_set_intfdata(intf, NULL);
         if (dev) {
                 set_bit(RTL8150_UNPLUG, &dev->flags);
-               tasklet_disable(&dev->tl);
                 tasklet_kill(&dev->tl);
                 unregister_netdev(dev->netdev);
                 unlink_all_urbs(dev);
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c

index deb1eca13c9f4f8321facaced82e6740da5f5cb5..7c5336c5c37f09c91d2bacfdb0fbe92fdfbddd31 100644 (file)
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -515,10 +515,6 @@ static void velocity_init_cam_filter(struct velocity_info *vptr)
         mac_set_cam_mask(regs, vptr->mCAMmask);
  
         /* Enable VCAMs */
-
-       if (test_bit(0, vptr->active_vlans))
-               WORD_REG_BITS_ON(MCFG_RTGOPT, &regs->MCFG);
-
         for_each_set_bit(vid, vptr->active_vlans, VLAN_N_VID) {
                 mac_set_vlan_cam(regs, i, (u8 *) &vid);
                 vptr->vCAMmask[i / 8] |= 0x1 << (i % 8);
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c

index 1cbacb3896528e0cc5b54e99abd34b5a809fb714..0959583feb27ed2c7962e6d6e7a931c53f7d664a 100644 (file)
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1929,14 +1929,17 @@ static void
  vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
  {
         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
-       u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
-       unsigned long flags;
  
-       VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
-       spin_lock_irqsave(&adapter->cmd_lock, flags);
-       VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
-                              VMXNET3_CMD_UPDATE_VLAN_FILTERS);
-       spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+       if (!(netdev->flags & IFF_PROMISC)) {
+               u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
+               unsigned long flags;
+
+               VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
+               spin_lock_irqsave(&adapter->cmd_lock, flags);
+               VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+                                      VMXNET3_CMD_UPDATE_VLAN_FILTERS);
+               spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+       }
  
         set_bit(vid, adapter->active_vlans);
  }
@@ -1946,14 +1949,17 @@ static void
  vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
  {
         struct vmxnet3_adapter *adapter = netdev_priv(netdev);
-       u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
-       unsigned long flags;
  
-       VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
-       spin_lock_irqsave(&adapter->cmd_lock, flags);
-       VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
-                              VMXNET3_CMD_UPDATE_VLAN_FILTERS);
-       spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+       if (!(netdev->flags & IFF_PROMISC)) {
+               u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
+               unsigned long flags;
+
+               VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
+               spin_lock_irqsave(&adapter->cmd_lock, flags);
+               VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+                                      VMXNET3_CMD_UPDATE_VLAN_FILTERS);
+               spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+       }
  
         clear_bit(vid, adapter->active_vlans);
  }
diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c

index f54dff44ed50d1e2936db789cb0af5c5451ad46b..c3119a6caaceb2789feb6a1854703e7d54b6dbaa 100644 (file)
--- a/drivers/net/wireless/ath/ath5k/base.c
+++ b/drivers/net/wireless/ath/ath5k/base.c
@@ -1735,6 +1735,8 @@ ath5k_beacon_setup(struct ath5k_hw *ah, struct ath5k_buf *bf)
  
         if (dma_mapping_error(ah->dev, bf->skbaddr)) {
                 ATH5K_ERR(ah, "beacon DMA mapping failed\n");
+               dev_kfree_skb_any(skb);
+               bf->skb = NULL;
                 return -EIO;
         }
  
@@ -1819,8 +1821,6 @@ ath5k_beacon_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
         ath5k_txbuf_free_skb(ah, avf->bbuf);
         avf->bbuf->skb = skb;
         ret = ath5k_beacon_setup(ah, avf->bbuf);
-       if (ret)
-               avf->bbuf->skb = NULL;
  out:
         return ret;
  }
@@ -1840,6 +1840,7 @@ ath5k_beacon_send(struct ath5k_hw *ah)
         struct ath5k_vif *avf;
         struct ath5k_buf *bf;
         struct sk_buff *skb;
+       int err;
  
         ATH5K_DBG_UNLIMIT(ah, ATH5K_DEBUG_BEACON, "in beacon_send\n");
  
@@ -1888,11 +1889,6 @@ ath5k_beacon_send(struct ath5k_hw *ah)
  
         avf = (void *)vif->drv_priv;
         bf = avf->bbuf;
-       if (unlikely(bf->skb == NULL || ah->opmode == NL80211_IFTYPE_STATION ||
-                    ah->opmode == NL80211_IFTYPE_MONITOR)) {
-               ATH5K_WARN(ah, "bf=%p bf_skb=%p\n", bf, bf ? bf->skb : NULL);
-               return;
-       }
  
         /*
          * Stop any current dma and put the new frame on the queue.
@@ -1906,8 +1902,17 @@ ath5k_beacon_send(struct ath5k_hw *ah)
  
         /* refresh the beacon for AP or MESH mode */
         if (ah->opmode == NL80211_IFTYPE_AP ||
-           ah->opmode == NL80211_IFTYPE_MESH_POINT)
-               ath5k_beacon_update(ah->hw, vif);
+           ah->opmode == NL80211_IFTYPE_MESH_POINT) {
+               err = ath5k_beacon_update(ah->hw, vif);
+               if (err)
+                       return;
+       }
+
+       if (unlikely(bf->skb == NULL || ah->opmode == NL80211_IFTYPE_STATION ||
+                    ah->opmode == NL80211_IFTYPE_MONITOR)) {
+               ATH5K_WARN(ah, "bf=%p bf_skb=%p\n", bf, bf->skb);
+               return;
+       }
  
         trace_ath5k_tx(ah, bf->skb, &ah->txqs[ah->bhalq]);
  
diff --git a/drivers/net/wireless/ath/ath9k/ar9002_calib.c b/drivers/net/wireless/ath/ath9k/ar9002_calib.c

index 2d4c0910295bd39b08cb4abdc845c140d6bc22de..2d394af82171fe4031e1c1797fbb7db158879e8c 100644 (file)
--- a/drivers/net/wireless/ath/ath9k/ar9002_calib.c
+++ b/drivers/net/wireless/ath/ath9k/ar9002_calib.c
@@ -41,7 +41,8 @@ static bool ar9002_hw_is_cal_supported(struct ath_hw *ah,
         case ADC_DC_CAL:
                 /* Run ADC Gain Cal for non-CCK & non 2GHz-HT20 only */
                 if (!IS_CHAN_B(chan) &&
-                   !(IS_CHAN_2GHZ(chan) && IS_CHAN_HT20(chan)))
+                   !((IS_CHAN_2GHZ(chan) || IS_CHAN_A_FAST_CLOCK(ah, chan)) &&
+                     IS_CHAN_HT20(chan)))
                         supported = true;
                 break;
         }
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c

index d109c25417f4930b525ac43da631c4fe17c2ebdb..1b9400371eaf0b1b8f746a62952f3c7f18291226 100644 (file)
--- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
@@ -69,7 +69,7 @@ static int ar9003_hw_power_interpolate(int32_t x,
  static const struct ar9300_eeprom ar9300_default = {
         .eepromVersion = 2,
         .templateVersion = 2,
-       .macAddr = {1, 2, 3, 4, 5, 6},
+       .macAddr = {0, 2, 3, 4, 5, 6},
         .custData = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
         .baseEepHeader = {
@@ -307,7 +307,7 @@ static const struct ar9300_eeprom ar9300_default = {
                  { { CTL(60, 0), CTL(60, 1), CTL(60, 0), CTL(60, 0) } },
                  { { CTL(60, 1), CTL(60, 0), CTL(60, 0), CTL(60, 1) } },
  
-                { { CTL(60, 1), CTL(60, 0), CTL(0, 0), CTL(0, 0) } },
+                { { CTL(60, 1), CTL(60, 0), CTL(60, 0), CTL(60, 0) } },
                  { { CTL(60, 0), CTL(60, 1), CTL(60, 0), CTL(60, 0) } },
                  { { CTL(60, 0), CTL(60, 1), CTL(60, 0), CTL(60, 0) } },
  
@@ -884,7 +884,7 @@ static const struct ar9300_eeprom ar9300_x113 = {
                  { { CTL(60, 0), CTL(60, 1), CTL(60, 0), CTL(60, 0) } },
                  { { CTL(60, 1), CTL(60, 0), CTL(60, 0), CTL(60, 1) } },
  
-                { { CTL(60, 1), CTL(60, 0), CTL(0, 0), CTL(0, 0) } },
+                { { CTL(60, 1), CTL(60, 0), CTL(60, 0), CTL(60, 0) } },
                  { { CTL(60, 0), CTL(60, 1), CTL(60, 0), CTL(60, 0) } },
                  { { CTL(60, 0), CTL(60, 1), CTL(60, 0), CTL(60, 0) } },
  
@@ -2040,7 +2040,7 @@ static const struct ar9300_eeprom ar9300_x112 = {
                 { { CTL(60, 0), CTL(60, 1), CTL(60, 0), CTL(60, 0) } },
                 { { CTL(60, 1), CTL(60, 0), CTL(60, 0), CTL(60, 1) } },
  
-               { { CTL(60, 1), CTL(60, 0), CTL(0, 0), CTL(0, 0) } },
+               { { CTL(60, 1), CTL(60, 0), CTL(60, 0), CTL(60, 0) } },
                 { { CTL(60, 0), CTL(60, 1), CTL(60, 0), CTL(60, 0) } },
                 { { CTL(60, 0), CTL(60, 1), CTL(60, 0), CTL(60, 0) } },
  
@@ -3734,7 +3734,7 @@ static void ar9003_hw_internal_regulator_apply(struct ath_hw *ah)
                                 }
                         } else {
                                 reg_pmu_set = (5 << 1) | (7 << 4) |
-                                             (1 << 8) | (2 << 14) |
+                                             (2 << 8) | (2 << 14) |
                                               (6 << 17) | (1 << 20) |
                                               (3 << 24) | (1 << 28);
                         }
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c

index 1baca8e4715d9ad62e97c8ef8b9e0b4ec431f240..fcafec0605f41e94679c889e092fcc56b6eb9748 100644 (file)
--- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c
@@ -671,7 +671,7 @@ static int ar9003_hw_process_ini(struct ath_hw *ah,
                 REG_WRITE_ARRAY(&ah->iniModesAdditional,
                                 modesIndex, regWrites);
  
-       if (AR_SREV_9300(ah))
+       if (AR_SREV_9330(ah))
                 REG_WRITE_ARRAY(&ah->iniModesAdditional, 1, regWrites);
  
         if (AR_SREV_9340(ah) && !ah->is_clk_25mhz)
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.h b/drivers/net/wireless/ath/ath9k/ar9003_phy.h

index 6de3f0bc18e6f34ab9c69d171972237823d5ad84..5c590429f120c307688952206e997aef6832d066 100644 (file)
--- a/drivers/net/wireless/ath/ath9k/ar9003_phy.h
+++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.h
@@ -850,7 +850,7 @@
  #define AR_PHY_TPC_11_B1         (AR_SM1_BASE + 0x220)
  #define AR_PHY_PDADC_TAB_1       (AR_SM1_BASE + 0x240)
  #define AR_PHY_TX_IQCAL_STATUS_B1   (AR_SM1_BASE + 0x48c)
-#define AR_PHY_TX_IQCAL_CORR_COEFF_B1(_i)    (AR_SM_BASE + 0x450 + ((_i) << 2))
+#define AR_PHY_TX_IQCAL_CORR_COEFF_B1(_i)    (AR_SM1_BASE + 0x450 + ((_i) << 2))
  
  /*
   * Channel 2 Register Map
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c

index 9098aaad97a92707bdf62d2dcbdcc4dcb38a7a15..722967b86cf12642340a9eab31fe69e6425a43ad 100644 (file)
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -2283,7 +2283,11 @@ static void ath9k_set_coverage_class(struct ieee80211_hw *hw, u8 coverage_class)
  
         mutex_lock(&sc->mutex);
         ah->coverage_class = coverage_class;
+
+       ath9k_ps_wakeup(sc);
         ath9k_hw_init_global_settings(ah);
+       ath9k_ps_restore(sc);
+
         mutex_unlock(&sc->mutex);
  }
  
@@ -2299,6 +2303,12 @@ static void ath9k_flush(struct ieee80211_hw *hw, bool drop)
         mutex_lock(&sc->mutex);
         cancel_delayed_work_sync(&sc->tx_complete_work);
  
+       if (ah->ah_flags & AH_UNPLUGGED) {
+               ath_dbg(common, ATH_DBG_ANY, "Device has been unplugged!\n");
+               mutex_unlock(&sc->mutex);
+               return;
+       }
+
         if (sc->sc_flags & SC_OP_INVALID) {
                 ath_dbg(common, ATH_DBG_ANY, "Device not present\n");
                 mutex_unlock(&sc->mutex);
diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c

index 0122930b14c71f85ab0f9d779a87030373011ebb..0474e6638d218e60ca3cd2533818d6027e954433 100644 (file)
--- a/drivers/net/wireless/ath/carl9170/main.c
+++ b/drivers/net/wireless/ath/carl9170/main.c
@@ -1066,8 +1066,10 @@ static int carl9170_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
          * the high througput speed in 802.11n networks.
          */
  
-       if (!is_main_vif(ar, vif))
+       if (!is_main_vif(ar, vif)) {
+               mutex_lock(&ar->mutex);
                 goto err_softw;
+       }
  
         /*
          * While the hardware supports *catch-all* key, for offloading
diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c

index 83cba22ac6e88d6c6bddeecc1d65815b62db5f3f..481e534534eb3a437a732509c80bba6db73ff2e1 100644 (file)
--- a/drivers/net/wireless/b43/dma.c
+++ b/drivers/net/wireless/b43/dma.c
@@ -795,9 +795,23 @@ static u64 supported_dma_mask(struct b43_wldev *dev)
         u32 tmp;
         u16 mmio_base;
  
-       tmp = b43_read32(dev, SSB_TMSHIGH);
-       if (tmp & SSB_TMSHIGH_DMA64)
-               return DMA_BIT_MASK(64);
+       switch (dev->dev->bus_type) {
+#ifdef CONFIG_B43_BCMA
+       case B43_BUS_BCMA:
+               tmp = bcma_aread32(dev->dev->bdev, BCMA_IOST);
+               if (tmp & BCMA_IOST_DMA64)
+                       return DMA_BIT_MASK(64);
+               break;
+#endif
+#ifdef CONFIG_B43_SSB
+       case B43_BUS_SSB:
+               tmp = ssb_read32(dev->dev->sdev, SSB_TMSHIGH);
+               if (tmp & SSB_TMSHIGH_DMA64)
+                       return DMA_BIT_MASK(64);
+               break;
+#endif
+       }
+
         mmio_base = b43_dmacontroller_base(0, 0);
         b43_write32(dev, mmio_base + B43_DMA32_TXCTL, B43_DMA32_TXADDREXT_MASK);
         tmp = b43_read32(dev, mmio_base + B43_DMA32_TXCTL);
diff --git a/drivers/net/wireless/iwlegacy/iwl-3945-rs.c b/drivers/net/wireless/iwlegacy/iwl-3945-rs.c

index 977bd2477c6a91dd5cdad45a5b4ae3a3bbc79c64..164bcae821f833e81630821e3df9d0d09ca71867 100644 (file)
--- a/drivers/net/wireless/iwlegacy/iwl-3945-rs.c
+++ b/drivers/net/wireless/iwlegacy/iwl-3945-rs.c
@@ -822,12 +822,15 @@ static void iwl3945_rs_get_rate(void *priv_r, struct ieee80211_sta *sta,
  
   out:
  
-       rs_sta->last_txrate_idx = index;
-       if (sband->band == IEEE80211_BAND_5GHZ)
-               info->control.rates[0].idx = rs_sta->last_txrate_idx -
-                               IWL_FIRST_OFDM_RATE;
-       else
+       if (sband->band == IEEE80211_BAND_5GHZ) {
+               if (WARN_ON_ONCE(index < IWL_FIRST_OFDM_RATE))
+                       index = IWL_FIRST_OFDM_RATE;
+               rs_sta->last_txrate_idx = index;
+               info->control.rates[0].idx = index - IWL_FIRST_OFDM_RATE;
+       } else {
+               rs_sta->last_txrate_idx = index;
                 info->control.rates[0].idx = rs_sta->last_txrate_idx;
+       }
  
         IWL_DEBUG_RATE(priv, "leave: %d\n", index);
  }
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c b/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c

index a895a099d086adcc99cef9629e2acbc1e1681188..56211006a182900cf56fc6a013893299a28795e5 100644 (file)
--- a/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn-ucode.c
@@ -167,7 +167,7 @@ static int iwlagn_set_temperature_offset_calib(struct iwl_priv *priv)
  
         memset(&cmd, 0, sizeof(cmd));
         iwl_set_calib_hdr(&cmd.hdr, IWL_PHY_CALIBRATE_TEMP_OFFSET_CMD);
-       memcpy(&cmd.radio_sensor_offset, offset_calib, sizeof(offset_calib));
+       memcpy(&cmd.radio_sensor_offset, offset_calib, sizeof(*offset_calib));
         if (!(cmd.radio_sensor_offset))
                 cmd.radio_sensor_offset = DEFAULT_RADIO_SENSOR_OFFSET;
  
diff --git a/drivers/net/wireless/iwlwifi/iwl-pci.c b/drivers/net/wireless/iwlwifi/iwl-pci.c

index 69d4ec467dca70d8da0dbe0129677f42da2f2d30..2fdbffa079c1fdc4aa0529d5c2b0a885c8a18870 100644 (file)
--- a/drivers/net/wireless/iwlwifi/iwl-pci.c
+++ b/drivers/net/wireless/iwlwifi/iwl-pci.c
@@ -478,27 +478,22 @@ out_no_pci:
         return err;
  }
  
-static void iwl_pci_down(struct iwl_bus *bus)
-{
-       struct iwl_pci_bus *pci_bus = (struct iwl_pci_bus *) bus->bus_specific;
-
-       pci_disable_msi(pci_bus->pci_dev);
-       pci_iounmap(pci_bus->pci_dev, pci_bus->hw_base);
-       pci_release_regions(pci_bus->pci_dev);
-       pci_disable_device(pci_bus->pci_dev);
-       pci_set_drvdata(pci_bus->pci_dev, NULL);
-
-       kfree(bus);
-}
-
  static void __devexit iwl_pci_remove(struct pci_dev *pdev)
  {
         struct iwl_priv *priv = pci_get_drvdata(pdev);
-       void *bus_specific = priv->bus->bus_specific;
+       struct iwl_bus *bus = priv->bus;
+       struct iwl_pci_bus *pci_bus = IWL_BUS_GET_PCI_BUS(bus);
+       struct pci_dev *pci_dev = IWL_BUS_GET_PCI_DEV(bus);
  
         iwl_remove(priv);
  
-       iwl_pci_down(bus_specific);
+       pci_disable_msi(pci_dev);
+       pci_iounmap(pci_dev, pci_bus->hw_base);
+       pci_release_regions(pci_dev);
+       pci_disable_device(pci_dev);
+       pci_set_drvdata(pci_dev, NULL);
+
+       kfree(bus);
  }
  
  #ifdef CONFIG_PM
diff --git a/drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c b/drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c

index a6b2b1db0b1dd11051e25d24f852ce602ec4fde7..222d410c586e4e481a8c474471ed8702e41aac90 100644 (file)
--- a/drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c
+++ b/drivers/net/wireless/iwlwifi/iwl-trans-tx-pcie.c
@@ -771,6 +771,8 @@ void iwl_tx_cmd_complete(struct iwl_priv *priv, struct iwl_rx_mem_buffer *rxb)
         cmd = txq->cmd[cmd_index];
         meta = &txq->meta[cmd_index];
  
+       txq->time_stamp = jiffies;
+
         iwlagn_unmap_tfd(priv, meta, &txq->tfds[index], DMA_BIDIRECTIONAL);
  
         /* Input error checking is done when commands are added to queue. */
diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c

index 507559361d87f7cd657586cd96c095c1484a3d49..dbf501ca317f51cac690b87a0edb7f8a84145353 100644 (file)
--- a/drivers/net/wireless/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/rt2x00/rt2800usb.c
@@ -464,6 +464,15 @@ static bool rt2800usb_txdone_entry_check(struct queue_entry *entry, u32 reg)
         int wcid, ack, pid;
         int tx_wcid, tx_ack, tx_pid;
  
+       if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
+           !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) {
+               WARNING(entry->queue->rt2x00dev,
+                       "Data pending for entry %u in queue %u\n",
+                       entry->entry_idx, entry->queue->qid);
+               cond_resched();
+               return false;
+       }
+
         wcid    = rt2x00_get_field32(reg, TX_STA_FIFO_WCID);
         ack     = rt2x00_get_field32(reg, TX_STA_FIFO_TX_ACK_REQUIRED);
         pid     = rt2x00_get_field32(reg, TX_STA_FIFO_PID_TYPE);
@@ -529,12 +538,11 @@ static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
                         entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE);
                         if (rt2800usb_txdone_entry_check(entry, reg))
                                 break;
+                       entry = NULL;
                 }
  
-               if (!entry || rt2x00queue_empty(queue))
-                       break;
-
-               rt2800_txdone_entry(entry, reg);
+               if (entry)
+                       rt2800_txdone_entry(entry, reg);
         }
  }
  
@@ -558,8 +566,10 @@ static void rt2800usb_work_txdone(struct work_struct *work)
                 while (!rt2x00queue_empty(queue)) {
                         entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE);
  
-                       if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
+                       if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
+                           !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags))
                                 break;
+
                         if (test_bit(ENTRY_DATA_IO_FAILED, &entry->flags))
                                 rt2x00lib_txdone_noinfo(entry, TXDONE_FAILURE);
                         else if (rt2x00queue_status_timeout(entry))
@@ -921,6 +931,8 @@ static struct usb_device_id rt2800usb_device_table[] = {
         { USB_DEVICE(0x07d1, 0x3c16) },
         /* Draytek */
         { USB_DEVICE(0x07fa, 0x7712) },
+       /* DVICO */
+       { USB_DEVICE(0x0fe9, 0xb307) },
         /* Edimax */
         { USB_DEVICE(0x7392, 0x7711) },
         { USB_DEVICE(0x7392, 0x7717) },
diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c

index b6b4542c2460e6539de067048270385fba345d85..1e31050dafc9e9b2ea2a59073137de053e48074b 100644 (file)
--- a/drivers/net/wireless/rt2x00/rt2x00usb.c
+++ b/drivers/net/wireless/rt2x00/rt2x00usb.c
@@ -262,23 +262,20 @@ static void rt2x00usb_interrupt_txdone(struct urb *urb)
         struct queue_entry *entry = (struct queue_entry *)urb->context;
         struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev;
  
-       if (!test_and_clear_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
+       if (!test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
                 return;
-
-       if (rt2x00dev->ops->lib->tx_dma_done)
-               rt2x00dev->ops->lib->tx_dma_done(entry);
-
-       /*
-        * Report the frame as DMA done
-        */
-       rt2x00lib_dmadone(entry);
-
         /*
          * Check if the frame was correctly uploaded
          */
         if (urb->status)
                 set_bit(ENTRY_DATA_IO_FAILED, &entry->flags);
+       /*
+        * Report the frame as DMA done
+        */
+       rt2x00lib_dmadone(entry);
  
+       if (rt2x00dev->ops->lib->tx_dma_done)
+               rt2x00dev->ops->lib->tx_dma_done(entry);
         /*
          * Schedule the delayed work for reading the TX status
          * from the device.
@@ -874,18 +871,8 @@ int rt2x00usb_suspend(struct usb_interface *usb_intf, pm_message_t state)
  {
         struct ieee80211_hw *hw = usb_get_intfdata(usb_intf);
         struct rt2x00_dev *rt2x00dev = hw->priv;
-       int retval;
-
-       retval = rt2x00lib_suspend(rt2x00dev, state);
-       if (retval)
-               return retval;
  
-       /*
-        * Decrease usbdev refcount.
-        */
-       usb_put_dev(interface_to_usbdev(usb_intf));
-
-       return 0;
+       return rt2x00lib_suspend(rt2x00dev, state);
  }
  EXPORT_SYMBOL_GPL(rt2x00usb_suspend);
  
@@ -894,8 +881,6 @@ int rt2x00usb_resume(struct usb_interface *usb_intf)
         struct ieee80211_hw *hw = usb_get_intfdata(usb_intf);
         struct rt2x00_dev *rt2x00dev = hw->priv;
  
-       usb_get_dev(interface_to_usbdev(usb_intf));
-
         return rt2x00lib_resume(rt2x00dev);
  }
  EXPORT_SYMBOL_GPL(rt2x00usb_resume);
diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c

index 6a93939f44e887984d3f9877a8753485b3d82980..0baeb894f093e11e81f13f3d55c4c1624f546536 100644 (file)
--- a/drivers/net/wireless/rt2x00/rt73usb.c
+++ b/drivers/net/wireless/rt2x00/rt73usb.c
@@ -2420,6 +2420,7 @@ static struct usb_device_id rt73usb_device_table[] = {
         /* Buffalo */
         { USB_DEVICE(0x0411, 0x00d8) },
         { USB_DEVICE(0x0411, 0x00d9) },
+       { USB_DEVICE(0x0411, 0x00e6) },
         { USB_DEVICE(0x0411, 0x00f4) },
         { USB_DEVICE(0x0411, 0x0116) },
         { USB_DEVICE(0x0411, 0x0119) },
diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c

index 1bdc1aa305c0ea1c48e371da14a31cd9b95a86ed..04c4e9eb6ee6ed726357fb6bb3ac8da26c4aa7be 100644 (file)
--- a/drivers/net/wireless/rtlwifi/core.c
+++ b/drivers/net/wireless/rtlwifi/core.c
@@ -610,6 +610,11 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw *hw,
  
                         mac->link_state = MAC80211_NOLINK;
                         memset(mac->bssid, 0, 6);
+
+                       /* reset sec info */
+                       rtl_cam_reset_sec_info(hw);
+
+                       rtl_cam_reset_all_entry(hw);
                         mac->vendor = PEER_UNKNOWN;
  
                         RT_TRACE(rtlpriv, COMP_MAC80211, DBG_DMESG,
@@ -1063,6 +1068,9 @@ static int rtl_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
                  *or clear all entry here.
                  */
                 rtl_cam_delete_one_entry(hw, mac_addr, key_idx);
+
+               rtl_cam_reset_sec_info(hw);
+
                 break;
         default:
                 RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c

index 942f7a3969a79ceb857904053a689b2421f90fe0..ef63c0df006ad313de8781656b2bca4a15942ee7 100644 (file)
--- a/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192cu/sw.c
@@ -281,6 +281,8 @@ static struct usb_device_id rtl8192c_usb_ids[] = {
         {RTL_USB_DEVICE(USB_VENDER_ID_REALTEK, 0x817d, rtl92cu_hal_cfg)},
         /* 8188CE-VAU USB minCard (b/g mode only) */
         {RTL_USB_DEVICE(USB_VENDER_ID_REALTEK, 0x817e, rtl92cu_hal_cfg)},
+       /* 8188RU in Alfa AWUS036NHR */
+       {RTL_USB_DEVICE(USB_VENDER_ID_REALTEK, 0x817f, rtl92cu_hal_cfg)},
         /* 8188 Combo for BC4 */
         {RTL_USB_DEVICE(USB_VENDER_ID_REALTEK, 0x8754, rtl92cu_hal_cfg)},
  
@@ -303,20 +305,23 @@ static struct usb_device_id rtl8192c_usb_ids[] = {
         {RTL_USB_DEVICE(0x0eb0, 0x9071, rtl92cu_hal_cfg)}, /*NO Brand - Etop*/
         /* HP - Lite-On ,8188CUS Slim Combo */
         {RTL_USB_DEVICE(0x103c, 0x1629, rtl92cu_hal_cfg)},
+       {RTL_USB_DEVICE(0x13d3, 0x3357, rtl92cu_hal_cfg)}, /* AzureWave */
         {RTL_USB_DEVICE(0x2001, 0x3308, rtl92cu_hal_cfg)}, /*D-Link - Alpha*/
         {RTL_USB_DEVICE(0x2019, 0xab2a, rtl92cu_hal_cfg)}, /*Planex - Abocom*/
         {RTL_USB_DEVICE(0x2019, 0xed17, rtl92cu_hal_cfg)}, /*PCI - Edimax*/
         {RTL_USB_DEVICE(0x20f4, 0x648b, rtl92cu_hal_cfg)}, /*TRENDnet - Cameo*/
         {RTL_USB_DEVICE(0x7392, 0x7811, rtl92cu_hal_cfg)}, /*Edimax - Edimax*/
-       {RTL_USB_DEVICE(0x3358, 0x13d3, rtl92cu_hal_cfg)}, /*Azwave 8188CE-VAU*/
+       {RTL_USB_DEVICE(0x13d3, 0x3358, rtl92cu_hal_cfg)}, /*Azwave 8188CE-VAU*/
         /* Russian customer -Azwave (8188CE-VAU  b/g mode only) */
-       {RTL_USB_DEVICE(0x3359, 0x13d3, rtl92cu_hal_cfg)},
+       {RTL_USB_DEVICE(0x13d3, 0x3359, rtl92cu_hal_cfg)},
+       {RTL_USB_DEVICE(0x4855, 0x0090, rtl92cu_hal_cfg)}, /* Feixun */
+       {RTL_USB_DEVICE(0x4855, 0x0091, rtl92cu_hal_cfg)}, /* NetweeN-Feixun */
+       {RTL_USB_DEVICE(0x9846, 0x9041, rtl92cu_hal_cfg)}, /* Netgear Cameo */
  
         /****** 8192CU ********/
         {RTL_USB_DEVICE(0x0586, 0x341f, rtl92cu_hal_cfg)}, /*Zyxel -Abocom*/
         {RTL_USB_DEVICE(0x07aa, 0x0056, rtl92cu_hal_cfg)}, /*ATKK-Gemtek*/
         {RTL_USB_DEVICE(0x07b8, 0x8178, rtl92cu_hal_cfg)}, /*Funai -Abocom*/
-       {RTL_USB_DEVICE(0x07b8, 0x8178, rtl92cu_hal_cfg)}, /*Abocom -Abocom*/
         {RTL_USB_DEVICE(0x2001, 0x3307, rtl92cu_hal_cfg)}, /*D-Link-Cameo*/
         {RTL_USB_DEVICE(0x2001, 0x3309, rtl92cu_hal_cfg)}, /*D-Link-Alpha*/
         {RTL_USB_DEVICE(0x2001, 0x330a, rtl92cu_hal_cfg)}, /*D-Link-Alpha*/
diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c

index 906e7aa55bc39f40eb0c3743c91a821084d5a934..3e52a5496224ad485c9c6f9c351a266e1d239d25 100644 (file)
--- a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c
@@ -549,15 +549,16 @@ void rtl92cu_tx_fill_desc(struct ieee80211_hw *hw,
                                (tcb_desc->rts_use_shortpreamble ? 1 : 0)
                                : (tcb_desc->rts_use_shortgi ? 1 : 0)));
         if (mac->bw_40) {
-               if (tcb_desc->packet_bw) {
+               if (rate_flag & IEEE80211_TX_RC_DUP_DATA) {
                         SET_TX_DESC_DATA_BW(txdesc, 1);
                         SET_TX_DESC_DATA_SC(txdesc, 3);
+               } else if(rate_flag & IEEE80211_TX_RC_40_MHZ_WIDTH){
+                       SET_TX_DESC_DATA_BW(txdesc, 1);
+                       SET_TX_DESC_DATA_SC(txdesc, mac->cur_40_prime_sc);
                 } else {
                         SET_TX_DESC_DATA_BW(txdesc, 0);
-                               if (rate_flag & IEEE80211_TX_RC_DUP_DATA)
-                                       SET_TX_DESC_DATA_SC(txdesc,
-                                                         mac->cur_40_prime_sc);
-                       }
+                       SET_TX_DESC_DATA_SC(txdesc, 0);
+               }
         } else {
                 SET_TX_DESC_DATA_BW(txdesc, 0);
                 SET_TX_DESC_DATA_SC(txdesc, 0);
diff --git a/drivers/net/wireless/wl1251/acx.c b/drivers/net/wireless/wl1251/acx.c

index ef8370edace709f8c1fb3437403fe4447f7d7d12..ad87a1ac6462aab40befe12db72eef99fcc52f5b 100644 (file)
--- a/drivers/net/wireless/wl1251/acx.c
+++ b/drivers/net/wireless/wl1251/acx.c
@@ -140,8 +140,6 @@ int wl1251_acx_sleep_auth(struct wl1251 *wl, u8 sleep_auth)
         auth->sleep_auth = sleep_auth;
  
         ret = wl1251_cmd_configure(wl, ACX_SLEEP_AUTH, auth, sizeof(*auth));
-       if (ret < 0)
-               return ret;
  
  out:
         kfree(auth);
@@ -681,10 +679,8 @@ int wl1251_acx_cca_threshold(struct wl1251 *wl)
  
         ret = wl1251_cmd_configure(wl, ACX_CCA_THRESHOLD,
                                    detection, sizeof(*detection));
-       if (ret < 0) {
+       if (ret < 0)
                 wl1251_warning("failed to set cca threshold: %d", ret);
-               return ret;
-       }
  
  out:
         kfree(detection);
diff --git a/drivers/net/wireless/wl1251/cmd.c b/drivers/net/wireless/wl1251/cmd.c

index 81f164bc4888f2389d86764c4c66a64295e9f18d..d14d69d733a07fc9bd9e77c4e2192e42ea8f83c9 100644 (file)
--- a/drivers/net/wireless/wl1251/cmd.c
+++ b/drivers/net/wireless/wl1251/cmd.c
@@ -241,7 +241,7 @@ int wl1251_cmd_data_path(struct wl1251 *wl, u8 channel, bool enable)
         if (ret < 0) {
                 wl1251_error("tx %s cmd for channel %d failed",
                              enable ? "start" : "stop", channel);
-               return ret;
+               goto out;
         }
  
         wl1251_debug(DEBUG_BOOT, "tx %s cmd channel %d",
diff --git a/drivers/net/wireless/wl12xx/acx.c b/drivers/net/wireless/wl12xx/acx.c

index 7e33f1f4f3d47c7866baac5af76b656111139d2b..34f6ab53e51960b097f709b9546c0273aa9f47a5 100644 (file)
--- a/drivers/net/wireless/wl12xx/acx.c
+++ b/drivers/net/wireless/wl12xx/acx.c
@@ -77,8 +77,6 @@ int wl1271_acx_sleep_auth(struct wl1271 *wl, u8 sleep_auth)
         auth->sleep_auth = sleep_auth;
  
         ret = wl1271_cmd_configure(wl, ACX_SLEEP_AUTH, auth, sizeof(*auth));
-       if (ret < 0)
-               return ret;
  
  out:
         kfree(auth);
@@ -624,10 +622,8 @@ int wl1271_acx_cca_threshold(struct wl1271 *wl)
  
         ret = wl1271_cmd_configure(wl, ACX_CCA_THRESHOLD,
                                    detection, sizeof(*detection));
-       if (ret < 0) {
+       if (ret < 0)
                 wl1271_warning("failed to set cca threshold: %d", ret);
-               return ret;
-       }
  
  out:
         kfree(detection);
diff --git a/drivers/net/wireless/wl12xx/main.c b/drivers/net/wireless/wl12xx/main.c

index e58c22d21e39af9d8683ff94e1ccee929258902c..b70ae40ad660148418d9914aa0c88bcc71554bae 100644 (file)
--- a/drivers/net/wireless/wl12xx/main.c
+++ b/drivers/net/wireless/wl12xx/main.c
@@ -4283,6 +4283,7 @@ int wl1271_init_ieee80211(struct wl1271 *wl)
         wl->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) |
                 BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_AP);
         wl->hw->wiphy->max_scan_ssids = 1;
+       wl->hw->wiphy->max_sched_scan_ssids = 1;
         /*
          * Maximum length of elements in scanning probe request templates
          * should be the maximum length possible for a template, without
diff --git a/drivers/net/wireless/wl12xx/sdio.c b/drivers/net/wireless/wl12xx/sdio.c

index 5cf18c2c23f09ee3fdd35a3aceafc322f3770580..fb1fd5af75ea562c47120067d88b8f9540e7b805 100644 (file)
--- a/drivers/net/wireless/wl12xx/sdio.c
+++ b/drivers/net/wireless/wl12xx/sdio.c
@@ -164,7 +164,7 @@ static int wl1271_sdio_power_on(struct wl1271 *wl)
         /* If enabled, tell runtime PM not to power off the card */
         if (pm_runtime_enabled(&func->dev)) {
                 ret = pm_runtime_get_sync(&func->dev);
-               if (ret)
+               if (ret < 0)
                         goto out;
         } else {
                 /* Runtime PM is disabled: power up the card manually */
diff --git a/drivers/net/wireless/wl12xx/testmode.c b/drivers/net/wireless/wl12xx/testmode.c

index 5d5e1ef87206a4e1722510084ba4cab2783cf0bd..4ae8effaee22828acf9565d47d9ad599f4ac9c00 100644 (file)
--- a/drivers/net/wireless/wl12xx/testmode.c
+++ b/drivers/net/wireless/wl12xx/testmode.c
@@ -36,7 +36,6 @@ enum wl1271_tm_commands {
         WL1271_TM_CMD_TEST,
         WL1271_TM_CMD_INTERROGATE,
         WL1271_TM_CMD_CONFIGURE,
-       WL1271_TM_CMD_NVS_PUSH,
         WL1271_TM_CMD_SET_PLT_MODE,
         WL1271_TM_CMD_RECOVER,
  
@@ -139,12 +138,15 @@ static int wl1271_tm_cmd_interrogate(struct wl1271 *wl, struct nlattr *tb[])
  
         if (ret < 0) {
                 wl1271_warning("testmode cmd interrogate failed: %d", ret);
+               kfree(cmd);
                 return ret;
         }
  
         skb = cfg80211_testmode_alloc_reply_skb(wl->hw->wiphy, sizeof(*cmd));
-       if (!skb)
+       if (!skb) {
+               kfree(cmd);
                 return -ENOMEM;
+       }
  
         NLA_PUT(skb, WL1271_TM_ATTR_DATA, sizeof(*cmd), cmd);
  
@@ -187,48 +189,6 @@ static int wl1271_tm_cmd_configure(struct wl1271 *wl, struct nlattr *tb[])
         return 0;
  }
  
-static int wl1271_tm_cmd_nvs_push(struct wl1271 *wl, struct nlattr *tb[])
-{
-       int ret = 0;
-       size_t len;
-       void *buf;
-
-       wl1271_debug(DEBUG_TESTMODE, "testmode cmd nvs push");
-
-       if (!tb[WL1271_TM_ATTR_DATA])
-               return -EINVAL;
-
-       buf = nla_data(tb[WL1271_TM_ATTR_DATA]);
-       len = nla_len(tb[WL1271_TM_ATTR_DATA]);
-
-       mutex_lock(&wl->mutex);
-
-       kfree(wl->nvs);
-
-       if ((wl->chip.id == CHIP_ID_1283_PG20) &&
-           (len != sizeof(struct wl128x_nvs_file)))
-               return -EINVAL;
-       else if (len != sizeof(struct wl1271_nvs_file))
-               return -EINVAL;
-
-       wl->nvs = kzalloc(len, GFP_KERNEL);
-       if (!wl->nvs) {
-               wl1271_error("could not allocate memory for the nvs file");
-               ret = -ENOMEM;
-               goto out;
-       }
-
-       memcpy(wl->nvs, buf, len);
-       wl->nvs_len = len;
-
-       wl1271_debug(DEBUG_TESTMODE, "testmode pushed nvs");
-
-out:
-       mutex_unlock(&wl->mutex);
-
-       return ret;
-}
-
  static int wl1271_tm_cmd_set_plt_mode(struct wl1271 *wl, struct nlattr *tb[])
  {
         u32 val;
@@ -285,8 +245,6 @@ int wl1271_tm_cmd(struct ieee80211_hw *hw, void *data, int len)
                 return wl1271_tm_cmd_interrogate(wl, tb);
         case WL1271_TM_CMD_CONFIGURE:
                 return wl1271_tm_cmd_configure(wl, tb);
-       case WL1271_TM_CMD_NVS_PUSH:
-               return wl1271_tm_cmd_nvs_push(wl, tb);
         case WL1271_TM_CMD_SET_PLT_MODE:
                 return wl1271_tm_cmd_set_plt_mode(wl, tb);
         case WL1271_TM_CMD_RECOVER:
diff --git a/drivers/pci/hotplug/pcihp_slot.c b/drivers/pci/hotplug/pcihp_slot.c

index 749fdf0703199cef927004c6fa8b803d28ae1368..3ffd9c1acc0a33e0146f992fccc695444799b726 100644 (file)
--- a/drivers/pci/hotplug/pcihp_slot.c
+++ b/drivers/pci/hotplug/pcihp_slot.c
@@ -158,47 +158,6 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp)
          */
  }
  
-/* Program PCIE MaxPayload setting on device: ensure parent maxpayload <= device */
-static int pci_set_payload(struct pci_dev *dev)
-{
-       int pos, ppos;
-       u16 pctl, psz;
-       u16 dctl, dsz, dcap, dmax;
-       struct pci_dev *parent;
-
-       parent = dev->bus->self;
-       pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
-       if (!pos)
-               return 0;
-
-       /* Read Device MaxPayload capability and setting */
-       pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &dctl);
-       pci_read_config_word(dev, pos + PCI_EXP_DEVCAP, &dcap);
-       dsz = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5;
-       dmax = (dcap & PCI_EXP_DEVCAP_PAYLOAD);
-
-       /* Read Parent MaxPayload setting */
-       ppos = pci_find_capability(parent, PCI_CAP_ID_EXP);
-       if (!ppos)
-               return 0;
-       pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl);
-       psz = (pctl &  PCI_EXP_DEVCTL_PAYLOAD) >> 5;
-
-       /* If parent payload > device max payload -> error
-        * If parent payload > device payload -> set speed
-        * If parent payload <= device payload -> do nothing
-        */
-       if (psz > dmax)
-               return -1;
-       else if (psz > dsz) {
-               dev_info(&dev->dev, "Setting MaxPayload to %d\n", 128 << psz);
-               pci_write_config_word(dev, pos + PCI_EXP_DEVCTL,
-                                     (dctl & ~PCI_EXP_DEVCTL_PAYLOAD) +
-                                     (psz << 5));
-       }
-       return 0;
-}
-
  void pci_configure_slot(struct pci_dev *dev)
  {
         struct pci_dev *cdev;
@@ -210,9 +169,9 @@ void pci_configure_slot(struct pci_dev *dev)
                         (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI)))
                 return;
  
-       ret = pci_set_payload(dev);
-       if (ret)
-               dev_warn(&dev->dev, "could not set device max payload\n");
+       if (dev->bus && dev->bus->self)
+               pcie_bus_configure_settings(dev->bus,
+                                           dev->bus->self->pcie_mpss);
  
         memset(&hpp, 0, sizeof(hpp));
         ret = pci_get_hp_params(dev, &hpp);
diff --git a/drivers/pci/of.c b/drivers/pci/of.c

index c94d37ec55c87af38e7b6d1c9ccf46ea9baeb60c..f0929934bb7ac0c8a5d0dca4dbb197fb4700b6d3 100644 (file)
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -55,7 +55,7 @@ struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus)
          */
         if (bus->bridge->of_node)
                 return of_node_get(bus->bridge->of_node);
-       if (bus->bridge->parent->of_node)
+       if (bus->bridge->parent && bus->bridge->parent->of_node)
                 return of_node_get(bus->bridge->parent->of_node);
         return NULL;
  }
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c

index 08a95b369d850c54292c66e45f67dfd197c860ef..4e84fd4a4312c2d4660bff0992aa21b7fe67e5a1 100644 (file)
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -77,6 +77,8 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE;
  unsigned long pci_hotplug_io_size  = DEFAULT_HOTPLUG_IO_SIZE;
  unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE;
  
+enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_SAFE;
+
  /*
   * The default CLS is used if arch didn't set CLS explicitly and not
   * all pci devices agree on the same value.  Arch can override either
@@ -3222,6 +3224,67 @@ out:
  }
  EXPORT_SYMBOL(pcie_set_readrq);
  
+/**
+ * pcie_get_mps - get PCI Express maximum payload size
+ * @dev: PCI device to query
+ *
+ * Returns maximum payload size in bytes
+ *    or appropriate error value.
+ */
+int pcie_get_mps(struct pci_dev *dev)
+{
+       int ret, cap;
+       u16 ctl;
+
+       cap = pci_pcie_cap(dev);
+       if (!cap)
+               return -EINVAL;
+
+       ret = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl);
+       if (!ret)
+               ret = 128 << ((ctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5);
+
+       return ret;
+}
+
+/**
+ * pcie_set_mps - set PCI Express maximum payload size
+ * @dev: PCI device to query
+ * @mps: maximum payload size in bytes
+ *    valid values are 128, 256, 512, 1024, 2048, 4096
+ *
+ * If possible sets maximum payload size
+ */
+int pcie_set_mps(struct pci_dev *dev, int mps)
+{
+       int cap, err = -EINVAL;
+       u16 ctl, v;
+
+       if (mps < 128 || mps > 4096 || !is_power_of_2(mps))
+               goto out;
+
+       v = ffs(mps) - 8;
+       if (v > dev->pcie_mpss) 
+               goto out;
+       v <<= 5;
+
+       cap = pci_pcie_cap(dev);
+       if (!cap)
+               goto out;
+
+       err = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl);
+       if (err)
+               goto out;
+
+       if ((ctl & PCI_EXP_DEVCTL_PAYLOAD) != v) {
+               ctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
+               ctl |= v;
+               err = pci_write_config_word(dev, cap + PCI_EXP_DEVCTL, ctl);
+       }
+out:
+       return err;
+}
+
  /**
   * pci_select_bars - Make BAR mask from the type of resource
   * @dev: the PCI device for which BAR mask is made
@@ -3505,6 +3568,10 @@ static int __init pci_setup(char *str)
                                 pci_hotplug_io_size = memparse(str + 9, &str);
                         } else if (!strncmp(str, "hpmemsize=", 10)) {
                                 pci_hotplug_mem_size = memparse(str + 10, &str);
+                       } else if (!strncmp(str, "pcie_bus_safe", 13)) {
+                               pcie_bus_config = PCIE_BUS_SAFE;
+                       } else if (!strncmp(str, "pcie_bus_perf", 13)) {
+                               pcie_bus_config = PCIE_BUS_PERFORMANCE;
                         } else {
                                 printk(KERN_ERR "PCI: Unknown option `%s'\n",
                                                 str);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h

index c8cee764b0de596ab8e1656a104203d5337e5f62..b74084e9ca12fa52e14f06e3b71d38aca4708b7d 100644 (file)
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -283,6 +283,8 @@ static inline int pci_iov_bus_range(struct pci_bus *bus)
  
  #endif /* CONFIG_PCI_IOV */
  
+extern unsigned long pci_cardbus_resource_alignment(struct resource *);
+
  static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
                                          struct resource *res)
  {
@@ -292,6 +294,8 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
         if (resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END)
                 return pci_sriov_resource_alignment(dev, resno);
  #endif
+       if (dev->class >> 8  == PCI_CLASS_BRIDGE_CARDBUS)
+               return pci_cardbus_resource_alignment(res);
         return resource_alignment(res);
  }
  
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c

index 795c9026d55fccfa512d9c2393973947af05dab9..f3f94a5c068feede9a30881a56b322b051dcb6b5 100644 (file)
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -856,6 +856,8 @@ void set_pcie_port_type(struct pci_dev *pdev)
         pdev->pcie_cap = pos;
         pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, &reg16);
         pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4;
+       pci_read_config_word(pdev, pos + PCI_EXP_DEVCAP, &reg16);
+       pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD;
  }
  
  void set_pcie_hotplug_bridge(struct pci_dev *pdev)
@@ -1326,6 +1328,151 @@ int pci_scan_slot(struct pci_bus *bus, int devfn)
         return nr;
  }
  
+static int pcie_find_smpss(struct pci_dev *dev, void *data)
+{
+       u8 *smpss = data;
+
+       if (!pci_is_pcie(dev))
+               return 0;
+
+       /* For PCIE hotplug enabled slots not connected directly to a
+        * PCI-E root port, there can be problems when hotplugging
+        * devices.  This is due to the possibility of hotplugging a
+        * device into the fabric with a smaller MPS that the devices
+        * currently running have configured.  Modifying the MPS on the
+        * running devices could cause a fatal bus error due to an
+        * incoming frame being larger than the newly configured MPS.
+        * To work around this, the MPS for the entire fabric must be
+        * set to the minimum size.  Any devices hotplugged into this
+        * fabric will have the minimum MPS set.  If the PCI hotplug
+        * slot is directly connected to the root port and there are not
+        * other devices on the fabric (which seems to be the most
+        * common case), then this is not an issue and MPS discovery
+        * will occur as normal.
+        */
+       if (dev->is_hotplug_bridge && (!list_is_singular(&dev->bus->devices) ||
+            (dev->bus->self &&
+             dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT)))
+               *smpss = 0;
+
+       if (*smpss > dev->pcie_mpss)
+               *smpss = dev->pcie_mpss;
+
+       return 0;
+}
+
+static void pcie_write_mps(struct pci_dev *dev, int mps)
+{
+       int rc, dev_mpss;
+
+       dev_mpss = 128 << dev->pcie_mpss;
+
+       if (pcie_bus_config == PCIE_BUS_PERFORMANCE) {
+               if (dev->bus->self) {
+                       dev_dbg(&dev->bus->dev, "Bus MPSS %d\n",
+                               128 << dev->bus->self->pcie_mpss);
+
+                       /* For "MPS Force Max", the assumption is made that
+                        * downstream communication will never be larger than
+                        * the MRRS.  So, the MPS only needs to be configured
+                        * for the upstream communication.  This being the case,
+                        * walk from the top down and set the MPS of the child
+                        * to that of the parent bus.
+                        */
+                       mps = 128 << dev->bus->self->pcie_mpss;
+                       if (mps > dev_mpss)
+                               dev_warn(&dev->dev, "MPS configured higher than"
+                                        " maximum supported by the device.  If"
+                                        " a bus issue occurs, try running with"
+                                        " pci=pcie_bus_safe.\n");
+               }
+
+               dev->pcie_mpss = ffs(mps) - 8;
+       }
+
+       rc = pcie_set_mps(dev, mps);
+       if (rc)
+               dev_err(&dev->dev, "Failed attempting to set the MPS\n");
+}
+
+static void pcie_write_mrrs(struct pci_dev *dev, int mps)
+{
+       int rc, mrrs, dev_mpss;
+
+       /* In the "safe" case, do not configure the MRRS.  There appear to be
+        * issues with setting MRRS to 0 on a number of devices.
+        */
+
+       if (pcie_bus_config != PCIE_BUS_PERFORMANCE)
+               return;
+
+       dev_mpss = 128 << dev->pcie_mpss;
+
+       /* For Max performance, the MRRS must be set to the largest supported
+        * value.  However, it cannot be configured larger than the MPS the
+        * device or the bus can support.  This assumes that the largest MRRS
+        * available on the device cannot be smaller than the device MPSS.
+        */
+       mrrs = min(mps, dev_mpss);
+
+       /* MRRS is a R/W register.  Invalid values can be written, but a
+        * subsequent read will verify if the value is acceptable or not.
+        * If the MRRS value provided is not acceptable (e.g., too large),
+        * shrink the value until it is acceptable to the HW.
+        */
+       while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) {
+               dev_warn(&dev->dev, "Attempting to modify the PCI-E MRRS value"
+                        " to %d.  If any issues are encountered, please try "
+                        "running with pci=pcie_bus_safe\n", mrrs);
+               rc = pcie_set_readrq(dev, mrrs);
+               if (rc)
+                       dev_err(&dev->dev,
+                               "Failed attempting to set the MRRS\n");
+
+               mrrs /= 2;
+       }
+}
+
+static int pcie_bus_configure_set(struct pci_dev *dev, void *data)
+{
+       int mps = 128 << *(u8 *)data;
+
+       if (!pci_is_pcie(dev))
+               return 0;
+
+       dev_dbg(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n",
+                pcie_get_mps(dev), 128<<dev->pcie_mpss, pcie_get_readrq(dev));
+
+       pcie_write_mps(dev, mps);
+       pcie_write_mrrs(dev, mps);
+
+       dev_dbg(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n",
+                pcie_get_mps(dev), 128<<dev->pcie_mpss, pcie_get_readrq(dev));
+
+       return 0;
+}
+
+/* pcie_bus_configure_mps requires that pci_walk_bus work in a top-down,
+ * parents then children fashion.  If this changes, then this code will not
+ * work as designed.
+ */
+void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss)
+{
+       u8 smpss = mpss;
+
+       if (!pci_is_pcie(bus->self))
+               return;
+
+       if (pcie_bus_config == PCIE_BUS_SAFE) {
+               pcie_find_smpss(bus->self, &smpss);
+               pci_walk_bus(bus, pcie_find_smpss, &smpss);
+       }
+
+       pcie_bus_configure_set(bus->self, &smpss);
+       pci_walk_bus(bus, pcie_bus_configure_set, &smpss);
+}
+EXPORT_SYMBOL_GPL(pcie_bus_configure_settings);
+
  unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus)
  {
         unsigned int devfn, pass, max = bus->secondary;
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c

index 8a1d3c7863a89d5514f18357e8a5b6726452335e..784da9d3602985606d850d9d07f51b65f21e3f05 100644 (file)
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -34,6 +34,7 @@ struct resource_list_x {
         resource_size_t start;
         resource_size_t end;
         resource_size_t add_size;
+       resource_size_t min_align;
         unsigned long flags;
  };
  
@@ -65,7 +66,7 @@ void pci_realloc(void)
   */
  static void add_to_list(struct resource_list_x *head,
                  struct pci_dev *dev, struct resource *res,
-                resource_size_t add_size)
+                resource_size_t add_size, resource_size_t min_align)
  {
         struct resource_list_x *list = head;
         struct resource_list_x *ln = list->next;
@@ -84,13 +85,16 @@ static void add_to_list(struct resource_list_x *head,
         tmp->end = res->end;
         tmp->flags = res->flags;
         tmp->add_size = add_size;
+       tmp->min_align = min_align;
         list->next = tmp;
  }
  
  static void add_to_failed_list(struct resource_list_x *head,
                                 struct pci_dev *dev, struct resource *res)
  {
-       add_to_list(head, dev, res, 0);
+       add_to_list(head, dev, res,
+                       0 /* dont care */,
+                       0 /* dont care */);
  }
  
  static void __dev_sort_resources(struct pci_dev *dev,
@@ -121,18 +125,18 @@ static inline void reset_resource(struct resource *res)
  }
  
  /**
- * adjust_resources_sorted() - satisfy any additional resource requests
+ * reassign_resources_sorted() - satisfy any additional resource requests
   *
- * @add_head : head of the list tracking requests requiring additional
+ * @realloc_head : head of the list tracking requests requiring additional
   *             resources
   * @head     : head of the list tracking requests with allocated
   *             resources
   *
- * Walk through each element of the add_head and try to procure
+ * Walk through each element of the realloc_head and try to procure
   * additional resources for the element, provided the element
   * is in the head list.
   */
-static void adjust_resources_sorted(struct resource_list_x *add_head,
+static void reassign_resources_sorted(struct resource_list_x *realloc_head,
                 struct resource_list *head)
  {
         struct resource *res;
@@ -141,8 +145,8 @@ static void adjust_resources_sorted(struct resource_list_x *add_head,
         resource_size_t add_size;
         int idx;
  
-       prev = add_head;
-       for (list = add_head->next; list;) {
+       prev = realloc_head;
+       for (list = realloc_head->next; list;) {
                 res = list->res;
                 /* skip resource that has been reset */
                 if (!res->flags)
@@ -159,13 +163,17 @@ static void adjust_resources_sorted(struct resource_list_x *add_head,
  
                 idx = res - &list->dev->resource[0];
                 add_size=list->add_size;
-               if (!resource_size(res) && add_size) {
-                        res->end = res->start + add_size - 1;
-                        if(pci_assign_resource(list->dev, idx))
+               if (!resource_size(res)) {
+                       res->start = list->start;
+                       res->end = res->start + add_size - 1;
+                       if(pci_assign_resource(list->dev, idx))
                                 reset_resource(res);
-               } else if (add_size) {
-                       adjust_resource(res, res->start,
-                               resource_size(res) + add_size);
+               } else {
+                       resource_size_t align = list->min_align;
+                       res->flags |= list->flags & (IORESOURCE_STARTALIGN|IORESOURCE_SIZEALIGN);
+                       if (pci_reassign_resource(list->dev, idx, add_size, align))
+                               dev_printk(KERN_DEBUG, &list->dev->dev, "failed to add optional resources res=%pR\n",
+                                                       res);
                 }
  out:
                 tmp = list;
@@ -210,16 +218,16 @@ static void assign_requested_resources_sorted(struct resource_list *head,
  }
  
  static void __assign_resources_sorted(struct resource_list *head,
-                                struct resource_list_x *add_head,
+                                struct resource_list_x *realloc_head,
                                  struct resource_list_x *fail_head)
  {
         /* Satisfy the must-have resource requests */
         assign_requested_resources_sorted(head, fail_head);
  
-       /* Try to satisfy any additional nice-to-have resource
+       /* Try to satisfy any additional optional resource
                 requests */
-       if (add_head)
-               adjust_resources_sorted(add_head, head);
+       if (realloc_head)
+               reassign_resources_sorted(realloc_head, head);
         free_list(resource_list, head);
  }
  
@@ -235,7 +243,7 @@ static void pdev_assign_resources_sorted(struct pci_dev *dev,
  }
  
  static void pbus_assign_resources_sorted(const struct pci_bus *bus,
-                                        struct resource_list_x *add_head,
+                                        struct resource_list_x *realloc_head,
                                          struct resource_list_x *fail_head)
  {
         struct pci_dev *dev;
@@ -245,7 +253,7 @@ static void pbus_assign_resources_sorted(const struct pci_bus *bus,
         list_for_each_entry(dev, &bus->devices, bus_list)
                 __dev_sort_resources(dev, &head);
  
-       __assign_resources_sorted(&head, add_head, fail_head);
+       __assign_resources_sorted(&head, realloc_head, fail_head);
  }
  
  void pci_setup_cardbus(struct pci_bus *bus)
@@ -540,13 +548,27 @@ static resource_size_t calculate_memsize(resource_size_t size,
         return size;
  }
  
+static resource_size_t get_res_add_size(struct resource_list_x *realloc_head,
+                                       struct resource *res)
+{
+       struct resource_list_x *list;
+
+       /* check if it is in realloc_head list */
+       for (list = realloc_head->next; list && list->res != res;
+                       list = list->next);
+       if (list)
+               return list->add_size;
+
+       return 0;
+}
+
  /**
   * pbus_size_io() - size the io window of a given bus
   *
   * @bus : the bus
   * @min_size : the minimum io window that must to be allocated
   * @add_size : additional optional io window
- * @add_head : track the additional io window on this list
+ * @realloc_head : track the additional io window on this list
   *
   * Sizing the IO windows of the PCI-PCI bridge is trivial,
   * since these windows have 4K granularity and the IO ranges
@@ -554,11 +576,12 @@ static resource_size_t calculate_memsize(resource_size_t size,
   * We must be careful with the ISA aliasing though.
   */
  static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
-               resource_size_t add_size, struct resource_list_x *add_head)
+               resource_size_t add_size, struct resource_list_x *realloc_head)
  {
         struct pci_dev *dev;
         struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
         unsigned long size = 0, size0 = 0, size1 = 0;
+       resource_size_t children_add_size = 0;
  
         if (!b_res)
                 return;
@@ -579,11 +602,16 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
                                 size += r_size;
                         else
                                 size1 += r_size;
+
+                       if (realloc_head)
+                               children_add_size += get_res_add_size(realloc_head, r);
                 }
         }
         size0 = calculate_iosize(size, min_size, size1,
                         resource_size(b_res), 4096);
-       size1 = (!add_head || (add_head && !add_size)) ? size0 :
+       if (children_add_size > add_size)
+               add_size = children_add_size;
+       size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 :
                 calculate_iosize(size, min_size+add_size, size1,
                         resource_size(b_res), 4096);
         if (!size0 && !size1) {
@@ -598,8 +626,8 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
         b_res->start = 4096;
         b_res->end = b_res->start + size0 - 1;
         b_res->flags |= IORESOURCE_STARTALIGN;
-       if (size1 > size0 && add_head)
-               add_to_list(add_head, bus->self, b_res, size1-size0);
+       if (size1 > size0 && realloc_head)
+               add_to_list(realloc_head, bus->self, b_res, size1-size0, 4096);
  }
  
  /**
@@ -608,7 +636,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
   * @bus : the bus
   * @min_size : the minimum memory window that must to be allocated
   * @add_size : additional optional memory window
- * @add_head : track the additional memory window on this list
+ * @realloc_head : track the additional memory window on this list
   *
   * Calculate the size of the bus and minimal alignment which
   * guarantees that all child resources fit in this size.
@@ -616,7 +644,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
  static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
                          unsigned long type, resource_size_t min_size,
                         resource_size_t add_size,
-                       struct resource_list_x *add_head)
+                       struct resource_list_x *realloc_head)
  {
         struct pci_dev *dev;
         resource_size_t min_align, align, size, size0, size1;
@@ -624,6 +652,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
         int order, max_order;
         struct resource *b_res = find_free_bus_resource(bus, type);
         unsigned int mem64_mask = 0;
+       resource_size_t children_add_size = 0;
  
         if (!b_res)
                 return 0;
@@ -645,6 +674,16 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
                         if (r->parent || (r->flags & mask) != type)
                                 continue;
                         r_size = resource_size(r);
+#ifdef CONFIG_PCI_IOV
+                       /* put SRIOV requested res to the optional list */
+                       if (realloc_head && i >= PCI_IOV_RESOURCES &&
+                                       i <= PCI_IOV_RESOURCE_END) {
+                               r->end = r->start - 1;
+                               add_to_list(realloc_head, dev, r, r_size, 0/* dont' care */);
+                               children_add_size += r_size;
+                               continue;
+                       }
+#endif
                         /* For bridges size != alignment */
                         align = pci_resource_alignment(dev, r);
                         order = __ffs(align) - 20;
@@ -665,6 +704,9 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
                         if (order > max_order)
                                 max_order = order;
                         mem64_mask &= r->flags & IORESOURCE_MEM_64;
+
+                       if (realloc_head)
+                               children_add_size += get_res_add_size(realloc_head, r);
                 }
         }
         align = 0;
@@ -681,7 +723,9 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
                 align += aligns[order];
         }
         size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), min_align);
-       size1 = (!add_head || (add_head && !add_size)) ? size0 :
+       if (children_add_size > add_size)
+               add_size = children_add_size;
+       size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 :
                 calculate_memsize(size, min_size+add_size, 0,
                                 resource_size(b_res), min_align);
         if (!size0 && !size1) {
@@ -695,12 +739,22 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
         b_res->start = min_align;
         b_res->end = size0 + min_align - 1;
         b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask;
-       if (size1 > size0 && add_head)
-               add_to_list(add_head, bus->self, b_res, size1-size0);
+       if (size1 > size0 && realloc_head)
+               add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align);
         return 1;
  }
  
-static void pci_bus_size_cardbus(struct pci_bus *bus)
+unsigned long pci_cardbus_resource_alignment(struct resource *res)
+{
+       if (res->flags & IORESOURCE_IO)
+               return pci_cardbus_io_size;
+       if (res->flags & IORESOURCE_MEM)
+               return pci_cardbus_mem_size;
+       return 0;
+}
+
+static void pci_bus_size_cardbus(struct pci_bus *bus,
+                       struct resource_list_x *realloc_head)
  {
         struct pci_dev *bridge = bus->self;
         struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES];
@@ -711,12 +765,14 @@ static void pci_bus_size_cardbus(struct pci_bus *bus)
          * a fixed amount of bus space for CardBus bridges.
          */
         b_res[0].start = 0;
-       b_res[0].end = pci_cardbus_io_size - 1;
         b_res[0].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN;
+       if (realloc_head)
+               add_to_list(realloc_head, bridge, b_res, pci_cardbus_io_size, 0 /* dont care */);
  
         b_res[1].start = 0;
-       b_res[1].end = pci_cardbus_io_size - 1;
         b_res[1].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN;
+       if (realloc_head)
+               add_to_list(realloc_head, bridge, b_res+1, pci_cardbus_io_size, 0 /* dont care */);
  
         /*
          * Check whether prefetchable memory is supported
@@ -736,21 +792,31 @@ static void pci_bus_size_cardbus(struct pci_bus *bus)
          */
         if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0) {
                 b_res[2].start = 0;
-               b_res[2].end = pci_cardbus_mem_size - 1;
                 b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_SIZEALIGN;
+               if (realloc_head)
+                       add_to_list(realloc_head, bridge, b_res+2, pci_cardbus_mem_size, 0 /* dont care */);
  
                 b_res[3].start = 0;
-               b_res[3].end = pci_cardbus_mem_size - 1;
                 b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN;
+               if (realloc_head)
+                       add_to_list(realloc_head, bridge, b_res+3, pci_cardbus_mem_size, 0 /* dont care */);
         } else {
                 b_res[3].start = 0;
-               b_res[3].end = pci_cardbus_mem_size * 2 - 1;
                 b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN;
+               if (realloc_head)
+                       add_to_list(realloc_head, bridge, b_res+3, pci_cardbus_mem_size * 2, 0 /* dont care */);
         }
+
+       /* set the size of the resource to zero, so that the resource does not
+        * get assigned during required-resource allocation cycle but gets assigned
+        * during the optional-resource allocation cycle.
+        */
+       b_res[0].start = b_res[1].start = b_res[2].start = b_res[3].start = 1;
+       b_res[0].end = b_res[1].end = b_res[2].end = b_res[3].end = 0;
  }
  
  void __ref __pci_bus_size_bridges(struct pci_bus *bus,
-                       struct resource_list_x *add_head)
+                       struct resource_list_x *realloc_head)
  {
         struct pci_dev *dev;
         unsigned long mask, prefmask;
@@ -763,12 +829,12 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus,
  
                 switch (dev->class >> 8) {
                 case PCI_CLASS_BRIDGE_CARDBUS:
-                       pci_bus_size_cardbus(b);
+                       pci_bus_size_cardbus(b, realloc_head);
                         break;
  
                 case PCI_CLASS_BRIDGE_PCI:
                 default:
-                       __pci_bus_size_bridges(b, add_head);
+                       __pci_bus_size_bridges(b, realloc_head);
                         break;
                 }
         }
@@ -792,7 +858,7 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus,
                  * Follow thru
                  */
         default:
-               pbus_size_io(bus, 0, additional_io_size, add_head);
+               pbus_size_io(bus, 0, additional_io_size, realloc_head);
                 /* If the bridge supports prefetchable range, size it
                    separately. If it doesn't, or its prefetchable window
                    has already been allocated by arch code, try
@@ -800,11 +866,11 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus,
                    resources. */
                 mask = IORESOURCE_MEM;
                 prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH;
-               if (pbus_size_mem(bus, prefmask, prefmask, 0, additional_mem_size, add_head))
+               if (pbus_size_mem(bus, prefmask, prefmask, 0, additional_mem_size, realloc_head))
                         mask = prefmask; /* Success, size non-prefetch only. */
                 else
                         additional_mem_size += additional_mem_size;
-               pbus_size_mem(bus, mask, IORESOURCE_MEM, 0, additional_mem_size, add_head);
+               pbus_size_mem(bus, mask, IORESOURCE_MEM, 0, additional_mem_size, realloc_head);
                 break;
         }
  }
@@ -816,20 +882,20 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus)
  EXPORT_SYMBOL(pci_bus_size_bridges);
  
  static void __ref __pci_bus_assign_resources(const struct pci_bus *bus,
-                                        struct resource_list_x *add_head,
+                                        struct resource_list_x *realloc_head,
                                          struct resource_list_x *fail_head)
  {
         struct pci_bus *b;
         struct pci_dev *dev;
  
-       pbus_assign_resources_sorted(bus, add_head, fail_head);
+       pbus_assign_resources_sorted(bus, realloc_head, fail_head);
  
         list_for_each_entry(dev, &bus->devices, bus_list) {
                 b = dev->subordinate;
                 if (!b)
                         continue;
  
-               __pci_bus_assign_resources(b, add_head, fail_head);
+               __pci_bus_assign_resources(b, realloc_head, fail_head);
  
                 switch (dev->class >> 8) {
                 case PCI_CLASS_BRIDGE_PCI:
@@ -1039,7 +1105,7 @@ void __init
  pci_assign_unassigned_resources(void)
  {
         struct pci_bus *bus;
-       struct resource_list_x add_list; /* list of resources that
+       struct resource_list_x realloc_list; /* list of resources that
                                         want additional resources */
         int tried_times = 0;
         enum release_type rel_type = leaf_only;
@@ -1052,7 +1118,7 @@ pci_assign_unassigned_resources(void)
  
  
         head.next = NULL;
-       add_list.next = NULL;
+       realloc_list.next = NULL;
  
         pci_try_num = max_depth + 1;
         printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n",
@@ -1062,12 +1128,12 @@ again:
         /* Depth first, calculate sizes and alignments of all
            subordinate buses. */
         list_for_each_entry(bus, &pci_root_buses, node)
-               __pci_bus_size_bridges(bus, &add_list);
+               __pci_bus_size_bridges(bus, &realloc_list);
  
         /* Depth last, allocate resources and update the hardware. */
         list_for_each_entry(bus, &pci_root_buses, node)
-               __pci_bus_assign_resources(bus, &add_list, &head);
-       BUG_ON(add_list.next);
+               __pci_bus_assign_resources(bus, &realloc_list, &head);
+       BUG_ON(realloc_list.next);
         tried_times++;
  
         /* any device complain? */
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c

index 319f359906e8198628e3d8a7ce60447dd82c3c0f..51a9095c7da4812ddfd843c615083eca61ea9889 100644 (file)
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -128,16 +128,16 @@ void pci_disable_bridge_window(struct pci_dev *dev)
  }
  #endif /* CONFIG_PCI_QUIRKS */
  
+
+
  static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
-                                int resno)
+               int resno, resource_size_t size, resource_size_t align)
  {
         struct resource *res = dev->resource + resno;
-       resource_size_t size, min, align;
+       resource_size_t min;
         int ret;
  
-       size = resource_size(res);
         min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
-       align = pci_resource_alignment(dev, res);
  
         /* First, try exact prefetching match.. */
         ret = pci_bus_alloc_resource(bus, res, size, align, min,
@@ -154,56 +154,101 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
                 ret = pci_bus_alloc_resource(bus, res, size, align, min, 0,
                                              pcibios_align_resource, dev);
         }
+       return ret;
+}
  
-       if (ret < 0 && dev->fw_addr[resno]) {
-               struct resource *root, *conflict;
-               resource_size_t start, end;
+static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, 
+               int resno, resource_size_t size)
+{
+       struct resource *root, *conflict;
+       resource_size_t start, end;
+       int ret = 0;
  
-               /*
-                * If we failed to assign anything, let's try the address
-                * where firmware left it.  That at least has a chance of
-                * working, which is better than just leaving it disabled.
-                */
+       if (res->flags & IORESOURCE_IO)
+               root = &ioport_resource;
+       else
+               root = &iomem_resource;
+
+       start = res->start;
+       end = res->end;
+       res->start = dev->fw_addr[resno];
+       res->end = res->start + size - 1;
+       dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n",
+                resno, res);
+       conflict = request_resource_conflict(root, res);
+       if (conflict) {
+               dev_info(&dev->dev,
+                        "BAR %d: %pR conflicts with %s %pR\n", resno,
+                        res, conflict->name, conflict);
+               res->start = start;
+               res->end = end;
+               ret = 1;
+       }
+       return ret;
+}
+
+static int _pci_assign_resource(struct pci_dev *dev, int resno, int size, resource_size_t min_align)
+{
+       struct resource *res = dev->resource + resno;
+       struct pci_bus *bus;
+       int ret;
+       char *type;
  
-               if (res->flags & IORESOURCE_IO)
-                       root = &ioport_resource;
+       bus = dev->bus;
+       while ((ret = __pci_assign_resource(bus, dev, resno, size, min_align))) {
+               if (!bus->parent || !bus->self->transparent)
+                       break;
+               bus = bus->parent;
+       }
+
+       if (ret) {
+               if (res->flags & IORESOURCE_MEM)
+                       if (res->flags & IORESOURCE_PREFETCH)
+                               type = "mem pref";
+                       else
+                               type = "mem";
+               else if (res->flags & IORESOURCE_IO)
+                       type = "io";
                 else
-                       root = &iomem_resource;
-
-               start = res->start;
-               end = res->end;
-               res->start = dev->fw_addr[resno];
-               res->end = res->start + size - 1;
-               dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n",
-                        resno, res);
-               conflict = request_resource_conflict(root, res);
-               if (conflict) {
-                       dev_info(&dev->dev,
-                                "BAR %d: %pR conflicts with %s %pR\n", resno,
-                                res, conflict->name, conflict);
-                       res->start = start;
-                       res->end = end;
-               } else
-                       ret = 0;
+                       type = "unknown";
+               dev_info(&dev->dev,
+                        "BAR %d: can't assign %s (size %#llx)\n",
+                        resno, type, (unsigned long long) resource_size(res));
         }
  
+       return ret;
+}
+
+int pci_reassign_resource(struct pci_dev *dev, int resno, resource_size_t addsize,
+                       resource_size_t min_align)
+{
+       struct resource *res = dev->resource + resno;
+       resource_size_t new_size;
+       int ret;
+
+       if (!res->parent) {
+               dev_info(&dev->dev, "BAR %d: can't reassign an unassigned resouce %pR "
+                        "\n", resno, res);
+               return -EINVAL;
+       }
+
+       new_size = resource_size(res) + addsize + min_align;
+       ret = _pci_assign_resource(dev, resno, new_size, min_align);
         if (!ret) {
                 res->flags &= ~IORESOURCE_STARTALIGN;
                 dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res);
                 if (resno < PCI_BRIDGE_RESOURCES)
                         pci_update_resource(dev, resno);
         }
-
         return ret;
  }
  
  int pci_assign_resource(struct pci_dev *dev, int resno)
  {
         struct resource *res = dev->resource + resno;
-       resource_size_t align;
+       resource_size_t align, size;
         struct pci_bus *bus;
         int ret;
-       char *type;
  
         align = pci_resource_alignment(dev, res);
         if (!align) {
@@ -213,34 +258,27 @@ int pci_assign_resource(struct pci_dev *dev, int resno)
         }
  
         bus = dev->bus;
-       while ((ret = __pci_assign_resource(bus, dev, resno))) {
-               if (bus->parent && bus->self->transparent)
-                       bus = bus->parent;
-               else
-                       bus = NULL;
-               if (bus)
-                       continue;
-               break;
-       }
+       size = resource_size(res);
+       ret = _pci_assign_resource(dev, resno, size, align);
  
-       if (ret) {
-               if (res->flags & IORESOURCE_MEM)
-                       if (res->flags & IORESOURCE_PREFETCH)
-                               type = "mem pref";
-                       else
-                               type = "mem";
-               else if (res->flags & IORESOURCE_IO)
-                       type = "io";
-               else
-                       type = "unknown";
-               dev_info(&dev->dev,
-                        "BAR %d: can't assign %s (size %#llx)\n",
-                        resno, type, (unsigned long long) resource_size(res));
-       }
+       /*
+        * If we failed to assign anything, let's try the address
+        * where firmware left it.  That at least has a chance of
+        * working, which is better than just leaving it disabled.
+        */
+       if (ret < 0 && dev->fw_addr[resno])
+               ret = pci_revert_fw_address(res, dev, resno, size);
  
+       if (!ret) {
+               res->flags &= ~IORESOURCE_STARTALIGN;
+               dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res);
+               if (resno < PCI_BRIDGE_RESOURCES)
+                       pci_update_resource(dev, resno);
+       }
         return ret;
  }
  
+
  /* Sort resources by alignment */
  void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head)
  {
diff --git a/drivers/power/max8997_charger.c b/drivers/power/max8997_charger.c

index 7106b49b26e492dbea1bbdd888e46c95844ad0ed..ffc5033ea9c92fcbad8957cad81674a376cb1909 100644 (file)
--- a/drivers/power/max8997_charger.c
+++ b/drivers/power/max8997_charger.c
@@ -20,6 +20,7 @@
   */
  
  #include <linux/err.h>
+#include <linux/module.h>
  #include <linux/slab.h>
  #include <linux/platform_device.h>
  #include <linux/power_supply.h>
diff --git a/drivers/power/max8998_charger.c b/drivers/power/max8998_charger.c

index cc21fa2120be241e18f49de15805280fde97d077..ef8efadb58cb1778306e3b977dffc2b585b5c42e 100644 (file)
--- a/drivers/power/max8998_charger.c
+++ b/drivers/power/max8998_charger.c
@@ -20,6 +20,7 @@
   */
  
  #include <linux/err.h>
+#include <linux/module.h>
  #include <linux/slab.h>
  #include <linux/platform_device.h>
  #include <linux/power_supply.h>
diff --git a/drivers/power/s3c_adc_battery.c b/drivers/power/s3c_adc_battery.c

index a675e31b4f132d507cd2c8c72ac09a5ada5cac0b..d32d0d70f9ba951c21204a6fccaf9a2d61951f67 100644 (file)
--- a/drivers/power/s3c_adc_battery.c
+++ b/drivers/power/s3c_adc_battery.c
@@ -20,6 +20,7 @@
  #include <linux/s3c_adc_battery.h>
  #include <linux/errno.h>
  #include <linux/init.h>
+#include <linux/module.h>
  
  #include <plat/adc.h>
  
diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c

index ee893581d4b7ea8df5e386216de9610fcdc0c3e2..ebe77dd87dafb8c2fe9253c7ca2b38304838173f 100644 (file)
--- a/drivers/rapidio/rio-scan.c
+++ b/drivers/rapidio/rio-scan.c
@@ -505,8 +505,7 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net,
         rdev->dev.dma_mask = &rdev->dma_mask;
         rdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
  
-       if ((rdev->pef & RIO_PEF_INB_DOORBELL) &&
-           (rdev->dst_ops & RIO_DST_OPS_DOORBELL))
+       if (rdev->dst_ops & RIO_DST_OPS_DOORBELL)
                 rio_init_dbell_res(&rdev->riores[RIO_DOORBELL_RESOURCE],
                                    0, 0xffff);
  
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c

index 3195dbd3ec3468910396c029e42d9613e206b283..44e91e598f8d975ae7570b80366a67540611c8da 100644 (file)
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -639,7 +639,7 @@ EXPORT_SYMBOL_GPL(rtc_irq_unregister);
  static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled)
  {
         /*
-        * We unconditionally cancel the timer here, because otherwise
+        * We always cancel the timer here first, because otherwise
          * we could run into BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
          * when we manage to start the timer before the callback
          * returns HRTIMER_RESTART.
@@ -708,7 +708,7 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq)
         int err = 0;
         unsigned long flags;
  
-       if (freq <= 0 || freq > 5000)
+       if (freq <= 0 || freq > RTC_MAX_FREQ)
                 return -EINVAL;
  retry:
         spin_lock_irqsave(&rtc->irq_task_lock, flags);
diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c

index 335551d333b24f10a428c4f332792190181ec82f..14a42a1edc66d55be04a79c67aa622db7b10b38d 100644 (file)
--- a/drivers/rtc/rtc-ep93xx.c
+++ b/drivers/rtc/rtc-ep93xx.c
@@ -36,6 +36,7 @@
   */
  struct ep93xx_rtc {
         void __iomem    *mmio_base;
+       struct rtc_device *rtc;
  };
  
  static int ep93xx_rtc_get_swcomp(struct device *dev, unsigned short *preload,
@@ -130,7 +131,6 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev)
  {
         struct ep93xx_rtc *ep93xx_rtc;
         struct resource *res;
-       struct rtc_device *rtc;
         int err;
  
         ep93xx_rtc = devm_kzalloc(&pdev->dev, sizeof(*ep93xx_rtc), GFP_KERNEL);
@@ -151,12 +151,12 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev)
                 return -ENXIO;
  
         pdev->dev.platform_data = ep93xx_rtc;
-       platform_set_drvdata(pdev, rtc);
+       platform_set_drvdata(pdev, ep93xx_rtc);
  
-       rtc = rtc_device_register(pdev->name,
+       ep93xx_rtc->rtc = rtc_device_register(pdev->name,
                                 &pdev->dev, &ep93xx_rtc_ops, THIS_MODULE);
-       if (IS_ERR(rtc)) {
-               err = PTR_ERR(rtc);
+       if (IS_ERR(ep93xx_rtc->rtc)) {
+               err = PTR_ERR(ep93xx_rtc->rtc);
                 goto exit;
         }
  
@@ -167,7 +167,7 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev)
         return 0;
  
  fail:
-       rtc_device_unregister(rtc);
+       rtc_device_unregister(ep93xx_rtc->rtc);
  exit:
         platform_set_drvdata(pdev, NULL);
         pdev->dev.platform_data = NULL;
@@ -176,11 +176,11 @@ exit:
  
  static int __exit ep93xx_rtc_remove(struct platform_device *pdev)
  {
-       struct rtc_device *rtc = platform_get_drvdata(pdev);
+       struct ep93xx_rtc *ep93xx_rtc = platform_get_drvdata(pdev);
  
         sysfs_remove_group(&pdev->dev.kobj, &ep93xx_rtc_sysfs_files);
         platform_set_drvdata(pdev, NULL);
-       rtc_device_unregister(rtc);
+       rtc_device_unregister(ep93xx_rtc->rtc);
         pdev->dev.platform_data = NULL;
  
         return 0;
diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c

index 2dd3c0163272afa30fb7b836851982afe1c8035a..d93a9608b1f0dfec14db5c3e5e3e6e941382b618 100644 (file)
--- a/drivers/rtc/rtc-imxdi.c
+++ b/drivers/rtc/rtc-imxdi.c
@@ -35,6 +35,7 @@
  #include <linux/module.h>
  #include <linux/platform_device.h>
  #include <linux/rtc.h>
+#include <linux/sched.h>
  #include <linux/workqueue.h>
  
  /* DryIce Register Definitions */
diff --git a/drivers/rtc/rtc-lib.c b/drivers/rtc/rtc-lib.c

index 075f1708deae844499b8de25e2b0c523846b9299..c4cf05731118a245cff79181ba81eac90ca92be6 100644 (file)
--- a/drivers/rtc/rtc-lib.c
+++ b/drivers/rtc/rtc-lib.c
@@ -85,6 +85,8 @@ void rtc_time_to_tm(unsigned long time, struct rtc_time *tm)
         time -= tm->tm_hour * 3600;
         tm->tm_min = time / 60;
         tm->tm_sec = time - tm->tm_min * 60;
+
+       tm->tm_isdst = 0;
  }
  EXPORT_SYMBOL(rtc_time_to_tm);
  
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c

index 9329dbb9ebabeafc5bbfe5f0b1123a5ed5bac33d..7639ab906f02e35ceec0a75bfaddd418e17478e8 100644 (file)
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -51,6 +51,27 @@ static enum s3c_cpu_type s3c_rtc_cpu_type;
  
  static DEFINE_SPINLOCK(s3c_rtc_pie_lock);
  
+static void s3c_rtc_alarm_clk_enable(bool enable)
+{
+       static DEFINE_SPINLOCK(s3c_rtc_alarm_clk_lock);
+       static bool alarm_clk_enabled;
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&s3c_rtc_alarm_clk_lock, irq_flags);
+       if (enable) {
+               if (!alarm_clk_enabled) {
+                       clk_enable(rtc_clk);
+                       alarm_clk_enabled = true;
+               }
+       } else {
+               if (alarm_clk_enabled) {
+                       clk_disable(rtc_clk);
+                       alarm_clk_enabled = false;
+               }
+       }
+       spin_unlock_irqrestore(&s3c_rtc_alarm_clk_lock, irq_flags);
+}
+
  /* IRQ Handlers */
  
  static irqreturn_t s3c_rtc_alarmirq(int irq, void *id)
@@ -64,6 +85,9 @@ static irqreturn_t s3c_rtc_alarmirq(int irq, void *id)
                 writeb(S3C2410_INTP_ALM, s3c_rtc_base + S3C2410_INTP);
  
         clk_disable(rtc_clk);
+
+       s3c_rtc_alarm_clk_enable(false);
+
         return IRQ_HANDLED;
  }
  
@@ -97,6 +121,8 @@ static int s3c_rtc_setaie(struct device *dev, unsigned int enabled)
         writeb(tmp, s3c_rtc_base + S3C2410_RTCALM);
         clk_disable(rtc_clk);
  
+       s3c_rtc_alarm_clk_enable(enabled);
+
         return 0;
  }
  
@@ -152,10 +178,6 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
                 goto retry_get_time;
         }
  
-       pr_debug("read time %04d.%02d.%02d %02d:%02d:%02d\n",
-                1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
-                rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
-
         rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec);
         rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min);
         rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour);
@@ -164,6 +186,11 @@ static int s3c_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
         rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year);
  
         rtc_tm->tm_year += 100;
+
+       pr_debug("read time %04d.%02d.%02d %02d:%02d:%02d\n",
+                1900 + rtc_tm->tm_year, rtc_tm->tm_mon, rtc_tm->tm_mday,
+                rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
+
         rtc_tm->tm_mon -= 1;
  
         clk_disable(rtc_clk);
@@ -269,10 +296,9 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
         clk_enable(rtc_clk);
         pr_debug("s3c_rtc_setalarm: %d, %04d.%02d.%02d %02d:%02d:%02d\n",
                  alrm->enabled,
-                1900 + tm->tm_year, tm->tm_mon, tm->tm_mday,
+                1900 + tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
                  tm->tm_hour, tm->tm_min, tm->tm_sec);
  
-
         alrm_en = readb(base + S3C2410_RTCALM) & S3C2410_RTCALM_ALMEN;
         writeb(0x00, base + S3C2410_RTCALM);
  
@@ -319,49 +345,7 @@ static int s3c_rtc_proc(struct device *dev, struct seq_file *seq)
         return 0;
  }
  
-static int s3c_rtc_open(struct device *dev)
-{
-       struct platform_device *pdev = to_platform_device(dev);
-       struct rtc_device *rtc_dev = platform_get_drvdata(pdev);
-       int ret;
-
-       ret = request_irq(s3c_rtc_alarmno, s3c_rtc_alarmirq,
-                         IRQF_DISABLED,  "s3c2410-rtc alarm", rtc_dev);
-
-       if (ret) {
-               dev_err(dev, "IRQ%d error %d\n", s3c_rtc_alarmno, ret);
-               return ret;
-       }
-
-       ret = request_irq(s3c_rtc_tickno, s3c_rtc_tickirq,
-                         IRQF_DISABLED,  "s3c2410-rtc tick", rtc_dev);
-
-       if (ret) {
-               dev_err(dev, "IRQ%d error %d\n", s3c_rtc_tickno, ret);
-               goto tick_err;
-       }
-
-       return ret;
-
- tick_err:
-       free_irq(s3c_rtc_alarmno, rtc_dev);
-       return ret;
-}
-
-static void s3c_rtc_release(struct device *dev)
-{
-       struct platform_device *pdev = to_platform_device(dev);
-       struct rtc_device *rtc_dev = platform_get_drvdata(pdev);
-
-       /* do not clear AIE here, it may be needed for wake */
-
-       free_irq(s3c_rtc_alarmno, rtc_dev);
-       free_irq(s3c_rtc_tickno, rtc_dev);
-}
-
  static const struct rtc_class_ops s3c_rtcops = {
-       .open           = s3c_rtc_open,
-       .release        = s3c_rtc_release,
         .read_time      = s3c_rtc_gettime,
         .set_time       = s3c_rtc_settime,
         .read_alarm     = s3c_rtc_getalarm,
@@ -425,6 +409,9 @@ static int __devexit s3c_rtc_remove(struct platform_device *dev)
  {
         struct rtc_device *rtc = platform_get_drvdata(dev);
  
+       free_irq(s3c_rtc_alarmno, rtc);
+       free_irq(s3c_rtc_tickno, rtc);
+
         platform_set_drvdata(dev, NULL);
         rtc_device_unregister(rtc);
  
@@ -548,10 +535,32 @@ static int __devinit s3c_rtc_probe(struct platform_device *pdev)
  
         s3c_rtc_setfreq(&pdev->dev, 1);
  
+       ret = request_irq(s3c_rtc_alarmno, s3c_rtc_alarmirq,
+                         IRQF_DISABLED,  "s3c2410-rtc alarm", rtc);
+       if (ret) {
+               dev_err(&pdev->dev, "IRQ%d error %d\n", s3c_rtc_alarmno, ret);
+               goto err_alarm_irq;
+       }
+
+       ret = request_irq(s3c_rtc_tickno, s3c_rtc_tickirq,
+                         IRQF_DISABLED,  "s3c2410-rtc tick", rtc);
+       if (ret) {
+               dev_err(&pdev->dev, "IRQ%d error %d\n", s3c_rtc_tickno, ret);
+               free_irq(s3c_rtc_alarmno, rtc);
+               goto err_tick_irq;
+       }
+
         clk_disable(rtc_clk);
  
         return 0;
  
+ err_tick_irq:
+       free_irq(s3c_rtc_alarmno, rtc);
+
+ err_alarm_irq:
+       platform_set_drvdata(pdev, NULL);
+       rtc_device_unregister(rtc);
+
   err_nortc:
         s3c_rtc_enable(pdev, 0);
         clk_disable(rtc_clk);
diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c

index 9a81f778d6b22216469b641d0dbe44c809667fd8..20687d55e7a72d5eb2871fd8504d6a2fa457e9e5 100644 (file)
--- a/drivers/rtc/rtc-twl.c
+++ b/drivers/rtc/rtc-twl.c
@@ -362,14 +362,6 @@ static irqreturn_t twl_rtc_interrupt(int irq, void *rtc)
         int res;
         u8 rd_reg;
  
-#ifdef CONFIG_LOCKDEP
-       /* WORKAROUND for lockdep forcing IRQF_DISABLED on us, which
-        * we don't want and can't tolerate.  Although it might be
-        * friendlier not to borrow this thread context...
-        */
-       local_irq_enable();
-#endif
-
         res = twl_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG);
         if (res)
                 goto out;
@@ -428,24 +420,12 @@ static struct rtc_class_ops twl_rtc_ops = {
  static int __devinit twl_rtc_probe(struct platform_device *pdev)
  {
         struct rtc_device *rtc;
-       int ret = 0;
+       int ret = -EINVAL;
         int irq = platform_get_irq(pdev, 0);
         u8 rd_reg;
  
         if (irq <= 0)
-               return -EINVAL;
-
-       rtc = rtc_device_register(pdev->name,
-                                 &pdev->dev, &twl_rtc_ops, THIS_MODULE);
-       if (IS_ERR(rtc)) {
-               ret = PTR_ERR(rtc);
-               dev_err(&pdev->dev, "can't register RTC device, err %ld\n",
-                       PTR_ERR(rtc));
-               goto out0;
-
-       }
-
-       platform_set_drvdata(pdev, rtc);
+               goto out1;
  
         ret = twl_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG);
         if (ret < 0)
@@ -462,14 +442,6 @@ static int __devinit twl_rtc_probe(struct platform_device *pdev)
         if (ret < 0)
                 goto out1;
  
-       ret = request_irq(irq, twl_rtc_interrupt,
-                               IRQF_TRIGGER_RISING,
-                               dev_name(&rtc->dev), rtc);
-       if (ret < 0) {
-               dev_err(&pdev->dev, "IRQ is not free.\n");
-               goto out1;
-       }
-
         if (twl_class_is_6030()) {
                 twl6030_interrupt_unmask(TWL6030_RTC_INT_MASK,
                         REG_INT_MSK_LINE_A);
@@ -480,28 +452,44 @@ static int __devinit twl_rtc_probe(struct platform_device *pdev)
         /* Check RTC module status, Enable if it is off */
         ret = twl_rtc_read_u8(&rd_reg, REG_RTC_CTRL_REG);
         if (ret < 0)
-               goto out2;
+               goto out1;
  
         if (!(rd_reg & BIT_RTC_CTRL_REG_STOP_RTC_M)) {
                 dev_info(&pdev->dev, "Enabling TWL-RTC.\n");
                 rd_reg = BIT_RTC_CTRL_REG_STOP_RTC_M;
                 ret = twl_rtc_write_u8(rd_reg, REG_RTC_CTRL_REG);
                 if (ret < 0)
-                       goto out2;
+                       goto out1;
         }
  
         /* init cached IRQ enable bits */
         ret = twl_rtc_read_u8(&rtc_irq_bits, REG_RTC_INTERRUPTS_REG);
         if (ret < 0)
+               goto out1;
+
+       rtc = rtc_device_register(pdev->name,
+                                 &pdev->dev, &twl_rtc_ops, THIS_MODULE);
+       if (IS_ERR(rtc)) {
+               ret = PTR_ERR(rtc);
+               dev_err(&pdev->dev, "can't register RTC device, err %ld\n",
+                       PTR_ERR(rtc));
+               goto out1;
+       }
+
+       ret = request_threaded_irq(irq, NULL, twl_rtc_interrupt,
+                                  IRQF_TRIGGER_RISING,
+                                  dev_name(&rtc->dev), rtc);
+       if (ret < 0) {
+               dev_err(&pdev->dev, "IRQ is not free.\n");
                 goto out2;
+       }
  
-       return ret;
+       platform_set_drvdata(pdev, rtc);
+       return 0;
  
  out2:
-       free_irq(irq, rtc);
-out1:
         rtc_device_unregister(rtc);
-out0:
+out1:
         return ret;
  }
  
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c

index eb4e034378cd0d4daba20ee6988343b571ac000f..f1a2016829fc5654335abaea3d7682eae4d33370 100644 (file)
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -249,6 +249,7 @@ static int dasd_ioctl_reset_profile(struct dasd_block *block)
  static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp)
  {
         struct dasd_profile_info_t *data;
+       int rc = 0;
  
         data = kmalloc(sizeof(*data), GFP_KERNEL);
         if (!data)
@@ -279,11 +280,14 @@ static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp)
                 spin_unlock_bh(&block->profile.lock);
         } else {
                 spin_unlock_bh(&block->profile.lock);
-               return -EIO;
+               rc = -EIO;
+               goto out;
         }
         if (copy_to_user(argp, data, sizeof(*data)))
-               return -EFAULT;
-       return 0;
+               rc = -EFAULT;
+out:
+       kfree(data);
+       return rc;
  }
  #else
  static int dasd_ioctl_reset_profile(struct dasd_block *block)
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c

index be55fb2b1b1c58f78a3df48ad8ac7be544f3ec51..837e010299a894f4f249fc6c0707a95634146598 100644 (file)
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -383,8 +383,10 @@ static int sclp_attach_storage(u8 id)
         switch (sccb->header.response_code) {
         case 0x0020:
                 set_bit(id, sclp_storage_ids);
-               for (i = 0; i < sccb->assigned; i++)
-                       sclp_unassign_storage(sccb->entries[i] >> 16);
+               for (i = 0; i < sccb->assigned; i++) {
+                       if (sccb->entries[i])
+                               sclp_unassign_storage(sccb->entries[i] >> 16);
+               }
                 break;
         default:
                 rc = -EIO;
diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c

index 9ae80cd5953bce99f24d9f7b85dd77d332b25a6f..dba72a4e6a1cd607288ce533c64a0313f132c2b4 100644 (file)
--- a/drivers/scsi/bnx2i/bnx2i_hwi.c
+++ b/drivers/scsi/bnx2i/bnx2i_hwi.c
@@ -563,7 +563,7 @@ int bnx2i_send_iscsi_nopout(struct bnx2i_conn *bnx2i_conn,
         nopout_wqe->itt = ((u16)task->itt |
                            (ISCSI_TASK_TYPE_MPATH <<
                             ISCSI_TMF_REQUEST_TYPE_SHIFT));
-       nopout_wqe->ttt = nopout_hdr->ttt;
+       nopout_wqe->ttt = be32_to_cpu(nopout_hdr->ttt);
         nopout_wqe->flags = 0;
         if (!unsol)
                 nopout_wqe->flags = ISCSI_NOP_OUT_REQUEST_LOCAL_COMPLETION;
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c

index ba710e350ac56931a234d1cf0d718f6ae12cea7d..5d0e9a24ae94effa0a5561d997a96d9802d50d0d 100644 (file)
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -432,6 +432,8 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
         u8 flogi_maddr[ETH_ALEN];
         const struct net_device_ops *ops;
  
+       rtnl_lock();
+
         /*
          * Don't listen for Ethernet packets anymore.
          * synchronize_net() ensures that the packet handlers are not running
@@ -461,6 +463,8 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
                                         " specific feature for LLD.\n");
         }
  
+       rtnl_unlock();
+
         /* Release the self-reference taken during fcoe_interface_create() */
         fcoe_interface_put(fcoe);
  }
@@ -1951,11 +1955,8 @@ static void fcoe_destroy_work(struct work_struct *work)
         fcoe_if_destroy(port->lport);
  
         /* Do not tear down the fcoe interface for NPIV port */
-       if (!npiv) {
-               rtnl_lock();
+       if (!npiv)
                 fcoe_interface_cleanup(fcoe);
-               rtnl_unlock();
-       }
  
         mutex_unlock(&fcoe_config_mutex);
  }
@@ -2009,8 +2010,9 @@ static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode)
                 printk(KERN_ERR "fcoe: Failed to create interface (%s)\n",
                        netdev->name);
                 rc = -EIO;
+               rtnl_unlock();
                 fcoe_interface_cleanup(fcoe);
-               goto out_nodev;
+               goto out_nortnl;
         }
  
         /* Make this the "master" N_Port */
@@ -2027,6 +2029,7 @@ static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode)
  
  out_nodev:
         rtnl_unlock();
+out_nortnl:
         mutex_unlock(&fcoe_config_mutex);
         return rc;
  }
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c

index ec61bdb833ac4baeab42c6c0473801dc02ec6743..b200b736b000dc357ba2e416fb899b394c12fd3a 100644 (file)
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -676,6 +676,16 @@ static void hpsa_scsi_replace_entry(struct ctlr_info *h, int hostno,
         BUG_ON(entry < 0 || entry >= HPSA_MAX_SCSI_DEVS_PER_HBA);
         removed[*nremoved] = h->dev[entry];
         (*nremoved)++;
+
+       /*
+        * New physical devices won't have target/lun assigned yet
+        * so we need to preserve the values in the slot we are replacing.
+        */
+       if (new_entry->target == -1) {
+               new_entry->target = h->dev[entry]->target;
+               new_entry->lun = h->dev[entry]->lun;
+       }
+
         h->dev[entry] = new_entry;
         added[*nadded] = new_entry;
         (*nadded)++;
@@ -1548,10 +1558,17 @@ static inline void hpsa_set_bus_target_lun(struct hpsa_scsi_dev_t *device,
  }
  
  static int hpsa_update_device_info(struct ctlr_info *h,
-       unsigned char scsi3addr[], struct hpsa_scsi_dev_t *this_device)
+       unsigned char scsi3addr[], struct hpsa_scsi_dev_t *this_device,
+       unsigned char *is_OBDR_device)
  {
-#define OBDR_TAPE_INQ_SIZE 49
+
+#define OBDR_SIG_OFFSET 43
+#define OBDR_TAPE_SIG "$DR-10"
+#define OBDR_SIG_LEN (sizeof(OBDR_TAPE_SIG) - 1)
+#define OBDR_TAPE_INQ_SIZE (OBDR_SIG_OFFSET + OBDR_SIG_LEN)
+
         unsigned char *inq_buff;
+       unsigned char *obdr_sig;
  
         inq_buff = kzalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
         if (!inq_buff)
@@ -1583,6 +1600,16 @@ static int hpsa_update_device_info(struct ctlr_info *h,
         else
                 this_device->raid_level = RAID_UNKNOWN;
  
+       if (is_OBDR_device) {
+               /* See if this is a One-Button-Disaster-Recovery device
+                * by looking for "$DR-10" at offset 43 in inquiry data.
+                */
+               obdr_sig = &inq_buff[OBDR_SIG_OFFSET];
+               *is_OBDR_device = (this_device->devtype == TYPE_ROM &&
+                                       strncmp(obdr_sig, OBDR_TAPE_SIG,
+                                               OBDR_SIG_LEN) == 0);
+       }
+
         kfree(inq_buff);
         return 0;
  
@@ -1716,7 +1743,7 @@ static int add_msa2xxx_enclosure_device(struct ctlr_info *h,
                 return 0;
         }
  
-       if (hpsa_update_device_info(h, scsi3addr, this_device))
+       if (hpsa_update_device_info(h, scsi3addr, this_device, NULL))
                 return 0;
         (*nmsa2xxx_enclosures)++;
         hpsa_set_bus_target_lun(this_device, bus, target, 0);
@@ -1808,7 +1835,6 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
          */
         struct ReportLUNdata *physdev_list = NULL;
         struct ReportLUNdata *logdev_list = NULL;
-       unsigned char *inq_buff = NULL;
         u32 nphysicals = 0;
         u32 nlogicals = 0;
         u32 ndev_allocated = 0;
@@ -1824,11 +1850,9 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
                 GFP_KERNEL);
         physdev_list = kzalloc(reportlunsize, GFP_KERNEL);
         logdev_list = kzalloc(reportlunsize, GFP_KERNEL);
-       inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
         tmpdevice = kzalloc(sizeof(*tmpdevice), GFP_KERNEL);
  
-       if (!currentsd || !physdev_list || !logdev_list ||
-               !inq_buff || !tmpdevice) {
+       if (!currentsd || !physdev_list || !logdev_list || !tmpdevice) {
                 dev_err(&h->pdev->dev, "out of memory\n");
                 goto out;
         }
@@ -1863,7 +1887,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
         /* adjust our table of devices */
         nmsa2xxx_enclosures = 0;
         for (i = 0; i < nphysicals + nlogicals + 1; i++) {
-               u8 *lunaddrbytes;
+               u8 *lunaddrbytes, is_OBDR = 0;
  
                 /* Figure out where the LUN ID info is coming from */
                 lunaddrbytes = figure_lunaddrbytes(h, raid_ctlr_position,
@@ -1874,7 +1898,8 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
                         continue;
  
                 /* Get device type, vendor, model, device id */
-               if (hpsa_update_device_info(h, lunaddrbytes, tmpdevice))
+               if (hpsa_update_device_info(h, lunaddrbytes, tmpdevice,
+                                                       &is_OBDR))
                         continue; /* skip it if we can't talk to it. */
                 figure_bus_target_lun(h, lunaddrbytes, &bus, &target, &lun,
                         tmpdevice);
@@ -1898,7 +1923,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
                 hpsa_set_bus_target_lun(this_device, bus, target, lun);
  
                 switch (this_device->devtype) {
-               case TYPE_ROM: {
+               case TYPE_ROM:
                         /* We don't *really* support actual CD-ROM devices,
                          * just "One Button Disaster Recovery" tape drive
                          * which temporarily pretends to be a CD-ROM drive.
@@ -1906,15 +1931,8 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
                          * device by checking for "$DR-10" in bytes 43-48 of
                          * the inquiry data.
                          */
-                               char obdr_sig[7];
-#define OBDR_TAPE_SIG "$DR-10"
-                               strncpy(obdr_sig, &inq_buff[43], 6);
-                               obdr_sig[6] = '\0';
-                               if (strncmp(obdr_sig, OBDR_TAPE_SIG, 6) != 0)
-                                       /* Not OBDR device, ignore it. */
-                                       break;
-                       }
-                       ncurrent++;
+                       if (is_OBDR)
+                               ncurrent++;
                         break;
                 case TYPE_DISK:
                         if (i < nphysicals)
@@ -1947,7 +1965,6 @@ out:
         for (i = 0; i < ndev_allocated; i++)
                 kfree(currentsd[i]);
         kfree(currentsd);
-       kfree(inq_buff);
         kfree(physdev_list);
         kfree(logdev_list);
  }
diff --git a/drivers/scsi/isci/host.c b/drivers/scsi/isci/host.c

index 26072f1e985250f42c3af9cc6c031565b83b123f..6981b773a88d42bdf9e7213e31844d2c22711317 100644 (file)
--- a/drivers/scsi/isci/host.c
+++ b/drivers/scsi/isci/host.c
@@ -531,6 +531,9 @@ static void sci_controller_process_completions(struct isci_host *ihost)
                         break;
  
                 case SCU_COMPLETION_TYPE_EVENT:
+                       sci_controller_event_completion(ihost, ent);
+                       break;
+
                 case SCU_COMPLETION_TYPE_NOTIFY: {
                         event_cycle ^= ((event_get+1) & SCU_MAX_EVENTS) <<
                                        (SMU_COMPLETION_QUEUE_GET_EVENT_CYCLE_BIT_SHIFT - SCU_MAX_EVENTS_SHIFT);
@@ -1091,6 +1094,7 @@ static void isci_host_completion_routine(unsigned long data)
         struct isci_request *request;
         struct isci_request *next_request;
         struct sas_task     *task;
+       u16 active;
  
         INIT_LIST_HEAD(&completed_request_list);
         INIT_LIST_HEAD(&errored_request_list);
@@ -1181,6 +1185,13 @@ static void isci_host_completion_routine(unsigned long data)
                 }
         }
  
+       /* the coalesence timeout doubles at each encoding step, so
+        * update it based on the ilog2 value of the outstanding requests
+        */
+       active = isci_tci_active(ihost);
+       writel(SMU_ICC_GEN_VAL(NUMBER, active) |
+              SMU_ICC_GEN_VAL(TIMER, ISCI_COALESCE_BASE + ilog2(active)),
+              &ihost->smu_registers->interrupt_coalesce_control);
  }
  
  /**
@@ -1471,7 +1482,7 @@ static void sci_controller_ready_state_enter(struct sci_base_state_machine *sm)
         struct isci_host *ihost = container_of(sm, typeof(*ihost), sm);
  
         /* set the default interrupt coalescence number and timeout value. */
-       sci_controller_set_interrupt_coalescence(ihost, 0x10, 250);
+       sci_controller_set_interrupt_coalescence(ihost, 0, 0);
  }
  
  static void sci_controller_ready_state_exit(struct sci_base_state_machine *sm)
diff --git a/drivers/scsi/isci/host.h b/drivers/scsi/isci/host.h

index 062101a39f79a07f67fd8e634d9711d2118e5fe7..9f33831a2f04ae211e43000d2b3fb4d773708fa6 100644 (file)
--- a/drivers/scsi/isci/host.h
+++ b/drivers/scsi/isci/host.h
@@ -369,6 +369,9 @@ static inline struct isci_host *dev_to_ihost(struct domain_device *dev)
  #define ISCI_TAG_SEQ(tag) (((tag) >> 12) & (SCI_MAX_SEQ-1))
  #define ISCI_TAG_TCI(tag) ((tag) & (SCI_MAX_IO_REQUESTS-1))
  
+/* interrupt coalescing baseline: 9 == 3 to 5us interrupt delay per command */
+#define ISCI_COALESCE_BASE 9
+
  /* expander attached sata devices require 3 rnc slots */
  static inline int sci_remote_device_node_count(struct isci_remote_device *idev)
  {
diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c

index 61e0d09e2b57950dc47f875a782f58941f42495a..29aa34efb0f5bd03e3165e0918f9fed9c72b4cd3 100644 (file)
--- a/drivers/scsi/isci/init.c
+++ b/drivers/scsi/isci/init.c
@@ -59,10 +59,19 @@
  #include <linux/firmware.h>
  #include <linux/efi.h>
  #include <asm/string.h>
+#include <scsi/scsi_host.h>
  #include "isci.h"
  #include "task.h"
  #include "probe_roms.h"
  
+#define MAJ 1
+#define MIN 0
+#define BUILD 0
+#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
+       __stringify(BUILD)
+
+MODULE_VERSION(DRV_VERSION);
+
  static struct scsi_transport_template *isci_transport_template;
  
  static DEFINE_PCI_DEVICE_TABLE(isci_id_table) = {
@@ -113,6 +122,22 @@ unsigned char max_concurr_spinup = 1;
  module_param(max_concurr_spinup, byte, 0);
  MODULE_PARM_DESC(max_concurr_spinup, "Max concurrent device spinup");
  
+static ssize_t isci_show_id(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct Scsi_Host *shost = container_of(dev, typeof(*shost), shost_dev);
+       struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost);
+       struct isci_host *ihost = container_of(sas_ha, typeof(*ihost), sas_ha);
+
+       return snprintf(buf, PAGE_SIZE, "%d\n", ihost->id);
+}
+
+static DEVICE_ATTR(isci_id, S_IRUGO, isci_show_id, NULL);
+
+struct device_attribute *isci_host_attrs[] = {
+       &dev_attr_isci_id,
+       NULL
+};
+
  static struct scsi_host_template isci_sht = {
  
         .module                         = THIS_MODULE,
@@ -138,6 +163,7 @@ static struct scsi_host_template isci_sht = {
         .slave_alloc                    = sas_slave_alloc,
         .target_destroy                 = sas_target_destroy,
         .ioctl                          = sas_ioctl,
+       .shost_attrs                    = isci_host_attrs,
  };
  
  static struct sas_domain_function_template isci_transport_ops  = {
@@ -232,17 +258,6 @@ static int isci_register_sas_ha(struct isci_host *isci_host)
         return 0;
  }
  
-static ssize_t isci_show_id(struct device *dev, struct device_attribute *attr, char *buf)
-{
-       struct Scsi_Host *shost = container_of(dev, typeof(*shost), shost_dev);
-       struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost);
-       struct isci_host *ihost = container_of(sas_ha, typeof(*ihost), sas_ha);
-
-       return snprintf(buf, PAGE_SIZE, "%d\n", ihost->id);
-}
-
-static DEVICE_ATTR(isci_id, S_IRUGO, isci_show_id, NULL);
-
  static void isci_unregister(struct isci_host *isci_host)
  {
         struct Scsi_Host *shost;
@@ -251,7 +266,6 @@ static void isci_unregister(struct isci_host *isci_host)
                 return;
  
         shost = isci_host->shost;
-       device_remove_file(&shost->shost_dev, &dev_attr_isci_id);
  
         sas_unregister_ha(&isci_host->sas_ha);
  
@@ -415,14 +429,8 @@ static struct isci_host *isci_host_alloc(struct pci_dev *pdev, int id)
         if (err)
                 goto err_shost_remove;
  
-       err = device_create_file(&shost->shost_dev, &dev_attr_isci_id);
-       if (err)
-               goto err_unregister_ha;
-
         return isci_host;
  
- err_unregister_ha:
-       sas_unregister_ha(&(isci_host->sas_ha));
   err_shost_remove:
         scsi_remove_host(shost);
   err_shost:
@@ -540,7 +548,8 @@ static __init int isci_init(void)
  {
         int err;
  
-       pr_info("%s: Intel(R) C600 SAS Controller Driver\n", DRV_NAME);
+       pr_info("%s: Intel(R) C600 SAS Controller Driver - version %s\n",
+               DRV_NAME, DRV_VERSION);
  
         isci_transport_template = sas_domain_attach_transport(&isci_transport_ops);
         if (!isci_transport_template)
diff --git a/drivers/scsi/isci/phy.c b/drivers/scsi/isci/phy.c

index 79313a7a2356ac2e85c9f80ec4d1f6e54d28fb00..430fc8ff014a167f7bbf61fab922313a4ca848e2 100644 (file)
--- a/drivers/scsi/isci/phy.c
+++ b/drivers/scsi/isci/phy.c
@@ -104,6 +104,7 @@ sci_phy_link_layer_initialization(struct isci_phy *iphy,
         u32 parity_count = 0;
         u32 llctl, link_rate;
         u32 clksm_value = 0;
+       u32 sp_timeouts = 0;
  
         iphy->link_layer_registers = reg;
  
@@ -211,6 +212,18 @@ sci_phy_link_layer_initialization(struct isci_phy *iphy,
         llctl |= SCU_SAS_LLCTL_GEN_VAL(MAX_LINK_RATE, link_rate);
         writel(llctl, &iphy->link_layer_registers->link_layer_control);
  
+       sp_timeouts = readl(&iphy->link_layer_registers->sas_phy_timeouts);
+
+       /* Clear the default 0x36 (54us) RATE_CHANGE timeout value. */
+       sp_timeouts &= ~SCU_SAS_PHYTOV_GEN_VAL(RATE_CHANGE, 0xFF);
+
+       /* Set RATE_CHANGE timeout value to 0x3B (59us).  This ensures SCU can
+        * lock with 3Gb drive when SCU max rate is set to 1.5Gb.
+        */
+       sp_timeouts |= SCU_SAS_PHYTOV_GEN_VAL(RATE_CHANGE, 0x3B);
+
+       writel(sp_timeouts, &iphy->link_layer_registers->sas_phy_timeouts);
+
         if (is_a2(ihost->pdev)) {
                 /* Program the max ARB time for the PHY to 700us so we inter-operate with
                  * the PMC expander which shuts down PHYs if the expander PHY generates too
diff --git a/drivers/scsi/isci/registers.h b/drivers/scsi/isci/registers.h

index 9b266c7428e87befc0020cb93a9a9d609f8a9f9f..00afc738bbed368baacef402058a1639ebabf248 100644 (file)
--- a/drivers/scsi/isci/registers.h
+++ b/drivers/scsi/isci/registers.h
@@ -1299,6 +1299,18 @@ struct scu_transport_layer_registers {
  #define SCU_AFE_XCVRCR_OFFSET       0x00DC
  #define SCU_AFE_LUTCR_OFFSET        0x00E0
  
+#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_ALIGN_DETECTION_SHIFT          (0UL)
+#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_ALIGN_DETECTION_MASK           (0x000000FFUL)
+#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_HOT_PLUG_SHIFT                 (8UL)
+#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_HOT_PLUG_MASK                  (0x0000FF00UL)
+#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_COMSAS_DETECTION_SHIFT         (16UL)
+#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_COMSAS_DETECTION_MASK          (0x00FF0000UL)
+#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_RATE_CHANGE_SHIFT              (24UL)
+#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_RATE_CHANGE_MASK               (0xFF000000UL)
+
+#define SCU_SAS_PHYTOV_GEN_VAL(name, value) \
+       SCU_GEN_VALUE(SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_##name, value)
+
  #define SCU_SAS_LINK_LAYER_CONTROL_MAX_LINK_RATE_SHIFT                  (0)
  #define SCU_SAS_LINK_LAYER_CONTROL_MAX_LINK_RATE_MASK                   (0x00000003)
  #define SCU_SAS_LINK_LAYER_CONTROL_MAX_LINK_RATE_GEN1                   (0)
diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c

index a46e07ac789f64afcd2a6360ad2574fbda39d76b..b5d3a8c4d3297ebf87e72a8087d5277500abc17d 100644 (file)
--- a/drivers/scsi/isci/request.c
+++ b/drivers/scsi/isci/request.c
@@ -732,12 +732,20 @@ sci_io_request_terminate(struct isci_request *ireq)
                 sci_change_state(&ireq->sm, SCI_REQ_ABORTING);
                 return SCI_SUCCESS;
         case SCI_REQ_TASK_WAIT_TC_RESP:
+               /* The task frame was already confirmed to have been
+                * sent by the SCU HW.  Since the state machine is
+                * now only waiting for the task response itself,
+                * abort the request and complete it immediately
+                * and don't wait for the task response.
+                */
                 sci_change_state(&ireq->sm, SCI_REQ_ABORTING);
                 sci_change_state(&ireq->sm, SCI_REQ_COMPLETED);
                 return SCI_SUCCESS;
         case SCI_REQ_ABORTING:
-               sci_change_state(&ireq->sm, SCI_REQ_COMPLETED);
-               return SCI_SUCCESS;
+               /* If a request has a termination requested twice, return
+                * a failure indication, since HW confirmation of the first
+                * abort is still outstanding.
+                */
         case SCI_REQ_COMPLETED:
         default:
                 dev_warn(&ireq->owning_controller->pdev->dev,
@@ -2399,22 +2407,19 @@ static void isci_task_save_for_upper_layer_completion(
         }
  }
  
-static void isci_request_process_stp_response(struct sas_task *task,
-                                             void *response_buffer)
+static void isci_process_stp_response(struct sas_task *task, struct dev_to_host_fis *fis)
  {
-       struct dev_to_host_fis *d2h_reg_fis = response_buffer;
         struct task_status_struct *ts = &task->task_status;
         struct ata_task_resp *resp = (void *)&ts->buf[0];
  
-       resp->frame_len = le16_to_cpu(*(__le16 *)(response_buffer + 6));
-       memcpy(&resp->ending_fis[0], response_buffer + 16, 24);
+       resp->frame_len = sizeof(*fis);
+       memcpy(resp->ending_fis, fis, sizeof(*fis));
         ts->buf_valid_size = sizeof(*resp);
  
-       /**
-        * If the device fault bit is set in the status register, then
+       /* If the device fault bit is set in the status register, then
          * set the sense data and return.
          */
-       if (d2h_reg_fis->status & ATA_DF)
+       if (fis->status & ATA_DF)
                 ts->stat = SAS_PROTO_RESPONSE;
         else
                 ts->stat = SAM_STAT_GOOD;
@@ -2428,7 +2433,6 @@ static void isci_request_io_request_complete(struct isci_host *ihost,
  {
         struct sas_task *task = isci_request_access_task(request);
         struct ssp_response_iu *resp_iu;
-       void *resp_buf;
         unsigned long task_flags;
         struct isci_remote_device *idev = isci_lookup_device(task->dev);
         enum service_response response       = SAS_TASK_UNDELIVERED;
@@ -2565,9 +2569,7 @@ static void isci_request_io_request_complete(struct isci_host *ihost,
                                 task);
  
                         if (sas_protocol_ata(task->task_proto)) {
-                               resp_buf = &request->stp.rsp;
-                               isci_request_process_stp_response(task,
-                                                                 resp_buf);
+                               isci_process_stp_response(task, &request->stp.rsp);
                         } else if (SAS_PROTOCOL_SSP == task->task_proto) {
  
                                 /* crack the iu response buffer. */
diff --git a/drivers/scsi/isci/unsolicited_frame_control.c b/drivers/scsi/isci/unsolicited_frame_control.c

index e9e1e2abacb9e7adb59820d568345769262e0187..16f88ab939c87f1cf516df5a69a3c3ad9f8ad8b6 100644 (file)
--- a/drivers/scsi/isci/unsolicited_frame_control.c
+++ b/drivers/scsi/isci/unsolicited_frame_control.c
@@ -72,7 +72,7 @@ int sci_unsolicited_frame_control_construct(struct isci_host *ihost)
          */
         buf_len = SCU_MAX_UNSOLICITED_FRAMES * SCU_UNSOLICITED_FRAME_BUFFER_SIZE;
         header_len = SCU_MAX_UNSOLICITED_FRAMES * sizeof(struct scu_unsolicited_frame_header);
-       size = buf_len + header_len + SCU_MAX_UNSOLICITED_FRAMES * sizeof(dma_addr_t);
+       size = buf_len + header_len + SCU_MAX_UNSOLICITED_FRAMES * sizeof(uf_control->address_table.array[0]);
  
         /*
          * The Unsolicited Frame buffers are set at the start of the UF
diff --git a/drivers/scsi/isci/unsolicited_frame_control.h b/drivers/scsi/isci/unsolicited_frame_control.h

index 31cb9506f52d79abfbaeb7df78235a8d300a19d8..75d896686f5aaa0750fe92c3886fe331566b67e7 100644 (file)
--- a/drivers/scsi/isci/unsolicited_frame_control.h
+++ b/drivers/scsi/isci/unsolicited_frame_control.h
@@ -214,7 +214,7 @@ struct sci_uf_address_table_array {
          * starting address of the UF address table.
          * 64-bit pointers are required by the hardware.
          */
-       dma_addr_t *array;
+       u64 *array;
  
         /**
          * This field specifies the physical address location for the UF
diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c

index 01ff082dc34cfaac127eb62ebc4ad2cd02ef3ff5..d261e982a2fae5a0c8b13ef098cc3265ebb2ebc5 100644 (file)
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -494,6 +494,9 @@ static int fc_seq_send(struct fc_lport *lport, struct fc_seq *sp,
          */
         error = lport->tt.frame_send(lport, fp);
  
+       if (fh->fh_type == FC_TYPE_BLS)
+               return error;
+
         /*
          * Update the exchange and sequence flags,
          * assuming all frames for the sequence have been sent.
@@ -575,42 +578,35 @@ static void fc_seq_set_resp(struct fc_seq *sp,
  }
  
  /**
- * fc_seq_exch_abort() - Abort an exchange and sequence
- * @req_sp:    The sequence to be aborted
+ * fc_exch_abort_locked() - Abort an exchange
+ * @ep:        The exchange to be aborted
   * @timer_msec: The period of time to wait before aborting
   *
- * Generally called because of a timeout or an abort from the upper layer.
+ * Locking notes:  Called with exch lock held
+ *
+ * Return value: 0 on success else error code
   */
-static int fc_seq_exch_abort(const struct fc_seq *req_sp,
-                            unsigned int timer_msec)
+static int fc_exch_abort_locked(struct fc_exch *ep,
+                               unsigned int timer_msec)
  {
         struct fc_seq *sp;
-       struct fc_exch *ep;
         struct fc_frame *fp;
         int error;
  
-       ep = fc_seq_exch(req_sp);
-
-       spin_lock_bh(&ep->ex_lock);
         if (ep->esb_stat & (ESB_ST_COMPLETE | ESB_ST_ABNORMAL) ||
-           ep->state & (FC_EX_DONE | FC_EX_RST_CLEANUP)) {
-               spin_unlock_bh(&ep->ex_lock);
+           ep->state & (FC_EX_DONE | FC_EX_RST_CLEANUP))
                 return -ENXIO;
-       }
  
         /*
          * Send the abort on a new sequence if possible.
          */
         sp = fc_seq_start_next_locked(&ep->seq);
-       if (!sp) {
-               spin_unlock_bh(&ep->ex_lock);
+       if (!sp)
                 return -ENOMEM;
-       }
  
         ep->esb_stat |= ESB_ST_SEQ_INIT | ESB_ST_ABNORMAL;
         if (timer_msec)
                 fc_exch_timer_set_locked(ep, timer_msec);
-       spin_unlock_bh(&ep->ex_lock);
  
         /*
          * If not logged into the fabric, don't send ABTS but leave
@@ -632,6 +628,28 @@ static int fc_seq_exch_abort(const struct fc_seq *req_sp,
         return error;
  }
  
+/**
+ * fc_seq_exch_abort() - Abort an exchange and sequence
+ * @req_sp:    The sequence to be aborted
+ * @timer_msec: The period of time to wait before aborting
+ *
+ * Generally called because of a timeout or an abort from the upper layer.
+ *
+ * Return value: 0 on success else error code
+ */
+static int fc_seq_exch_abort(const struct fc_seq *req_sp,
+                            unsigned int timer_msec)
+{
+       struct fc_exch *ep;
+       int error;
+
+       ep = fc_seq_exch(req_sp);
+       spin_lock_bh(&ep->ex_lock);
+       error = fc_exch_abort_locked(ep, timer_msec);
+       spin_unlock_bh(&ep->ex_lock);
+       return error;
+}
+
  /**
   * fc_exch_timeout() - Handle exchange timer expiration
   * @work: The work_struct identifying the exchange that timed out
@@ -1715,6 +1733,7 @@ static void fc_exch_reset(struct fc_exch *ep)
         int rc = 1;
  
         spin_lock_bh(&ep->ex_lock);
+       fc_exch_abort_locked(ep, 0);
         ep->state |= FC_EX_RST_CLEANUP;
         if (cancel_delayed_work(&ep->timeout_work))
                 atomic_dec(&ep->ex_refcnt);     /* drop hold for timer */
@@ -1962,6 +1981,7 @@ static struct fc_seq *fc_exch_seq_send(struct fc_lport *lport,
         struct fc_exch *ep;
         struct fc_seq *sp = NULL;
         struct fc_frame_header *fh;
+       struct fc_fcp_pkt *fsp = NULL;
         int rc = 1;
  
         ep = fc_exch_alloc(lport, fp);
@@ -1984,8 +2004,10 @@ static struct fc_seq *fc_exch_seq_send(struct fc_lport *lport,
         fc_exch_setup_hdr(ep, fp, ep->f_ctl);
         sp->cnt++;
  
-       if (ep->xid <= lport->lro_xid && fh->fh_r_ctl == FC_RCTL_DD_UNSOL_CMD)
+       if (ep->xid <= lport->lro_xid && fh->fh_r_ctl == FC_RCTL_DD_UNSOL_CMD) {
+               fsp = fr_fsp(fp);
                 fc_fcp_ddp_setup(fr_fsp(fp), ep->xid);
+       }
  
         if (unlikely(lport->tt.frame_send(lport, fp)))
                 goto err;
@@ -1999,7 +2021,8 @@ static struct fc_seq *fc_exch_seq_send(struct fc_lport *lport,
         spin_unlock_bh(&ep->ex_lock);
         return sp;
  err:
-       fc_fcp_ddp_done(fr_fsp(fp));
+       if (fsp)
+               fc_fcp_ddp_done(fsp);
         rc = fc_exch_done_locked(ep);
         spin_unlock_bh(&ep->ex_lock);
         if (!rc)
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c

index afb63c843144b483c362794f72e41de8689d3358..4c41ee816f0bbbedf5b88f4b72981c9991fbc843 100644 (file)
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -2019,6 +2019,11 @@ int fc_eh_abort(struct scsi_cmnd *sc_cmd)
         struct fc_fcp_internal *si;
         int rc = FAILED;
         unsigned long flags;
+       int rval;
+
+       rval = fc_block_scsi_eh(sc_cmd);
+       if (rval)
+               return rval;
  
         lport = shost_priv(sc_cmd->device->host);
         if (lport->state != LPORT_ST_READY)
@@ -2068,9 +2073,9 @@ int fc_eh_device_reset(struct scsi_cmnd *sc_cmd)
         int rc = FAILED;
         int rval;
  
-       rval = fc_remote_port_chkready(rport);
+       rval = fc_block_scsi_eh(sc_cmd);
         if (rval)
-               goto out;
+               return rval;
  
         lport = shost_priv(sc_cmd->device->host);
  
@@ -2116,6 +2121,8 @@ int fc_eh_host_reset(struct scsi_cmnd *sc_cmd)
  
         FC_SCSI_DBG(lport, "Resetting host\n");
  
+       fc_block_scsi_eh(sc_cmd);
+
         lport->tt.lport_reset(lport);
         wait_tmo = jiffies + FC_HOST_RESET_TIMEOUT;
         while (!fc_fcp_lport_queue_ready(lport) && time_before(jiffies,
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c

index e55ed9cf23fb15cfa8a386eff6921471b39b1e57..628f347404f9b355e1fbe14bd58fdae3f0d7171d 100644 (file)
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -88,6 +88,7 @@
   */
  
  #include <linux/timer.h>
+#include <linux/delay.h>
  #include <linux/slab.h>
  #include <asm/unaligned.h>
  
@@ -1029,8 +1030,16 @@ static void fc_lport_enter_reset(struct fc_lport *lport)
                            FCH_EVT_LIPRESET, 0);
         fc_vports_linkchange(lport);
         fc_lport_reset_locked(lport);
-       if (lport->link_up)
+       if (lport->link_up) {
+               /*
+                * Wait upto resource allocation time out before
+                * doing re-login since incomplete FIP exchanged
+                * from last session may collide with exchanges
+                * in new session.
+                */
+               msleep(lport->r_a_tov);
                 fc_lport_enter_flogi(lport);
+       }
  }
  
  /**
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c

index 7836eb01c7fcd16a583e2d55884da2eea035de47..a31e05f3bfd4f214920b0b7a6ca4ba40b5f47676 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1786,13 +1786,16 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
                         fc_vport_set_state(fc_vport, FC_VPORT_LINKDOWN);
         }
  
-       if ((IS_QLA25XX(ha) || IS_QLA81XX(ha)) && ql2xenabledif) {
+       if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) {
                 if (ha->fw_attributes & BIT_4) {
+                       int prot = 0;
                         vha->flags.difdix_supported = 1;
                         ql_dbg(ql_dbg_user, vha, 0x7082,
                             "Registered for DIF/DIX type 1 and 3 protection.\n");
+                       if (ql2xenabledif == 1)
+                               prot = SHOST_DIX_TYPE0_PROTECTION;
                         scsi_host_set_prot(vha->host,
-                           SHOST_DIF_TYPE1_PROTECTION
+                           prot | SHOST_DIF_TYPE1_PROTECTION
                             | SHOST_DIF_TYPE2_PROTECTION
                             | SHOST_DIF_TYPE3_PROTECTION
                             | SHOST_DIX_TYPE1_PROTECTION
diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c

index 2155071f3100a0489f39164bdb58b559deb74c1e..d79cd8a5f83181e29f41051f79e38331e1e91a22 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -8,24 +8,24 @@
  /*
   * Table for showing the current message id in use for particular level
   * Change this table for addition of log/debug messages.
- * -----------------------------------------------------
- * |             Level            |   Last Value Used  |
- * -----------------------------------------------------
- * | Module Init and Probe        |       0x0116       |
- * | Mailbox commands             |       0x111e       |
- * | Device Discovery             |       0x2083       |
- * | Queue Command and IO tracing |       0x302e       |
- * | DPC Thread                   |       0x401c       |
- * | Async Events                 |       0x5059       |
- * | Timer Routines               |       0x600d       |
- * | User Space Interactions      |       0x709c       |
- * | Task Management              |       0x8043       |
- * | AER/EEH                      |       0x900f       |
- * | Virtual Port                 |       0xa007       |
- * | ISP82XX Specific             |       0xb027       |
- * | MultiQ                       |       0xc00b       |
- * | Misc                         |       0xd00b       |
- * -----------------------------------------------------
+ * ----------------------------------------------------------------------
+ * |             Level            |   Last Value Used  |     Holes     |
+ * ----------------------------------------------------------------------
+ * | Module Init and Probe        |       0x0116       |               |
+ * | Mailbox commands             |       0x1126       |               |
+ * | Device Discovery             |       0x2083       |               |
+ * | Queue Command and IO tracing |       0x302e       |     0x3008     |
+ * | DPC Thread                   |       0x401c       |               |
+ * | Async Events                 |       0x5059       |               |
+ * | Timer Routines               |       0x600d       |               |
+ * | User Space Interactions      |       0x709d       |               |
+ * | Task Management              |       0x8041       |               |
+ * | AER/EEH                      |       0x900f       |               |
+ * | Virtual Port                 |       0xa007       |               |
+ * | ISP82XX Specific             |       0xb04f       |               |
+ * | MultiQ                       |       0xc00b       |               |
+ * | Misc                         |       0xd00b       |               |
+ * ----------------------------------------------------------------------
   */
  
  #include "qla_def.h"
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h

index cc5a79259d336531e8a23d8ce3ed4fd656f6d4b8..a03eaf40f377e91d10468f665b5ab8e60434aa5d 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2529,6 +2529,7 @@ struct qla_hw_data {
  #define DT_ISP8021                     BIT_14
  #define DT_ISP_LAST                    (DT_ISP8021 << 1)
  
+#define DT_T10_PI                       BIT_25
  #define DT_IIDMA                        BIT_26
  #define DT_FWI2                         BIT_27
  #define DT_ZIO_SUPPORTED                BIT_28
@@ -2572,6 +2573,7 @@ struct qla_hw_data {
  #define IS_NOCACHE_VPD_TYPE(ha)        (IS_QLA81XX(ha))
  #define IS_ALOGIO_CAPABLE(ha)  (IS_QLA23XX(ha) || IS_FWI2_CAPABLE(ha))
  
+#define IS_T10_PI_CAPABLE(ha)   ((ha)->device_type & DT_T10_PI)
  #define IS_IIDMA_CAPABLE(ha)    ((ha)->device_type & DT_IIDMA)
  #define IS_FWI2_CAPABLE(ha)     ((ha)->device_type & DT_FWI2)
  #define IS_ZIO_SUPPORTED(ha)    ((ha)->device_type & DT_ZIO_SUPPORTED)
diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h

index 691783abfb6927da41f94c79ccd1cea8c2c0c32f..aa69486dc06487107aee2e029a2d545906ae2cb8 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_fw.h
+++ b/drivers/scsi/qla2xxx/qla_fw.h
@@ -537,6 +537,11 @@ struct sts_entry_24xx {
         /*
          * If DIF Error is set in comp_status, these additional fields are
          * defined:
+        *
+        * !!! NOTE: Firmware sends expected/actual DIF data in big endian
+        * format; but all of the "data" field gets swab32-d in the beginning
+        * of qla2x00_status_entry().
+        *
          * &data[10] : uint8_t report_runt_bg[2];       - computed guard
          * &data[12] : uint8_t actual_dif[8];           - DIF Data received
          * &data[20] : uint8_t expected_dif[8];         - DIF Data computed
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c

index def694271bf79bf96cb6190878434b902381fb79..37da04d3db2634a0f9fc17e55f7389eead88a27a 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -3838,15 +3838,12 @@ qla2x00_loop_resync(scsi_qla_host_t *vha)
                 req = vha->req;
         rsp = req->rsp;
  
-       atomic_set(&vha->loop_state, LOOP_UPDATE);
         clear_bit(ISP_ABORT_RETRY, &vha->dpc_flags);
         if (vha->flags.online) {
                 if (!(rval = qla2x00_fw_ready(vha))) {
                         /* Wait at most MAX_TARGET RSCNs for a stable link. */
                         wait_time = 256;
                         do {
-                               atomic_set(&vha->loop_state, LOOP_UPDATE);
-
                                 /* Issue a marker after FW becomes ready. */
                                 qla2x00_marker(vha, req, rsp, 0, 0,
                                         MK_SYNC_ALL);
diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h

index d2e904bc21c018606d3a40bf40f34ba3437d6159..9902834e0b741f387acfa5a9f9fe2fbc62866847 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_inline.h
+++ b/drivers/scsi/qla2xxx/qla_inline.h
@@ -102,3 +102,32 @@ qla2x00_set_fcport_state(fc_port_t *fcport, int state)
                     fcport->d_id.b.al_pa);
         }
  }
+
+static inline int
+qla2x00_hba_err_chk_enabled(srb_t *sp)
+{
+       /*
+        * Uncomment when corresponding SCSI changes are done.
+        *
+       if (!sp->cmd->prot_chk)
+               return 0;
+        *
+        */
+
+       switch (scsi_get_prot_op(sp->cmd)) {
+       case SCSI_PROT_READ_STRIP:
+       case SCSI_PROT_WRITE_INSERT:
+               if (ql2xenablehba_err_chk >= 1)
+                       return 1;
+               break;
+       case SCSI_PROT_READ_PASS:
+       case SCSI_PROT_WRITE_PASS:
+               if (ql2xenablehba_err_chk >= 2)
+                       return 1;
+               break;
+       case SCSI_PROT_READ_INSERT:
+       case SCSI_PROT_WRITE_STRIP:
+               return 1;
+       }
+       return 0;
+}
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c

index 49d6906af886e06649494ba2eab37b8bddeac3aa..dbec89622a0fa09d77ae6ad6c45e7df9237da3e0 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -709,20 +709,28 @@ struct fw_dif_context {
   *
   */
  static inline void
-qla24xx_set_t10dif_tags(struct scsi_cmnd *cmd, struct fw_dif_context *pkt,
+qla24xx_set_t10dif_tags(srb_t *sp, struct fw_dif_context *pkt,
      unsigned int protcnt)
  {
-       struct sd_dif_tuple *spt;
+       struct scsi_cmnd *cmd = sp->cmd;
         scsi_qla_host_t *vha = shost_priv(cmd->device->host);
-       unsigned char op = scsi_get_prot_op(cmd);
  
         switch (scsi_get_prot_type(cmd)) {
-       /* For TYPE 0 protection: no checking */
         case SCSI_PROT_DIF_TYPE0:
-               pkt->ref_tag_mask[0] = 0x00;
-               pkt->ref_tag_mask[1] = 0x00;
-               pkt->ref_tag_mask[2] = 0x00;
-               pkt->ref_tag_mask[3] = 0x00;
+               /*
+                * No check for ql2xenablehba_err_chk, as it would be an
+                * I/O error if hba tag generation is not done.
+                */
+               pkt->ref_tag = cpu_to_le32((uint32_t)
+                   (0xffffffff & scsi_get_lba(cmd)));
+
+               if (!qla2x00_hba_err_chk_enabled(sp))
+                       break;
+
+               pkt->ref_tag_mask[0] = 0xff;
+               pkt->ref_tag_mask[1] = 0xff;
+               pkt->ref_tag_mask[2] = 0xff;
+               pkt->ref_tag_mask[3] = 0xff;
                 break;
  
         /*
@@ -730,20 +738,16 @@ qla24xx_set_t10dif_tags(struct scsi_cmnd *cmd, struct fw_dif_context *pkt,
          * match LBA in CDB + N
          */
         case SCSI_PROT_DIF_TYPE2:
-               if (!ql2xenablehba_err_chk)
-                       break;
-
-               if (scsi_prot_sg_count(cmd)) {
-                       spt = page_address(sg_page(scsi_prot_sglist(cmd))) +
-                           scsi_prot_sglist(cmd)[0].offset;
-                       pkt->app_tag = swab32(spt->app_tag);
-                       pkt->app_tag_mask[0] =  0xff;
-                       pkt->app_tag_mask[1] =  0xff;
-               }
+               pkt->app_tag = __constant_cpu_to_le16(0);
+               pkt->app_tag_mask[0] = 0x0;
+               pkt->app_tag_mask[1] = 0x0;
  
                 pkt->ref_tag = cpu_to_le32((uint32_t)
                     (0xffffffff & scsi_get_lba(cmd)));
  
+               if (!qla2x00_hba_err_chk_enabled(sp))
+                       break;
+
                 /* enable ALL bytes of the ref tag */
                 pkt->ref_tag_mask[0] = 0xff;
                 pkt->ref_tag_mask[1] = 0xff;
@@ -763,26 +767,15 @@ qla24xx_set_t10dif_tags(struct scsi_cmnd *cmd, struct fw_dif_context *pkt,
          * 16 bit app tag.
          */
         case SCSI_PROT_DIF_TYPE1:
-               if (!ql2xenablehba_err_chk)
+               pkt->ref_tag = cpu_to_le32((uint32_t)
+                   (0xffffffff & scsi_get_lba(cmd)));
+               pkt->app_tag = __constant_cpu_to_le16(0);
+               pkt->app_tag_mask[0] = 0x0;
+               pkt->app_tag_mask[1] = 0x0;
+
+               if (!qla2x00_hba_err_chk_enabled(sp))
                         break;
  
-               if (protcnt && (op == SCSI_PROT_WRITE_STRIP ||
-                   op == SCSI_PROT_WRITE_PASS)) {
-                       spt = page_address(sg_page(scsi_prot_sglist(cmd))) +
-                           scsi_prot_sglist(cmd)[0].offset;
-                       ql_dbg(ql_dbg_io, vha, 0x3008,
-                           "LBA from user %p, lba = 0x%x for cmd=%p.\n",
-                           spt, (int)spt->ref_tag, cmd);
-                       pkt->ref_tag = swab32(spt->ref_tag);
-                       pkt->app_tag_mask[0] = 0x0;
-                       pkt->app_tag_mask[1] = 0x0;
-               } else {
-                       pkt->ref_tag = cpu_to_le32((uint32_t)
-                           (0xffffffff & scsi_get_lba(cmd)));
-                       pkt->app_tag = __constant_cpu_to_le16(0);
-                       pkt->app_tag_mask[0] = 0x0;
-                       pkt->app_tag_mask[1] = 0x0;
-               }
                 /* enable ALL bytes of the ref tag */
                 pkt->ref_tag_mask[0] = 0xff;
                 pkt->ref_tag_mask[1] = 0xff;
@@ -798,7 +791,161 @@ qla24xx_set_t10dif_tags(struct scsi_cmnd *cmd, struct fw_dif_context *pkt,
             scsi_get_prot_type(cmd), cmd);
  }
  
+struct qla2_sgx {
+       dma_addr_t              dma_addr;       /* OUT */
+       uint32_t                dma_len;        /* OUT */
+
+       uint32_t                tot_bytes;      /* IN */
+       struct scatterlist      *cur_sg;        /* IN */
+
+       /* for book keeping, bzero on initial invocation */
+       uint32_t                bytes_consumed;
+       uint32_t                num_bytes;
+       uint32_t                tot_partial;
+
+       /* for debugging */
+       uint32_t                num_sg;
+       srb_t                   *sp;
+};
  
+static int
+qla24xx_get_one_block_sg(uint32_t blk_sz, struct qla2_sgx *sgx,
+       uint32_t *partial)
+{
+       struct scatterlist *sg;
+       uint32_t cumulative_partial, sg_len;
+       dma_addr_t sg_dma_addr;
+
+       if (sgx->num_bytes == sgx->tot_bytes)
+               return 0;
+
+       sg = sgx->cur_sg;
+       cumulative_partial = sgx->tot_partial;
+
+       sg_dma_addr = sg_dma_address(sg);
+       sg_len = sg_dma_len(sg);
+
+       sgx->dma_addr = sg_dma_addr + sgx->bytes_consumed;
+
+       if ((cumulative_partial + (sg_len - sgx->bytes_consumed)) >= blk_sz) {
+               sgx->dma_len = (blk_sz - cumulative_partial);
+               sgx->tot_partial = 0;
+               sgx->num_bytes += blk_sz;
+               *partial = 0;
+       } else {
+               sgx->dma_len = sg_len - sgx->bytes_consumed;
+               sgx->tot_partial += sgx->dma_len;
+               *partial = 1;
+       }
+
+       sgx->bytes_consumed += sgx->dma_len;
+
+       if (sg_len == sgx->bytes_consumed) {
+               sg = sg_next(sg);
+               sgx->num_sg++;
+               sgx->cur_sg = sg;
+               sgx->bytes_consumed = 0;
+       }
+
+       return 1;
+}
+
+static int
+qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp,
+       uint32_t *dsd, uint16_t tot_dsds)
+{
+       void *next_dsd;
+       uint8_t avail_dsds = 0;
+       uint32_t dsd_list_len;
+       struct dsd_dma *dsd_ptr;
+       struct scatterlist *sg_prot;
+       uint32_t *cur_dsd = dsd;
+       uint16_t        used_dsds = tot_dsds;
+
+       uint32_t        prot_int;
+       uint32_t        partial;
+       struct qla2_sgx sgx;
+       dma_addr_t      sle_dma;
+       uint32_t        sle_dma_len, tot_prot_dma_len = 0;
+       struct scsi_cmnd *cmd = sp->cmd;
+
+       prot_int = cmd->device->sector_size;
+
+       memset(&sgx, 0, sizeof(struct qla2_sgx));
+       sgx.tot_bytes = scsi_bufflen(sp->cmd);
+       sgx.cur_sg = scsi_sglist(sp->cmd);
+       sgx.sp = sp;
+
+       sg_prot = scsi_prot_sglist(sp->cmd);
+
+       while (qla24xx_get_one_block_sg(prot_int, &sgx, &partial)) {
+
+               sle_dma = sgx.dma_addr;
+               sle_dma_len = sgx.dma_len;
+alloc_and_fill:
+               /* Allocate additional continuation packets? */
+               if (avail_dsds == 0) {
+                       avail_dsds = (used_dsds > QLA_DSDS_PER_IOCB) ?
+                                       QLA_DSDS_PER_IOCB : used_dsds;
+                       dsd_list_len = (avail_dsds + 1) * 12;
+                       used_dsds -= avail_dsds;
+
+                       /* allocate tracking DS */
+                       dsd_ptr = kzalloc(sizeof(struct dsd_dma), GFP_ATOMIC);
+                       if (!dsd_ptr)
+                               return 1;
+
+                       /* allocate new list */
+                       dsd_ptr->dsd_addr = next_dsd =
+                           dma_pool_alloc(ha->dl_dma_pool, GFP_ATOMIC,
+                               &dsd_ptr->dsd_list_dma);
+
+                       if (!next_dsd) {
+                               /*
+                                * Need to cleanup only this dsd_ptr, rest
+                                * will be done by sp_free_dma()
+                                */
+                               kfree(dsd_ptr);
+                               return 1;
+                       }
+
+                       list_add_tail(&dsd_ptr->list,
+                           &((struct crc_context *)sp->ctx)->dsd_list);
+
+                       sp->flags |= SRB_CRC_CTX_DSD_VALID;
+
+                       /* add new list to cmd iocb or last list */
+                       *cur_dsd++ = cpu_to_le32(LSD(dsd_ptr->dsd_list_dma));
+                       *cur_dsd++ = cpu_to_le32(MSD(dsd_ptr->dsd_list_dma));
+                       *cur_dsd++ = dsd_list_len;
+                       cur_dsd = (uint32_t *)next_dsd;
+               }
+               *cur_dsd++ = cpu_to_le32(LSD(sle_dma));
+               *cur_dsd++ = cpu_to_le32(MSD(sle_dma));
+               *cur_dsd++ = cpu_to_le32(sle_dma_len);
+               avail_dsds--;
+
+               if (partial == 0) {
+                       /* Got a full protection interval */
+                       sle_dma = sg_dma_address(sg_prot) + tot_prot_dma_len;
+                       sle_dma_len = 8;
+
+                       tot_prot_dma_len += sle_dma_len;
+                       if (tot_prot_dma_len == sg_dma_len(sg_prot)) {
+                               tot_prot_dma_len = 0;
+                               sg_prot = sg_next(sg_prot);
+                       }
+
+                       partial = 1; /* So as to not re-enter this block */
+                       goto alloc_and_fill;
+               }
+       }
+       /* Null termination */
+       *cur_dsd++ = 0;
+       *cur_dsd++ = 0;
+       *cur_dsd++ = 0;
+       return 0;
+}
  static int
  qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd,
         uint16_t tot_dsds)
@@ -981,7 +1128,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
         struct scsi_cmnd        *cmd;
         struct scatterlist      *cur_seg;
         int                     sgc;
-       uint32_t                total_bytes;
+       uint32_t                total_bytes = 0;
         uint32_t                data_bytes;
         uint32_t                dif_bytes;
         uint8_t                 bundling = 1;
@@ -1023,8 +1170,10 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
                     __constant_cpu_to_le16(CF_READ_DATA);
         }
  
-       tot_prot_dsds = scsi_prot_sg_count(cmd);
-       if (!tot_prot_dsds)
+       if ((scsi_get_prot_op(sp->cmd) == SCSI_PROT_READ_INSERT) ||
+           (scsi_get_prot_op(sp->cmd) == SCSI_PROT_WRITE_STRIP) ||
+           (scsi_get_prot_op(sp->cmd) == SCSI_PROT_READ_STRIP) ||
+           (scsi_get_prot_op(sp->cmd) == SCSI_PROT_WRITE_INSERT))
                 bundling = 0;
  
         /* Allocate CRC context from global pool */
@@ -1047,7 +1196,7 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
  
         INIT_LIST_HEAD(&crc_ctx_pkt->dsd_list);
  
-       qla24xx_set_t10dif_tags(cmd, (struct fw_dif_context *)
+       qla24xx_set_t10dif_tags(sp, (struct fw_dif_context *)
             &crc_ctx_pkt->ref_tag, tot_prot_dsds);
  
         cmd_pkt->crc_context_address[0] = cpu_to_le32(LSD(crc_ctx_dma));
@@ -1076,7 +1225,6 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
                 fcp_cmnd->additional_cdb_len |= 2;
  
         int_to_scsilun(sp->cmd->device->lun, &fcp_cmnd->lun);
-       host_to_fcp_swap((uint8_t *)&fcp_cmnd->lun, sizeof(fcp_cmnd->lun));
         memcpy(fcp_cmnd->cdb, cmd->cmnd, cmd->cmd_len);
         cmd_pkt->fcp_cmnd_dseg_len = cpu_to_le16(fcp_cmnd_len);
         cmd_pkt->fcp_cmnd_dseg_address[0] = cpu_to_le32(
@@ -1107,15 +1255,28 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
         cmd_pkt->fcp_rsp_dseg_len = 0; /* Let response come in status iocb */
  
         /* Compute dif len and adjust data len to incude protection */
-       total_bytes = data_bytes;
         dif_bytes = 0;
         blk_size = cmd->device->sector_size;
-       if (scsi_get_prot_op(cmd) != SCSI_PROT_NORMAL) {
-               dif_bytes = (data_bytes / blk_size) * 8;
-               total_bytes += dif_bytes;
+       dif_bytes = (data_bytes / blk_size) * 8;
+
+       switch (scsi_get_prot_op(sp->cmd)) {
+       case SCSI_PROT_READ_INSERT:
+       case SCSI_PROT_WRITE_STRIP:
+           total_bytes = data_bytes;
+           data_bytes += dif_bytes;
+           break;
+
+       case SCSI_PROT_READ_STRIP:
+       case SCSI_PROT_WRITE_INSERT:
+       case SCSI_PROT_READ_PASS:
+       case SCSI_PROT_WRITE_PASS:
+           total_bytes = data_bytes + dif_bytes;
+           break;
+       default:
+           BUG();
         }
  
-       if (!ql2xenablehba_err_chk)
+       if (!qla2x00_hba_err_chk_enabled(sp))
                 fw_prot_opts |= 0x10; /* Disable Guard tag checking */
  
         if (!bundling) {
@@ -1151,7 +1312,12 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
  
         cmd_pkt->control_flags |=
             __constant_cpu_to_le16(CF_DATA_SEG_DESCR_ENABLE);
-       if (qla24xx_walk_and_build_sglist(ha, sp, cur_dsd,
+
+       if (!bundling && tot_prot_dsds) {
+               if (qla24xx_walk_and_build_sglist_no_difb(ha, sp,
+                   cur_dsd, tot_dsds))
+                       goto crc_queuing_error;
+       } else if (qla24xx_walk_and_build_sglist(ha, sp, cur_dsd,
             (tot_dsds - tot_prot_dsds)))
                 goto crc_queuing_error;
  
@@ -1414,6 +1580,22 @@ qla24xx_dif_start_scsi(srb_t *sp)
                         goto queuing_error;
                 else
                         sp->flags |= SRB_DMA_VALID;
+
+               if ((scsi_get_prot_op(cmd) == SCSI_PROT_READ_INSERT) ||
+                   (scsi_get_prot_op(cmd) == SCSI_PROT_WRITE_STRIP)) {
+                       struct qla2_sgx sgx;
+                       uint32_t        partial;
+
+                       memset(&sgx, 0, sizeof(struct qla2_sgx));
+                       sgx.tot_bytes = scsi_bufflen(cmd);
+                       sgx.cur_sg = scsi_sglist(cmd);
+                       sgx.sp = sp;
+
+                       nseg = 0;
+                       while (qla24xx_get_one_block_sg(
+                           cmd->device->sector_size, &sgx, &partial))
+                               nseg++;
+               }
         } else
                 nseg = 0;
  
@@ -1428,6 +1610,11 @@ qla24xx_dif_start_scsi(srb_t *sp)
                         goto queuing_error;
                 else
                         sp->flags |= SRB_CRC_PROT_DMA_VALID;
+
+               if ((scsi_get_prot_op(cmd) == SCSI_PROT_READ_INSERT) ||
+                   (scsi_get_prot_op(cmd) == SCSI_PROT_WRITE_STRIP)) {
+                       nseg = scsi_bufflen(cmd) / cmd->device->sector_size;
+               }
         } else {
                 nseg = 0;
         }
@@ -1454,6 +1641,7 @@ qla24xx_dif_start_scsi(srb_t *sp)
         /* Build header part of command packet (excluding the OPCODE). */
         req->current_outstanding_cmd = handle;
         req->outstanding_cmds[handle] = sp;
+       sp->handle = handle;
         sp->cmd->host_scribble = (unsigned char *)(unsigned long)handle;
         req->cnt -= req_cnt;
  
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c

index b16b7725dee001ee791b359a270d5a8792311957..646fc5263d50808364b51e284a277dda4fce12fa 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -719,7 +719,6 @@ skip_rio:
                         vha->flags.rscn_queue_overflow = 1;
                 }
  
-               atomic_set(&vha->loop_state, LOOP_UPDATE);
                 atomic_set(&vha->loop_down_timer, 0);
                 vha->flags.management_server_logged_in = 0;
  
@@ -1435,25 +1434,27 @@ struct scsi_dif_tuple {
   * ASC/ASCQ fields in the sense buffer with ILLEGAL_REQUEST
   * to indicate to the kernel that the HBA detected error.
   */
-static inline void
+static inline int
  qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
  {
         struct scsi_qla_host *vha = sp->fcport->vha;
         struct scsi_cmnd *cmd = sp->cmd;
-       struct scsi_dif_tuple   *ep =
-                       (struct scsi_dif_tuple *)&sts24->data[20];
-       struct scsi_dif_tuple   *ap =
-                       (struct scsi_dif_tuple *)&sts24->data[12];
+       uint8_t         *ap = &sts24->data[12];
+       uint8_t         *ep = &sts24->data[20];
         uint32_t        e_ref_tag, a_ref_tag;
         uint16_t        e_app_tag, a_app_tag;
         uint16_t        e_guard, a_guard;
  
-       e_ref_tag = be32_to_cpu(ep->ref_tag);
-       a_ref_tag = be32_to_cpu(ap->ref_tag);
-       e_app_tag = be16_to_cpu(ep->app_tag);
-       a_app_tag = be16_to_cpu(ap->app_tag);
-       e_guard = be16_to_cpu(ep->guard);
-       a_guard = be16_to_cpu(ap->guard);
+       /*
+        * swab32 of the "data" field in the beginning of qla2x00_status_entry()
+        * would make guard field appear at offset 2
+        */
+       a_guard   = le16_to_cpu(*(uint16_t *)(ap + 2));
+       a_app_tag = le16_to_cpu(*(uint16_t *)(ap + 0));
+       a_ref_tag = le32_to_cpu(*(uint32_t *)(ap + 4));
+       e_guard   = le16_to_cpu(*(uint16_t *)(ep + 2));
+       e_app_tag = le16_to_cpu(*(uint16_t *)(ep + 0));
+       e_ref_tag = le32_to_cpu(*(uint32_t *)(ep + 4));
  
         ql_dbg(ql_dbg_io, vha, 0x3023,
             "iocb(s) %p Returned STATUS.\n", sts24);
@@ -1465,6 +1466,63 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
             cmd->cmnd[0], (u64)scsi_get_lba(cmd), a_ref_tag, e_ref_tag,
             a_app_tag, e_app_tag, a_guard, e_guard);
  
+       /*
+        * Ignore sector if:
+        * For type     3: ref & app tag is all 'f's
+        * For type 0,1,2: app tag is all 'f's
+        */
+       if ((a_app_tag == 0xffff) &&
+           ((scsi_get_prot_type(cmd) != SCSI_PROT_DIF_TYPE3) ||
+            (a_ref_tag == 0xffffffff))) {
+               uint32_t blocks_done, resid;
+               sector_t lba_s = scsi_get_lba(cmd);
+
+               /* 2TB boundary case covered automatically with this */
+               blocks_done = e_ref_tag - (uint32_t)lba_s + 1;
+
+               resid = scsi_bufflen(cmd) - (blocks_done *
+                   cmd->device->sector_size);
+
+               scsi_set_resid(cmd, resid);
+               cmd->result = DID_OK << 16;
+
+               /* Update protection tag */
+               if (scsi_prot_sg_count(cmd)) {
+                       uint32_t i, j = 0, k = 0, num_ent;
+                       struct scatterlist *sg;
+                       struct sd_dif_tuple *spt;
+
+                       /* Patch the corresponding protection tags */
+                       scsi_for_each_prot_sg(cmd, sg,
+                           scsi_prot_sg_count(cmd), i) {
+                               num_ent = sg_dma_len(sg) / 8;
+                               if (k + num_ent < blocks_done) {
+                                       k += num_ent;
+                                       continue;
+                               }
+                               j = blocks_done - k - 1;
+                               k = blocks_done;
+                               break;
+                       }
+
+                       if (k != blocks_done) {
+                               qla_printk(KERN_WARNING, sp->fcport->vha->hw,
+                                   "unexpected tag values tag:lba=%x:%lx)\n",
+                                   e_ref_tag, lba_s);
+                               return 1;
+                       }
+
+                       spt = page_address(sg_page(sg)) + sg->offset;
+                       spt += j;
+
+                       spt->app_tag = 0xffff;
+                       if (scsi_get_prot_type(cmd) == SCSI_PROT_DIF_TYPE3)
+                               spt->ref_tag = 0xffffffff;
+               }
+
+               return 0;
+       }
+
         /* check guard */
         if (e_guard != a_guard) {
                 scsi_build_sense_buffer(1, cmd->sense_buffer, ILLEGAL_REQUEST,
@@ -1472,28 +1530,30 @@ qla2x00_handle_dif_error(srb_t *sp, struct sts_entry_24xx *sts24)
                 set_driver_byte(cmd, DRIVER_SENSE);
                 set_host_byte(cmd, DID_ABORT);
                 cmd->result |= SAM_STAT_CHECK_CONDITION << 1;
-               return;
+               return 1;
         }
  
-       /* check appl tag */
-       if (e_app_tag != a_app_tag) {
+       /* check ref tag */
+       if (e_ref_tag != a_ref_tag) {
                 scsi_build_sense_buffer(1, cmd->sense_buffer, ILLEGAL_REQUEST,
-                   0x10, 0x2);
+                   0x10, 0x3);
                 set_driver_byte(cmd, DRIVER_SENSE);
                 set_host_byte(cmd, DID_ABORT);
                 cmd->result |= SAM_STAT_CHECK_CONDITION << 1;
-               return;
+               return 1;
         }
  
-       /* check ref tag */
-       if (e_ref_tag != a_ref_tag) {
+       /* check appl tag */
+       if (e_app_tag != a_app_tag) {
                 scsi_build_sense_buffer(1, cmd->sense_buffer, ILLEGAL_REQUEST,
-                   0x10, 0x3);
+                   0x10, 0x2);
                 set_driver_byte(cmd, DRIVER_SENSE);
                 set_host_byte(cmd, DID_ABORT);
                 cmd->result |= SAM_STAT_CHECK_CONDITION << 1;
-               return;
+               return 1;
         }
+
+       return 1;
  }
  
  /**
@@ -1767,7 +1827,7 @@ check_scsi_status:
                 break;
  
         case CS_DIF_ERROR:
-               qla2x00_handle_dif_error(sp, sts24);
+               logit = qla2x00_handle_dif_error(sp, sts24);
                 break;
         default:
                 cp->result = DID_ERROR << 16;
@@ -2468,11 +2528,10 @@ qla2x00_request_irqs(struct qla_hw_data *ha, struct rsp_que *rsp)
                 goto skip_msi;
         }
  
-       if (IS_QLA2432(ha) && (ha->pdev->revision < QLA_MSIX_CHIP_REV_24XX ||
-               !QLA_MSIX_FW_MODE_1(ha->fw_attributes))) {
+       if (IS_QLA2432(ha) && (ha->pdev->revision < QLA_MSIX_CHIP_REV_24XX)) {
                 ql_log(ql_log_warn, vha, 0x0035,
                     "MSI-X; Unsupported ISP2432 (0x%X, 0x%X).\n",
-                   ha->pdev->revision, ha->fw_attributes);
+                   ha->pdev->revision, QLA_MSIX_CHIP_REV_24XX);
                 goto skip_msix;
         }
  
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c

index c706ed370000390f8450256aa2427817800e8430..f488cc69fc79fe552a7c1022a84bec1a0233ae63 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -472,7 +472,7 @@ qla24xx_create_vhost(struct fc_vport *fc_vport)
         host->can_queue = base_vha->req->length + 128;
         host->this_id = 255;
         host->cmd_per_lun = 3;
-       if ((IS_QLA25XX(ha) || IS_QLA81XX(ha)) && ql2xenabledif)
+       if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif)
                 host->max_cmd_len = 32;
         else
                 host->max_cmd_len = MAX_CMDSZ;
diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c

index 5cbf33a50b14415eed6b5101cba7f2d17080fccc..049807cda4199781eaa6c5cbd993d4451d14bc03 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_nx.c
+++ b/drivers/scsi/qla2xxx/qla_nx.c
@@ -2208,6 +2208,7 @@ qla82xx_msix_rsp_q(int irq, void *dev_id)
         struct qla_hw_data *ha;
         struct rsp_que *rsp;
         struct device_reg_82xx __iomem *reg;
+       unsigned long flags;
  
         rsp = (struct rsp_que *) dev_id;
         if (!rsp) {
@@ -2218,11 +2219,11 @@ qla82xx_msix_rsp_q(int irq, void *dev_id)
  
         ha = rsp->hw;
         reg = &ha->iobase->isp82;
-       spin_lock_irq(&ha->hardware_lock);
+       spin_lock_irqsave(&ha->hardware_lock, flags);
         vha = pci_get_drvdata(ha->pdev);
         qla24xx_process_response_queue(vha, rsp);
         WRT_REG_DWORD(&reg->host_int, 0);
-       spin_unlock_irq(&ha->hardware_lock);
+       spin_unlock_irqrestore(&ha->hardware_lock, flags);
         return IRQ_HANDLED;
  }
  
@@ -2838,6 +2839,16 @@ sufficient_dsds:
                 int_to_scsilun(sp->cmd->device->lun, &cmd_pkt->lun);
                 host_to_fcp_swap((uint8_t *)&cmd_pkt->lun, sizeof(cmd_pkt->lun));
  
+               /* build FCP_CMND IU */
+               memset(ctx->fcp_cmnd, 0, sizeof(struct fcp_cmnd));
+               int_to_scsilun(sp->cmd->device->lun, &ctx->fcp_cmnd->lun);
+               ctx->fcp_cmnd->additional_cdb_len = additional_cdb_len;
+
+               if (cmd->sc_data_direction == DMA_TO_DEVICE)
+                       ctx->fcp_cmnd->additional_cdb_len |= 1;
+               else if (cmd->sc_data_direction == DMA_FROM_DEVICE)
+                       ctx->fcp_cmnd->additional_cdb_len |= 2;
+
                 /*
                  * Update tagged queuing modifier -- default is TSK_SIMPLE (0).
                  */
@@ -2854,16 +2865,6 @@ sufficient_dsds:
                         }
                 }
  
-               /* build FCP_CMND IU */
-               memset(ctx->fcp_cmnd, 0, sizeof(struct fcp_cmnd));
-               int_to_scsilun(sp->cmd->device->lun, &ctx->fcp_cmnd->lun);
-               ctx->fcp_cmnd->additional_cdb_len = additional_cdb_len;
-
-               if (cmd->sc_data_direction == DMA_TO_DEVICE)
-                       ctx->fcp_cmnd->additional_cdb_len |= 1;
-               else if (cmd->sc_data_direction == DMA_FROM_DEVICE)
-                       ctx->fcp_cmnd->additional_cdb_len |= 2;
-
                 memcpy(ctx->fcp_cmnd->cdb, cmd->cmnd, cmd->cmd_len);
  
                 fcp_dl = (uint32_t *)(ctx->fcp_cmnd->cdb + 16 +
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c

index e02df276804ed25e119ca06d3ce7bf6f159d0876..4cace3f20c04f5f0170db61fb6ec5f693a47922b 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -106,17 +106,21 @@ MODULE_PARM_DESC(ql2xmaxqdepth,
                 "Maximum queue depth to report for target devices.");
  
  /* Do not change the value of this after module load */
-int ql2xenabledif = 1;
+int ql2xenabledif = 0;
  module_param(ql2xenabledif, int, S_IRUGO|S_IWUSR);
  MODULE_PARM_DESC(ql2xenabledif,
                 " Enable T10-CRC-DIF "
-               " Default is 0 - No DIF Support. 1 - Enable it");
+               " Default is 0 - No DIF Support. 1 - Enable it"
+               ", 2 - Enable DIF for all types, except Type 0.");
  
-int ql2xenablehba_err_chk;
+int ql2xenablehba_err_chk = 2;
  module_param(ql2xenablehba_err_chk, int, S_IRUGO|S_IWUSR);
  MODULE_PARM_DESC(ql2xenablehba_err_chk,
-               " Enable T10-CRC-DIF Error isolation by HBA"
-               " Default is 0 - Error isolation disabled, 1 - Enable it");
+               " Enable T10-CRC-DIF Error isolation by HBA:\n"
+               " Default is 1.\n"
+               "  0 -- Error isolation disabled\n"
+               "  1 -- Error isolation enabled only for DIX Type 0\n"
+               "  2 -- Error isolation enabled for all Types\n");
  
  int ql2xiidmaenable=1;
  module_param(ql2xiidmaenable, int, S_IRUGO);
@@ -909,7 +913,14 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
                     "Abort command mbx success.\n");
                 wait = 1;
         }
+
+       spin_lock_irqsave(&ha->hardware_lock, flags);
         qla2x00_sp_compl(ha, sp);
+       spin_unlock_irqrestore(&ha->hardware_lock, flags);
+
+       /* Did the command return during mailbox execution? */
+       if (ret == FAILED && !CMD_SP(cmd))
+               ret = SUCCESS;
  
         /* Wait for the command to be returned. */
         if (wait) {
@@ -2251,7 +2262,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
         host->this_id = 255;
         host->cmd_per_lun = 3;
         host->unique_id = host->host_no;
-       if ((IS_QLA25XX(ha) || IS_QLA81XX(ha)) && ql2xenabledif)
+       if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif)
                 host->max_cmd_len = 32;
         else
                 host->max_cmd_len = MAX_CMDSZ;
@@ -2378,13 +2389,16 @@ skip_dpc:
             "Detected hba at address=%p.\n",
             ha);
  
-       if ((IS_QLA25XX(ha) || IS_QLA81XX(ha)) && ql2xenabledif) {
+       if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) {
                 if (ha->fw_attributes & BIT_4) {
+                       int prot = 0;
                         base_vha->flags.difdix_supported = 1;
                         ql_dbg(ql_dbg_init, base_vha, 0x00f1,
                             "Registering for DIF/DIX type 1 and 3 protection.\n");
+                       if (ql2xenabledif == 1)
+                               prot = SHOST_DIX_TYPE0_PROTECTION;
                         scsi_host_set_prot(host,
-                           SHOST_DIF_TYPE1_PROTECTION
+                           prot | SHOST_DIF_TYPE1_PROTECTION
                             | SHOST_DIF_TYPE2_PROTECTION
                             | SHOST_DIF_TYPE3_PROTECTION
                             | SHOST_DIX_TYPE1_PROTECTION
diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h

index 062c97bf62f57b9a959fcb486915ec2570068349..13b6357c1fa2ae297c811b7588b3fb1172f4dd3f 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -7,7 +7,7 @@
  /*
   * Driver version
   */
-#define QLA2XXX_VERSION      "8.03.07.03-k"
+#define QLA2XXX_VERSION      "8.03.07.07-k"
  
  #define QLA_DRIVER_MAJOR_VER   8
  #define QLA_DRIVER_MINOR_VER   3
diff --git a/drivers/scsi/qla4xxx/Kconfig b/drivers/scsi/qla4xxx/Kconfig

index 2c33ce6eac1ee57ed2fbcf26832fa25835502340..0f5599e0abf6a6d362e0f94312faaa7abe3ea2a7 100644 (file)
--- a/drivers/scsi/qla4xxx/Kconfig
+++ b/drivers/scsi/qla4xxx/Kconfig
@@ -1,6 +1,6 @@
  config SCSI_QLA_ISCSI
         tristate "QLogic ISP4XXX and ISP82XX host adapter family support"
-       depends on PCI && SCSI
+       depends on PCI && SCSI && NET
         select SCSI_ISCSI_ATTRS
         ---help---
         This driver supports the QLogic 40xx (ISP4XXX) and 8022 (ISP82XX)
diff --git a/drivers/sh/intc/chip.c b/drivers/sh/intc/chip.c

index f33e2dd979348f4b78560d15b2f8e9f9d33ef09e..33b2ed451e095dde15bedd14ce04ded170e17fd4 100644 (file)
--- a/drivers/sh/intc/chip.c
+++ b/drivers/sh/intc/chip.c
@@ -186,6 +186,9 @@ static unsigned char intc_irq_sense_table[IRQ_TYPE_SENSE_MASK + 1] = {
      !defined(CONFIG_CPU_SUBTYPE_SH7709)
         [IRQ_TYPE_LEVEL_HIGH] = VALID(3),
  #endif
+#if defined(CONFIG_ARCH_SH7372)
+       [IRQ_TYPE_EDGE_BOTH] = VALID(4),
+#endif
  };
  
  static int intc_set_type(struct irq_data *data, unsigned int type)
diff --git a/drivers/staging/ath6kl/miscdrv/ar3kps/ar3kpsparser.c b/drivers/staging/ath6kl/miscdrv/ar3kps/ar3kpsparser.c

index c01c0cb0af4e83ced297d74b04b44b89d53733e7..b99a11a9dd6930d4978e062ebb570de09ebf3f03 100644 (file)
--- a/drivers/staging/ath6kl/miscdrv/ar3kps/ar3kpsparser.c
+++ b/drivers/staging/ath6kl/miscdrv/ar3kps/ar3kpsparser.c
@@ -812,7 +812,7 @@ int AthCreateCommandList(struct ps_cmd_packet **HciPacketList, u32 *numPackets)
      for(count = 0; count < Patch_Count; count++) {
  
          AR_DEBUG_PRINTF(ATH_DEBUG_ERR,("Freeing Patch Buffer %d \r\n",count));
-        kfree(RamPatch[Patch_Count].Data);
+       kfree(RamPatch[count].Data);
      }
  
      for(count = 0; count < Tag_Count; count++) {
diff --git a/drivers/staging/brcm80211/brcmsmac/otp.c b/drivers/staging/brcm80211/brcmsmac/otp.c

index 34253cf37812a60c190dbc2f5143623f6ba9f354..4a70180eba5d71060358edb08c18bce98e9b9b18 100644 (file)
--- a/drivers/staging/brcm80211/brcmsmac/otp.c
+++ b/drivers/staging/brcm80211/brcmsmac/otp.c
@@ -16,6 +16,7 @@
  
  #include <linux/io.h>
  #include <linux/errno.h>
+#include <linux/string.h>
  
  #include <brcm_hw_ids.h>
  #include <chipcommon.h>
diff --git a/drivers/staging/brcm80211/brcmsmac/types.h b/drivers/staging/brcm80211/brcmsmac/types.h

index bbf21897ae0e32137c7f38bbabf5159a31b4cda7..823b5e4672e29fbae23e7bab1175e94154c1b9d6 100644 (file)
--- a/drivers/staging/brcm80211/brcmsmac/types.h
+++ b/drivers/staging/brcm80211/brcmsmac/types.h
@@ -18,6 +18,7 @@
  #define _BRCM_TYPES_H_
  
  #include <linux/types.h>
+#include <linux/io.h>
  
  /* Bus types */
  #define        SI_BUS                  0       /* SOC Interconnect */
diff --git a/drivers/staging/comedi/drivers/ni_labpc.c b/drivers/staging/comedi/drivers/ni_labpc.c

index 6859af0778cfdba3b6cecd322e79a566884496e2..7611def97d06e308fb7e92e62d19b73ede8a105b 100644 (file)
--- a/drivers/staging/comedi/drivers/ni_labpc.c
+++ b/drivers/staging/comedi/drivers/ni_labpc.c
@@ -241,8 +241,10 @@ static int labpc_eeprom_write_insn(struct comedi_device *dev,
                                    struct comedi_insn *insn,
                                    unsigned int *data);
  static void labpc_adc_timing(struct comedi_device *dev, struct comedi_cmd *cmd);
-#ifdef CONFIG_COMEDI_PCI
+#ifdef CONFIG_ISA_DMA_API
  static unsigned int labpc_suggest_transfer_size(struct comedi_cmd cmd);
+#endif
+#ifdef CONFIG_COMEDI_PCI
  static int labpc_find_device(struct comedi_device *dev, int bus, int slot);
  #endif
  static int labpc_dio_mem_callback(int dir, int port, int data,
diff --git a/drivers/staging/dt3155v4l/dt3155v4l.c b/drivers/staging/dt3155v4l/dt3155v4l.c

index fe02d22274b4d21b3f5cf69943fd49fe4a9dc8b0..05aa41cf875bb9df300bf071c2202fbb9a45d46b 100644 (file)
--- a/drivers/staging/dt3155v4l/dt3155v4l.c
+++ b/drivers/staging/dt3155v4l/dt3155v4l.c
@@ -22,6 +22,7 @@
  #include <linux/stringify.h>
  #include <linux/delay.h>
  #include <linux/kthread.h>
+#include <linux/slab.h>
  #include <media/v4l2-dev.h>
  #include <media/v4l2-ioctl.h>
  #include <media/videobuf2-dma-contig.h>
diff --git a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_proc.c b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_proc.c

index 627a98b4ec308095b4a197184914b64d611b9a6c..9e728b3415e324ca9edaddc7416fe119c60a7ef6 100644 (file)
--- a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_proc.c
+++ b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_proc.c
@@ -22,6 +22,7 @@
  #include <linux/string.h>
  #include <linux/vmalloc.h>
  #include <linux/netdevice.h>
+#include <asm/io.h>
  #include <asm/uaccess.h>
  #include "ft1000.h"
  
diff --git a/drivers/staging/gma500/gem_glue.c b/drivers/staging/gma500/gem_glue.c

index 779ac1a12d24909cf9940794934a5ed5f55dbd30..daac121206535cd8161a4e87520d21c0a173be20 100644 (file)
--- a/drivers/staging/gma500/gem_glue.c
+++ b/drivers/staging/gma500/gem_glue.c
@@ -20,26 +20,6 @@
  #include <drm/drmP.h>
  #include <drm/drm.h>
  
-/**
- * Initialize an already allocated GEM object of the specified size with
- * no GEM provided backing store. Instead the caller is responsible for
- * backing the object and handling it.
- */
-int drm_gem_private_object_init(struct drm_device *dev,
-                       struct drm_gem_object *obj, size_t size)
-{
-       BUG_ON((size & (PAGE_SIZE - 1)) != 0);
-
-       obj->dev = dev;
-       obj->filp = NULL;
-
-       kref_init(&obj->refcount);
-       atomic_set(&obj->handle_count, 0);
-       obj->size = size;
-
-       return 0;
-}
-
  void drm_gem_object_release_wrap(struct drm_gem_object *obj)
  {
         /* Remove the list map if one is present */
@@ -51,8 +31,7 @@ void drm_gem_object_release_wrap(struct drm_gem_object *obj)
                 kfree(list->map);
                 list->map = NULL;
         }
-       if (obj->filp)
-               drm_gem_object_release(obj);
+       drm_gem_object_release(obj);
  }
  
  /**
diff --git a/drivers/staging/gma500/gem_glue.h b/drivers/staging/gma500/gem_glue.h

index a0f2bc4e4ae709ad7cfd39944ebf41fbfda5393b..ce5ce30f74dbff75ea343c0936795311d8ae40fe 100644 (file)
--- a/drivers/staging/gma500/gem_glue.h
+++ b/drivers/staging/gma500/gem_glue.h
@@ -1,4 +1,2 @@
  extern void drm_gem_object_release_wrap(struct drm_gem_object *obj);
-extern int drm_gem_private_object_init(struct drm_device *dev,
-                       struct drm_gem_object *obj, size_t size);
  extern int gem_create_mmap_offset(struct drm_gem_object *obj);
diff --git a/drivers/staging/gma500/mdfld_dsi_dbi.c b/drivers/staging/gma500/mdfld_dsi_dbi.c

index 02e17c9c8637e5d3b61a63214b65dae28adb6afa..fd211f3467c41931904bcb155bbd98d0c00e1af4 100644 (file)
--- a/drivers/staging/gma500/mdfld_dsi_dbi.c
+++ b/drivers/staging/gma500/mdfld_dsi_dbi.c
@@ -711,10 +711,11 @@ struct mdfld_dsi_encoder *mdfld_dsi_dbi_init(struct drm_device *dev,
         /* Create drm encoder object */
         connector = &dsi_connector->base.base;
         encoder = &dbi_output->base.base;
+       /* Review this if we ever get MIPI-HDMI bridges or similar */
         drm_encoder_init(dev,
                         encoder,
                         p_funcs->encoder_funcs,
-                       DRM_MODE_ENCODER_MIPI);
+                       DRM_MODE_ENCODER_LVDS);
         drm_encoder_helper_add(encoder, p_funcs->encoder_helper_funcs);
  
         /* Attach to given connector */
diff --git a/drivers/staging/gma500/mdfld_dsi_dbi.h b/drivers/staging/gma500/mdfld_dsi_dbi.h

index dc6242c51d0bc69f3332313a617b8c6ce54d9f88..f0fa986fd9341923ec7bbb99eeca1859a48f5271 100644 (file)
--- a/drivers/staging/gma500/mdfld_dsi_dbi.h
+++ b/drivers/staging/gma500/mdfld_dsi_dbi.h
@@ -42,9 +42,6 @@
  #include "mdfld_dsi_output.h"
  #include "mdfld_output.h"
  
-#define DRM_MODE_ENCODER_MIPI  5
-
-
  /*
   * DBI encoder which inherits from mdfld_dsi_encoder
   */
diff --git a/drivers/staging/gma500/mdfld_dsi_dpi.c b/drivers/staging/gma500/mdfld_dsi_dpi.c

index 6e03a91e947ea6cd5687f6eeca9a59dbd90df96e..e685f1217baa89d303243daec92e158d43ffca7d 100644 (file)
--- a/drivers/staging/gma500/mdfld_dsi_dpi.c
+++ b/drivers/staging/gma500/mdfld_dsi_dpi.c
@@ -777,10 +777,15 @@ struct mdfld_dsi_encoder *mdfld_dsi_dpi_init(struct drm_device *dev,
         /* Create drm encoder object */
         connector = &dsi_connector->base.base;
         encoder = &dpi_output->base.base;
+       /*
+        * On existing hardware this will be a panel of some form,
+        * if future devices also have HDMI bridges this will need
+        * revisiting
+        */
         drm_encoder_init(dev,
                         encoder,
                         p_funcs->encoder_funcs,
-                       DRM_MODE_ENCODER_MIPI);
+                       DRM_MODE_ENCODER_LVDS);
         drm_encoder_helper_add(encoder,
                                 p_funcs->encoder_helper_funcs);
         
diff --git a/drivers/staging/gma500/mdfld_dsi_output.c b/drivers/staging/gma500/mdfld_dsi_output.c

index 7536095c30a0653c566dafa788020920a6461ea7..9050c0f78b15b1c964ed7f35a576bda9f9bdf96d 100644 (file)
--- a/drivers/staging/gma500/mdfld_dsi_output.c
+++ b/drivers/staging/gma500/mdfld_dsi_output.c
@@ -955,7 +955,9 @@ void mdfld_dsi_output_init(struct drm_device *dev,
         psb_output->type = (pipe == 0) ? INTEL_OUTPUT_MIPI : INTEL_OUTPUT_MIPI2;
  
         connector = &psb_output->base;
-       drm_connector_init(dev, connector, &mdfld_dsi_connector_funcs, DRM_MODE_CONNECTOR_MIPI);
+       /* Revisit type if MIPI/HDMI bridges ever appear on Medfield */
+       drm_connector_init(dev, connector, &mdfld_dsi_connector_funcs,
+                                               DRM_MODE_CONNECTOR_LVDS);
         drm_connector_helper_add(connector, &mdfld_dsi_connector_helper_funcs);
         
         connector->display_info.subpixel_order = SubPixelHorizontalRGB;
diff --git a/drivers/staging/gma500/medfield.h b/drivers/staging/gma500/medfield.h

index 38165e8367e5943f61485cad6409363d8c71a2fa..09e9687431f16a634de396ec0b83ae0bb0e78f81 100644 (file)
--- a/drivers/staging/gma500/medfield.h
+++ b/drivers/staging/gma500/medfield.h
@@ -21,8 +21,6 @@
   * DEALINGS IN THE SOFTWARE.
   */
  
-#define DRM_MODE_ENCODER_MIPI  5
-
  /* Medfield DSI controller registers */
  
  #define MIPIA_DEVICE_READY_REG                         0xb000
diff --git a/drivers/staging/gma500/psb_drv.h b/drivers/staging/gma500/psb_drv.h

index 72f487a2a1b7a9d95e5e5323c8d634f776d5ed54..fd4732dd783a81c945d7109f9233a395d0a25e91 100644 (file)
--- a/drivers/staging/gma500/psb_drv.h
+++ b/drivers/staging/gma500/psb_drv.h
@@ -35,7 +35,6 @@
  
  /* Append new drm mode definition here, align with libdrm definition */
  #define DRM_MODE_SCALE_NO_SCALE        2
-#define DRM_MODE_CONNECTOR_MIPI         15
  
  enum {
         CHIP_PSB_8108 = 0,              /* Poulsbo */
diff --git a/drivers/staging/hv/blkvsc_drv.c b/drivers/staging/hv/blkvsc_drv.c

index 3612574ca52087c6a192a5ab061479d7403e08ab..d286b2223181623d284a8bd9b17ecce5445a899c 100644 (file)
--- a/drivers/staging/hv/blkvsc_drv.c
+++ b/drivers/staging/hv/blkvsc_drv.c
@@ -325,7 +325,7 @@ static int blkvsc_do_operation(struct block_device_context *blkdev,
  
         page_buf = alloc_page(GFP_KERNEL);
         if (!page_buf) {
-               kmem_cache_free(blkvsc_req->dev->request_pool, blkvsc_req);
+               kmem_cache_free(blkdev->request_pool, blkvsc_req);
                 return -ENOMEM;
         }
  
@@ -422,7 +422,7 @@ cleanup:
  
         __free_page(page_buf);
  
-       kmem_cache_free(blkvsc_req->dev->request_pool, blkvsc_req);
+       kmem_cache_free(blkdev->request_pool, blkvsc_req);
  
         return ret;
  }
diff --git a/drivers/staging/iio/accel/adis16203_core.c b/drivers/staging/iio/accel/adis16203_core.c

index bf1988884e93e1e22807eaea7d212f0ba1712520..cf5d15da76ada1038b3fc31baa8863a626bdae8b 100644 (file)
--- a/drivers/staging/iio/accel/adis16203_core.c
+++ b/drivers/staging/iio/accel/adis16203_core.c
@@ -311,13 +311,17 @@ static int adis16203_read_raw(struct iio_dev *indio_dev,
                 mutex_lock(&indio_dev->mlock);
                 addr = adis16203_addresses[chan->address][0];
                 ret = adis16203_spi_read_reg_16(indio_dev, addr, &val16);
-               if (ret)
+               if (ret) {
+                       mutex_unlock(&indio_dev->mlock);
                         return ret;
+               }
  
                 if (val16 & ADIS16203_ERROR_ACTIVE) {
                         ret = adis16203_check_status(indio_dev);
-                       if (ret)
+                       if (ret) {
+                               mutex_unlock(&indio_dev->mlock);
                                 return ret;
+                       }
                 }
                 val16 = val16 & ((1 << chan->scan_type.realbits) - 1);
                 if (chan->scan_type.sign == 's')
diff --git a/drivers/staging/iio/accel/adis16204_core.c b/drivers/staging/iio/accel/adis16204_core.c

index cfd09b3b9937e2d5e657222cce944cab87a2c2f1..3e2b62654b7d42e89ca8592b11e76dedc45ce1aa 100644 (file)
--- a/drivers/staging/iio/accel/adis16204_core.c
+++ b/drivers/staging/iio/accel/adis16204_core.c
@@ -341,13 +341,17 @@ static int adis16204_read_raw(struct iio_dev *indio_dev,
                 mutex_lock(&indio_dev->mlock);
                 addr = adis16204_addresses[chan->address][0];
                 ret = adis16204_spi_read_reg_16(indio_dev, addr, &val16);
-               if (ret)
+               if (ret) {
+                       mutex_unlock(&indio_dev->mlock);
                         return ret;
+               }
  
                 if (val16 & ADIS16204_ERROR_ACTIVE) {
                         ret = adis16204_check_status(indio_dev);
-                       if (ret)
+                       if (ret) {
+                               mutex_unlock(&indio_dev->mlock);
                                 return ret;
+                       }
                 }
                 val16 = val16 & ((1 << chan->scan_type.realbits) - 1);
                 if (chan->scan_type.sign == 's')
diff --git a/drivers/staging/iio/accel/adis16209_core.c b/drivers/staging/iio/accel/adis16209_core.c

index 55f3a7bcaf0a67b8603de9ee0c7d6bbb5cfa7439..bec1fa8de9b9eb3aab1b0d021d0d193245e9537d 100644 (file)
--- a/drivers/staging/iio/accel/adis16209_core.c
+++ b/drivers/staging/iio/accel/adis16209_core.c
@@ -337,13 +337,17 @@ static int adis16209_read_raw(struct iio_dev *indio_dev,
                 mutex_lock(&indio_dev->mlock);
                 addr = adis16209_addresses[chan->address][0];
                 ret = adis16209_spi_read_reg_16(indio_dev, addr, &val16);
-               if (ret)
+               if (ret) {
+                       mutex_unlock(&indio_dev->mlock);
                         return ret;
+               }
  
                 if (val16 & ADIS16209_ERROR_ACTIVE) {
                         ret = adis16209_check_status(indio_dev);
-                       if (ret)
+                       if (ret) {
+                               mutex_unlock(&indio_dev->mlock);
                                 return ret;
+                       }
                 }
                 val16 = val16 & ((1 << chan->scan_type.realbits) - 1);
                 if (chan->scan_type.sign == 's')
diff --git a/drivers/staging/iio/accel/adis16240_core.c b/drivers/staging/iio/accel/adis16240_core.c

index 4a4eafc58630998d96c2bfdb0d96e3288636ca77..aee8b69173c440f581774738c7a51b5ae1c1e46b 100644 (file)
--- a/drivers/staging/iio/accel/adis16240_core.c
+++ b/drivers/staging/iio/accel/adis16240_core.c
@@ -370,13 +370,17 @@ static int adis16240_read_raw(struct iio_dev *indio_dev,
                 mutex_lock(&indio_dev->mlock);
                 addr = adis16240_addresses[chan->address][0];
                 ret = adis16240_spi_read_reg_16(indio_dev, addr, &val16);
-               if (ret)
+               if (ret) {
+                       mutex_unlock(&indio_dev->mlock);
                         return ret;
+               }
  
                 if (val16 & ADIS16240_ERROR_ACTIVE) {
                         ret = adis16240_check_status(indio_dev);
-                       if (ret)
+                       if (ret) {
+                               mutex_unlock(&indio_dev->mlock);
                                 return ret;
+                       }
                 }
                 val16 = val16 & ((1 << chan->scan_type.realbits) - 1);
                 if (chan->scan_type.sign == 's')
diff --git a/drivers/staging/iio/gyro/adis16260_core.c b/drivers/staging/iio/gyro/adis16260_core.c

index 05797f404beaa7aded9702524b55b743e6c68a94..f2d43cfcc4937b6c15402f80341eb57fd1cdfe21 100644 (file)
--- a/drivers/staging/iio/gyro/adis16260_core.c
+++ b/drivers/staging/iio/gyro/adis16260_core.c
@@ -446,13 +446,17 @@ static int adis16260_read_raw(struct iio_dev *indio_dev,
                 mutex_lock(&indio_dev->mlock);
                 addr = adis16260_addresses[chan->address][0];
                 ret = adis16260_spi_read_reg_16(indio_dev, addr, &val16);
-               if (ret)
+               if (ret) {
+                       mutex_unlock(&indio_dev->mlock);
                         return ret;
+               }
  
                 if (val16 & ADIS16260_ERROR_ACTIVE) {
                         ret = adis16260_check_status(indio_dev);
-                       if (ret)
+                       if (ret) {
+                               mutex_unlock(&indio_dev->mlock);
                                 return ret;
+                       }
                 }
                 val16 = val16 & ((1 << chan->scan_type.realbits) - 1);
                 if (chan->scan_type.sign == 's')
diff --git a/drivers/staging/nvec/TODO b/drivers/staging/nvec/TODO

index 77b47f763f22436d6cbeb44d684854c8c302ede6..649d6b70deaabc0d5d06a60bd4777e628a5675a5 100644 (file)
--- a/drivers/staging/nvec/TODO
+++ b/drivers/staging/nvec/TODO
@@ -4,5 +4,7 @@ ToDo list (incomplete, unordered)
         - add compile as module support
         - move nvec devices to mfd cells?
         - adjust to kernel style
-
-
+       - fix clk usage
+         should not be using clk_get_sys(), but clk_get(&pdev->dev, conn)
+         where conn is either NULL if the device only has one clock, or
+         the device specific name if it has multiple clocks.
diff --git a/drivers/staging/octeon/ethernet-rgmii.c b/drivers/staging/octeon/ethernet-rgmii.c

index 9c0d2936e4862e2d3c8888fa8618e12416966212..c3d73f8431ae88f3cc7604ac603aabd36ae26a29 100644 (file)
--- a/drivers/staging/octeon/ethernet-rgmii.c
+++ b/drivers/staging/octeon/ethernet-rgmii.c
@@ -26,6 +26,7 @@
  **********************************************************************/
  #include <linux/kernel.h>
  #include <linux/netdevice.h>
+#include <linux/interrupt.h>
  #include <linux/phy.h>
  #include <linux/ratelimit.h>
  #include <net/dst.h>
diff --git a/drivers/staging/octeon/ethernet-spi.c b/drivers/staging/octeon/ethernet-spi.c

index 970825421884d44526385d7782d4faa40207501c..d0e2d514968a6f864f5fbda312a854e2a1cc414c 100644 (file)
--- a/drivers/staging/octeon/ethernet-spi.c
+++ b/drivers/staging/octeon/ethernet-spi.c
@@ -26,6 +26,7 @@
  **********************************************************************/
  #include <linux/kernel.h>
  #include <linux/netdevice.h>
+#include <linux/interrupt.h>
  #include <net/dst.h>
  
  #include <asm/octeon/octeon.h>
diff --git a/drivers/staging/rtl8192u/r819xU_firmware.c b/drivers/staging/rtl8192u/r819xU_firmware.c

index 6766f468639f539fba5d6250fb7861d0690da8b6..4bb5fffca5b9a0d54c5191a412c060ca3233d0e0 100644 (file)
--- a/drivers/staging/rtl8192u/r819xU_firmware.c
+++ b/drivers/staging/rtl8192u/r819xU_firmware.c
@@ -399,10 +399,7 @@ download_firmware_fail:
  
  }
  
-
-
-
-
-
-
+MODULE_FIRMWARE("RTL8192U/boot.img");
+MODULE_FIRMWARE("RTL8192U/main.img");
+MODULE_FIRMWARE("RTL8192U/data.img");
  
diff --git a/drivers/staging/rts_pstor/rtsx.c b/drivers/staging/rts_pstor/rtsx.c

index 5ff59f27d101cf341f6660b98fb2945482957a4f..16c73fbff51fe52d9bd3b11d5e91a39a1654c573 100644 (file)
--- a/drivers/staging/rts_pstor/rtsx.c
+++ b/drivers/staging/rts_pstor/rtsx.c
@@ -66,12 +66,6 @@ static int msi_en;
  module_param(msi_en, int, S_IRUGO | S_IWUSR);
  MODULE_PARM_DESC(msi_en, "enable msi");
  
-/* These are used to make sure the module doesn't unload before all the
- * threads have exited.
- */
-static atomic_t total_threads = ATOMIC_INIT(0);
-static DECLARE_COMPLETION(threads_gone);
-
  static irqreturn_t rtsx_interrupt(int irq, void *dev_id);
  
  /***********************************************************************
@@ -192,7 +186,7 @@ static int queuecommand_lck(struct scsi_cmnd *srb,
         /* enqueue the command and wake up the control thread */
         srb->scsi_done = done;
         chip->srb = srb;
-       up(&(dev->sema));
+       complete(&dev->cmnd_ready);
  
         return 0;
  }
@@ -475,7 +469,7 @@ static int rtsx_control_thread(void *__dev)
         current->flags |= PF_NOFREEZE;
  
         for (;;) {
-               if (down_interruptible(&dev->sema))
+               if (wait_for_completion_interruptible(&dev->cmnd_ready))
                         break;
  
                 /* lock the device pointers */
@@ -557,8 +551,6 @@ SkipForAbort:
                 mutex_unlock(&dev->dev_mutex);
         } /* for (;;) */
  
-       scsi_host_put(host);
-
         /* notify the exit routine that we're actually exiting now
          *
          * complete()/wait_for_completion() is similar to up()/down(),
@@ -573,7 +565,7 @@ SkipForAbort:
          * This is important in preemption kernels, which transfer the flow
          * of execution immediately upon a complete().
          */
-       complete_and_exit(&threads_gone, 0);
+       complete_and_exit(&dev->control_exit, 0);
  }
  
  
@@ -581,7 +573,6 @@ static int rtsx_polling_thread(void *__dev)
  {
         struct rtsx_dev *dev = (struct rtsx_dev *)__dev;
         struct rtsx_chip *chip = dev->chip;
-       struct Scsi_Host *host = rtsx_to_host(dev);
         struct sd_info *sd_card = &(chip->sd_card);
         struct xd_info *xd_card = &(chip->xd_card);
         struct ms_info *ms_card = &(chip->ms_card);
@@ -621,8 +612,7 @@ static int rtsx_polling_thread(void *__dev)
                 mutex_unlock(&dev->dev_mutex);
         }
  
-       scsi_host_put(host);
-       complete_and_exit(&threads_gone, 0);
+       complete_and_exit(&dev->polling_exit, 0);
  }
  
  /*
@@ -699,29 +689,38 @@ static void rtsx_release_resources(struct rtsx_dev *dev)
  {
         printk(KERN_INFO "-- %s\n", __func__);
  
+       /* Tell the control thread to exit.  The SCSI host must
+        * already have been removed so it won't try to queue
+        * any more commands.
+        */
+       printk(KERN_INFO "-- sending exit command to thread\n");
+       complete(&dev->cmnd_ready);
+       if (dev->ctl_thread)
+               wait_for_completion(&dev->control_exit);
+       if (dev->polling_thread)
+               wait_for_completion(&dev->polling_exit);
+
+       wait_timeout(200);
+
         if (dev->rtsx_resv_buf) {
-               dma_free_coherent(&(dev->pci->dev), HOST_CMDS_BUF_LEN,
+               dma_free_coherent(&(dev->pci->dev), RTSX_RESV_BUF_LEN,
                                 dev->rtsx_resv_buf, dev->rtsx_resv_buf_addr);
                 dev->chip->host_cmds_ptr = NULL;
                 dev->chip->host_sg_tbl_ptr = NULL;
         }
  
-       pci_disable_device(dev->pci);
-       pci_release_regions(dev->pci);
-
-       if (dev->irq > 0) {
+       if (dev->irq > 0)
                 free_irq(dev->irq, (void *)dev);
-       }
-       if (dev->chip->msi_en) {
+       if (dev->chip->msi_en)
                 pci_disable_msi(dev->pci);
-       }
+       if (dev->remap_addr)
+               iounmap(dev->remap_addr);
  
-       /* Tell the control thread to exit.  The SCSI host must
-        * already have been removed so it won't try to queue
-        * any more commands.
-        */
-       printk(KERN_INFO "-- sending exit command to thread\n");
-       up(&dev->sema);
+       pci_disable_device(dev->pci);
+       pci_release_regions(dev->pci);
+
+       rtsx_release_chip(dev->chip);
+       kfree(dev->chip);
  }
  
  /* First stage of disconnect processing: stop all commands and remove
@@ -739,6 +738,7 @@ static void quiesce_and_remove_host(struct rtsx_dev *dev)
         scsi_unlock(host);
         mutex_unlock(&dev->dev_mutex);
         wake_up(&dev->delay_wait);
+       wait_for_completion(&dev->scanning_done);
  
         /* Wait some time to let other threads exist */
         wait_timeout(100);
@@ -793,8 +793,7 @@ static int rtsx_scan_thread(void *__dev)
                 /* Should we unbind if no devices were detected? */
         }
  
-       scsi_host_put(rtsx_to_host(dev));
-       complete_and_exit(&threads_gone, 0);
+       complete_and_exit(&dev->scanning_done, 0);
  }
  
  static void rtsx_init_options(struct rtsx_chip *chip)
@@ -941,8 +940,11 @@ static int __devinit rtsx_probe(struct pci_dev *pci, const struct pci_device_id
  
         spin_lock_init(&dev->reg_lock);
         mutex_init(&(dev->dev_mutex));
-       sema_init(&(dev->sema), 0);
+       init_completion(&dev->cmnd_ready);
+       init_completion(&dev->control_exit);
+       init_completion(&dev->polling_exit);
         init_completion(&(dev->notify));
+       init_completion(&dev->scanning_done);
         init_waitqueue_head(&dev->delay_wait);
  
         dev->pci = pci;
@@ -992,28 +994,22 @@ static int __devinit rtsx_probe(struct pci_dev *pci, const struct pci_device_id
         pci_set_master(pci);
         synchronize_irq(dev->irq);
  
-       err = scsi_add_host(host, &pci->dev);
-       if (err) {
-               printk(KERN_ERR "Unable to add the scsi host\n");
-               goto errout;
-       }
-
         rtsx_init_chip(dev->chip);
  
         /* Start up our control thread */
-       th = kthread_create(rtsx_control_thread, dev, CR_DRIVER_NAME);
+       th = kthread_run(rtsx_control_thread, dev, CR_DRIVER_NAME);
         if (IS_ERR(th)) {
                 printk(KERN_ERR "Unable to start control thread\n");
                 err = PTR_ERR(th);
                 goto errout;
         }
+       dev->ctl_thread = th;
  
-       /* Take a reference to the host for the control thread and
-        * count it among all the threads we have launched.  Then
-        * start it up. */
-       scsi_host_get(rtsx_to_host(dev));
-       atomic_inc(&total_threads);
-       wake_up_process(th);
+       err = scsi_add_host(host, &pci->dev);
+       if (err) {
+               printk(KERN_ERR "Unable to add the scsi host\n");
+               goto errout;
+       }
  
         /* Start up the thread for delayed SCSI-device scanning */
         th = kthread_create(rtsx_scan_thread, dev, "rtsx-scan");
@@ -1024,28 +1020,17 @@ static int __devinit rtsx_probe(struct pci_dev *pci, const struct pci_device_id
                 goto errout;
         }
  
-       /* Take a reference to the host for the scanning thread and
-        * count it among all the threads we have launched.  Then
-        * start it up. */
-       scsi_host_get(rtsx_to_host(dev));
-       atomic_inc(&total_threads);
         wake_up_process(th);
  
         /* Start up the thread for polling thread */
-       th = kthread_create(rtsx_polling_thread, dev, "rtsx-polling");
+       th = kthread_run(rtsx_polling_thread, dev, "rtsx-polling");
         if (IS_ERR(th)) {
                 printk(KERN_ERR "Unable to start the device-polling thread\n");
                 quiesce_and_remove_host(dev);
                 err = PTR_ERR(th);
                 goto errout;
         }
-
-       /* Take a reference to the host for the polling thread and
-        * count it among all the threads we have launched.  Then
-        * start it up. */
-       scsi_host_get(rtsx_to_host(dev));
-       atomic_inc(&total_threads);
-       wake_up_process(th);
+       dev->polling_thread = th;
  
         pci_set_drvdata(pci, dev);
  
@@ -1108,16 +1093,6 @@ static void __exit rtsx_exit(void)
  
         pci_unregister_driver(&driver);
  
-       /* Don't return until all of our control and scanning threads
-        * have exited.  Since each thread signals threads_gone as its
-        * last act, we have to call wait_for_completion the right number
-        * of times.
-        */
-       while (atomic_read(&total_threads) > 0) {
-               wait_for_completion(&threads_gone);
-               atomic_dec(&total_threads);
-       }
-
         printk(KERN_INFO "%s module exit\n", CR_DRIVER_NAME);
  }
  
diff --git a/drivers/staging/rts_pstor/rtsx.h b/drivers/staging/rts_pstor/rtsx.h

index 247615ba1d2ae5750cf2fd5777b390f7a381a893..86e47c2e3e3c41858293a8be5f1165b528d3630b 100644 (file)
--- a/drivers/staging/rts_pstor/rtsx.h
+++ b/drivers/staging/rts_pstor/rtsx.h
@@ -112,9 +112,16 @@ struct rtsx_dev {
         /* locks */
         spinlock_t              reg_lock;
  
+       struct task_struct      *ctl_thread;     /* the control thread   */
+       struct task_struct      *polling_thread; /* the polling thread   */
+
         /* mutual exclusion and synchronization structures */
-       struct semaphore        sema;            /* to sleep thread on      */
+       struct completion       cmnd_ready;      /* to sleep thread on      */
+       struct completion       control_exit;    /* control thread exit     */
+       struct completion       polling_exit;    /* polling thread exit     */
         struct completion       notify;          /* thread begin/end        */
+       struct completion       scanning_done;   /* wait for scan thread    */
+
         wait_queue_head_t       delay_wait;      /* wait during scan, reset */
         struct mutex            dev_mutex;
  
diff --git a/drivers/staging/solo6x10/core.c b/drivers/staging/solo6x10/core.c

index 76779949f141bbd81d0be64ee5334768278430cd..f974f6412ad7303b72f85374a0f3d0e94cdacf0d 100644 (file)
--- a/drivers/staging/solo6x10/core.c
+++ b/drivers/staging/solo6x10/core.c
@@ -21,6 +21,7 @@
  #include <linux/module.h>
  #include <linux/pci.h>
  #include <linux/interrupt.h>
+#include <linux/slab.h>
  #include <linux/videodev2.h>
  #include "solo6x10.h"
  #include "tw28.h"
diff --git a/drivers/staging/solo6x10/enc.c b/drivers/staging/solo6x10/enc.c

index 285f7f3500626a459aa0391b4a8a97bbda48cf89..de502599bb197a7ac3481174955779cae0e21048 100644 (file)
--- a/drivers/staging/solo6x10/enc.c
+++ b/drivers/staging/solo6x10/enc.c
@@ -18,6 +18,7 @@
   */
  
  #include <linux/kernel.h>
+#include <linux/slab.h>
  #include "solo6x10.h"
  #include "osd-font.h"
  
diff --git a/drivers/staging/solo6x10/g723.c b/drivers/staging/solo6x10/g723.c

index bd8eb92c94b1584cea9120ba6b0de06b5de3e318..59274bfca95bf53182f2800aae636ad0d91a7725 100644 (file)
--- a/drivers/staging/solo6x10/g723.c
+++ b/drivers/staging/solo6x10/g723.c
@@ -21,6 +21,7 @@
  #include <linux/mempool.h>
  #include <linux/poll.h>
  #include <linux/kthread.h>
+#include <linux/slab.h>
  #include <linux/freezer.h>
  #include <sound/core.h>
  #include <sound/initval.h>
diff --git a/drivers/staging/solo6x10/p2m.c b/drivers/staging/solo6x10/p2m.c

index 5717eabb04a45b5ee03c413ec267bac093fdfb10..56210f0fc5ec162a59b466418487b2ad4bfecce6 100644 (file)
--- a/drivers/staging/solo6x10/p2m.c
+++ b/drivers/staging/solo6x10/p2m.c
@@ -18,6 +18,7 @@
   */
  
  #include <linux/kernel.h>
+#include <linux/slab.h>
  #include <linux/scatterlist.h>
  #include "solo6x10.h"
  
diff --git a/drivers/staging/solo6x10/solo6x10.h b/drivers/staging/solo6x10/solo6x10.h

index 17c06bd6cc91270986d2541c580b9d83e2713165..abee7213202f066c8234ff2a06932627e330170e 100644 (file)
--- a/drivers/staging/solo6x10/solo6x10.h
+++ b/drivers/staging/solo6x10/solo6x10.h
@@ -28,6 +28,7 @@
  #include <linux/list.h>
  #include <linux/wait.h>
  #include <linux/delay.h>
+#include <linux/slab.h>
  #include <asm/io.h>
  #include <linux/atomic.h>
  #include <linux/videodev2.h>
diff --git a/drivers/staging/speakup/devsynth.c b/drivers/staging/speakup/devsynth.c

index 39dc586fc8bb57f21cd95aec22c336255a0a8562..940769ef883fcb7d6cad69931bc66fb5490328f9 100644 (file)
--- a/drivers/staging/speakup/devsynth.c
+++ b/drivers/staging/speakup/devsynth.c
@@ -18,13 +18,14 @@ static ssize_t speakup_file_write(struct file *fp, const char *buffer,
  {
         size_t count = nbytes;
         const char *ptr = buffer;
-       int bytes;
+       size_t bytes;
         unsigned long flags;
         u_char buf[256];
+
         if (synth == NULL)
                 return -ENODEV;
         while (count > 0) {
-               bytes = min_t(size_t, count, sizeof(buf));
+               bytes = min(count, sizeof(buf));
                 if (copy_from_user(buf, ptr, bytes))
                         return -EFAULT;
                 count -= bytes;
diff --git a/drivers/staging/tidspbridge/core/dsp-clock.c b/drivers/staging/tidspbridge/core/dsp-clock.c

index 589a0554332ea74b150429fe1d3640da011152ee..3d1279c424a85155163c498ca463d0838fdfab65 100644 (file)
--- a/drivers/staging/tidspbridge/core/dsp-clock.c
+++ b/drivers/staging/tidspbridge/core/dsp-clock.c
@@ -209,7 +209,6 @@ int dsp_clk_enable(enum dsp_clk_id clk_id)
                 break;
  #ifdef CONFIG_OMAP_MCBSP
         case MCBSP_CLK:
-               omap_mcbsp_set_io_type(MCBSP_ID(clk_id), OMAP_MCBSP_POLL_IO);
                 omap_mcbsp_request(MCBSP_ID(clk_id));
                 omap2_mcbsp_set_clks_src(MCBSP_ID(clk_id), MCBSP_CLKS_PAD_SRC);
                 break;
diff --git a/drivers/staging/zcache/Makefile b/drivers/staging/zcache/Makefile

index f5ec64f94470f7311989a2d4c503934986a6df3b..60daa272c204b6191525d1990b4b38937400b94e 100644 (file)
--- a/drivers/staging/zcache/Makefile
+++ b/drivers/staging/zcache/Makefile
@@ -1,3 +1,3 @@
-zcache-y       :=      tmem.o
+zcache-y       :=      zcache-main.o tmem.o
  
  obj-$(CONFIG_ZCACHE)   +=      zcache.o
diff --git a/drivers/staging/zcache/tmem.c b/drivers/staging/zcache/tmem.c

index 975e34bcd722aecf4ed753a9b86ad25e90c5d19c..1ca66ea9b28123825a4db9b08d8621da909c8c66 100644 (file)
--- a/drivers/staging/zcache/tmem.c
+++ b/drivers/staging/zcache/tmem.c
@@ -604,7 +604,7 @@ int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
         struct tmem_obj *obj;
         void *pampd;
         bool ephemeral = is_ephemeral(pool);
-       uint32_t ret = -1;
+       int ret = -1;
         struct tmem_hashbucket *hb;
         bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
         bool lock_held = false;
diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c

new file mode 100644 (file)

index 0000000..a3f5162
--- /dev/null
+++ b/drivers/staging/zcache/zcache-main.c
@@ -0,0 +1,2003 @@
+/*
+ * zcache.c
+ *
+ * Copyright (c) 2010,2011, Dan Magenheimer, Oracle Corp.
+ * Copyright (c) 2010,2011, Nitin Gupta
+ *
+ * Zcache provides an in-kernel "host implementation" for transcendent memory
+ * and, thus indirectly, for cleancache and frontswap.  Zcache includes two
+ * page-accessible memory [1] interfaces, both utilizing lzo1x compression:
+ * 1) "compression buddies" ("zbud") is used for ephemeral pages
+ * 2) xvmalloc is used for persistent pages.
+ * Xvmalloc (based on the TLSF allocator) has very low fragmentation
+ * so maximizes space efficiency, while zbud allows pairs (and potentially,
+ * in the future, more than a pair of) compressed pages to be closely linked
+ * so that reclaiming can be done via the kernel's physical-page-oriented
+ * "shrinker" interface.
+ *
+ * [1] For a definition of page-accessible memory (aka PAM), see:
+ *   http://marc.info/?l=linux-mm&m=127811271605009
+ */
+
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/highmem.h>
+#include <linux/list.h>
+#include <linux/lzo.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/atomic.h>
+#include <linux/math64.h>
+#include "tmem.h"
+
+#include "../zram/xvmalloc.h" /* if built in drivers/staging */
+
+#if (!defined(CONFIG_CLEANCACHE) && !defined(CONFIG_FRONTSWAP))
+#error "zcache is useless without CONFIG_CLEANCACHE or CONFIG_FRONTSWAP"
+#endif
+#ifdef CONFIG_CLEANCACHE
+#include <linux/cleancache.h>
+#endif
+#ifdef CONFIG_FRONTSWAP
+#include <linux/frontswap.h>
+#endif
+
+#if 0
+/* this is more aggressive but may cause other problems? */
+#define ZCACHE_GFP_MASK        (GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN)
+#else
+#define ZCACHE_GFP_MASK \
+       (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC)
+#endif
+
+#define MAX_POOLS_PER_CLIENT 16
+
+#define MAX_CLIENTS 16
+#define LOCAL_CLIENT ((uint16_t)-1)
+
+MODULE_LICENSE("GPL");
+
+struct zcache_client {
+       struct tmem_pool *tmem_pools[MAX_POOLS_PER_CLIENT];
+       struct xv_pool *xvpool;
+       bool allocated;
+       atomic_t refcount;
+};
+
+static struct zcache_client zcache_host;
+static struct zcache_client zcache_clients[MAX_CLIENTS];
+
+static inline uint16_t get_client_id_from_client(struct zcache_client *cli)
+{
+       BUG_ON(cli == NULL);
+       if (cli == &zcache_host)
+               return LOCAL_CLIENT;
+       return cli - &zcache_clients[0];
+}
+
+static inline bool is_local_client(struct zcache_client *cli)
+{
+       return cli == &zcache_host;
+}
+
+/**********
+ * Compression buddies ("zbud") provides for packing two (or, possibly
+ * in the future, more) compressed ephemeral pages into a single "raw"
+ * (physical) page and tracking them with data structures so that
+ * the raw pages can be easily reclaimed.
+ *
+ * A zbud page ("zbpg") is an aligned page containing a list_head,
+ * a lock, and two "zbud headers".  The remainder of the physical
+ * page is divided up into aligned 64-byte "chunks" which contain
+ * the compressed data for zero, one, or two zbuds.  Each zbpg
+ * resides on: (1) an "unused list" if it has no zbuds; (2) a
+ * "buddied" list if it is fully populated  with two zbuds; or
+ * (3) one of PAGE_SIZE/64 "unbuddied" lists indexed by how many chunks
+ * the one unbuddied zbud uses.  The data inside a zbpg cannot be
+ * read or written unless the zbpg's lock is held.
+ */
+
+#define ZBH_SENTINEL  0x43214321
+#define ZBPG_SENTINEL  0xdeadbeef
+
+#define ZBUD_MAX_BUDS 2
+
+struct zbud_hdr {
+       uint16_t client_id;
+       uint16_t pool_id;
+       struct tmem_oid oid;
+       uint32_t index;
+       uint16_t size; /* compressed size in bytes, zero means unused */
+       DECL_SENTINEL
+};
+
+struct zbud_page {
+       struct list_head bud_list;
+       spinlock_t lock;
+       struct zbud_hdr buddy[ZBUD_MAX_BUDS];
+       DECL_SENTINEL
+       /* followed by NUM_CHUNK aligned CHUNK_SIZE-byte chunks */
+};
+
+#define CHUNK_SHIFT    6
+#define CHUNK_SIZE     (1 << CHUNK_SHIFT)
+#define CHUNK_MASK     (~(CHUNK_SIZE-1))
+#define NCHUNKS                (((PAGE_SIZE - sizeof(struct zbud_page)) & \
+                               CHUNK_MASK) >> CHUNK_SHIFT)
+#define MAX_CHUNK      (NCHUNKS-1)
+
+static struct {
+       struct list_head list;
+       unsigned count;
+} zbud_unbuddied[NCHUNKS];
+/* list N contains pages with N chunks USED and NCHUNKS-N unused */
+/* element 0 is never used but optimizing that isn't worth it */
+static unsigned long zbud_cumul_chunk_counts[NCHUNKS];
+
+struct list_head zbud_buddied_list;
+static unsigned long zcache_zbud_buddied_count;
+
+/* protects the buddied list and all unbuddied lists */
+static DEFINE_SPINLOCK(zbud_budlists_spinlock);
+
+static LIST_HEAD(zbpg_unused_list);
+static unsigned long zcache_zbpg_unused_list_count;
+
+/* protects the unused page list */
+static DEFINE_SPINLOCK(zbpg_unused_list_spinlock);
+
+static atomic_t zcache_zbud_curr_raw_pages;
+static atomic_t zcache_zbud_curr_zpages;
+static unsigned long zcache_zbud_curr_zbytes;
+static unsigned long zcache_zbud_cumul_zpages;
+static unsigned long zcache_zbud_cumul_zbytes;
+static unsigned long zcache_compress_poor;
+static unsigned long zcache_mean_compress_poor;
+
+/* forward references */
+static void *zcache_get_free_page(void);
+static void zcache_free_page(void *p);
+
+/*
+ * zbud helper functions
+ */
+
+static inline unsigned zbud_max_buddy_size(void)
+{
+       return MAX_CHUNK << CHUNK_SHIFT;
+}
+
+static inline unsigned zbud_size_to_chunks(unsigned size)
+{
+       BUG_ON(size == 0 || size > zbud_max_buddy_size());
+       return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
+}
+
+static inline int zbud_budnum(struct zbud_hdr *zh)
+{
+       unsigned offset = (unsigned long)zh & (PAGE_SIZE - 1);
+       struct zbud_page *zbpg = NULL;
+       unsigned budnum = -1U;
+       int i;
+
+       for (i = 0; i < ZBUD_MAX_BUDS; i++)
+               if (offset == offsetof(typeof(*zbpg), buddy[i])) {
+                       budnum = i;
+                       break;
+               }
+       BUG_ON(budnum == -1U);
+       return budnum;
+}
+
+static char *zbud_data(struct zbud_hdr *zh, unsigned size)
+{
+       struct zbud_page *zbpg;
+       char *p;
+       unsigned budnum;
+
+       ASSERT_SENTINEL(zh, ZBH);
+       budnum = zbud_budnum(zh);
+       BUG_ON(size == 0 || size > zbud_max_buddy_size());
+       zbpg = container_of(zh, struct zbud_page, buddy[budnum]);
+       ASSERT_SPINLOCK(&zbpg->lock);
+       p = (char *)zbpg;
+       if (budnum == 0)
+               p += ((sizeof(struct zbud_page) + CHUNK_SIZE - 1) &
+                                                       CHUNK_MASK);
+       else if (budnum == 1)
+               p += PAGE_SIZE - ((size + CHUNK_SIZE - 1) & CHUNK_MASK);
+       return p;
+}
+
+/*
+ * zbud raw page management
+ */
+
+static struct zbud_page *zbud_alloc_raw_page(void)
+{
+       struct zbud_page *zbpg = NULL;
+       struct zbud_hdr *zh0, *zh1;
+       bool recycled = 0;
+
+       /* if any pages on the zbpg list, use one */
+       spin_lock(&zbpg_unused_list_spinlock);
+       if (!list_empty(&zbpg_unused_list)) {
+               zbpg = list_first_entry(&zbpg_unused_list,
+                               struct zbud_page, bud_list);
+               list_del_init(&zbpg->bud_list);
+               zcache_zbpg_unused_list_count--;
+               recycled = 1;
+       }
+       spin_unlock(&zbpg_unused_list_spinlock);
+       if (zbpg == NULL)
+               /* none on zbpg list, try to get a kernel page */
+               zbpg = zcache_get_free_page();
+       if (likely(zbpg != NULL)) {
+               INIT_LIST_HEAD(&zbpg->bud_list);
+               zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1];
+               spin_lock_init(&zbpg->lock);
+               if (recycled) {
+                       ASSERT_INVERTED_SENTINEL(zbpg, ZBPG);
+                       SET_SENTINEL(zbpg, ZBPG);
+                       BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid));
+                       BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid));
+               } else {
+                       atomic_inc(&zcache_zbud_curr_raw_pages);
+                       INIT_LIST_HEAD(&zbpg->bud_list);
+                       SET_SENTINEL(zbpg, ZBPG);
+                       zh0->size = 0; zh1->size = 0;
+                       tmem_oid_set_invalid(&zh0->oid);
+                       tmem_oid_set_invalid(&zh1->oid);
+               }
+       }
+       return zbpg;
+}
+
+static void zbud_free_raw_page(struct zbud_page *zbpg)
+{
+       struct zbud_hdr *zh0 = &zbpg->buddy[0], *zh1 = &zbpg->buddy[1];
+
+       ASSERT_SENTINEL(zbpg, ZBPG);
+       BUG_ON(!list_empty(&zbpg->bud_list));
+       ASSERT_SPINLOCK(&zbpg->lock);
+       BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid));
+       BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid));
+       INVERT_SENTINEL(zbpg, ZBPG);
+       spin_unlock(&zbpg->lock);
+       spin_lock(&zbpg_unused_list_spinlock);
+       list_add(&zbpg->bud_list, &zbpg_unused_list);
+       zcache_zbpg_unused_list_count++;
+       spin_unlock(&zbpg_unused_list_spinlock);
+}
+
+/*
+ * core zbud handling routines
+ */
+
+static unsigned zbud_free(struct zbud_hdr *zh)
+{
+       unsigned size;
+
+       ASSERT_SENTINEL(zh, ZBH);
+       BUG_ON(!tmem_oid_valid(&zh->oid));
+       size = zh->size;
+       BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size());
+       zh->size = 0;
+       tmem_oid_set_invalid(&zh->oid);
+       INVERT_SENTINEL(zh, ZBH);
+       zcache_zbud_curr_zbytes -= size;
+       atomic_dec(&zcache_zbud_curr_zpages);
+       return size;
+}
+
+static void zbud_free_and_delist(struct zbud_hdr *zh)
+{
+       unsigned chunks;
+       struct zbud_hdr *zh_other;
+       unsigned budnum = zbud_budnum(zh), size;
+       struct zbud_page *zbpg =
+               container_of(zh, struct zbud_page, buddy[budnum]);
+
+       spin_lock(&zbpg->lock);
+       if (list_empty(&zbpg->bud_list)) {
+               /* ignore zombie page... see zbud_evict_pages() */
+               spin_unlock(&zbpg->lock);
+               return;
+       }
+       size = zbud_free(zh);
+       ASSERT_SPINLOCK(&zbpg->lock);
+       zh_other = &zbpg->buddy[(budnum == 0) ? 1 : 0];
+       if (zh_other->size == 0) { /* was unbuddied: unlist and free */
+               chunks = zbud_size_to_chunks(size) ;
+               spin_lock(&zbud_budlists_spinlock);
+               BUG_ON(list_empty(&zbud_unbuddied[chunks].list));
+               list_del_init(&zbpg->bud_list);
+               zbud_unbuddied[chunks].count--;
+               spin_unlock(&zbud_budlists_spinlock);
+               zbud_free_raw_page(zbpg);
+       } else { /* was buddied: move remaining buddy to unbuddied list */
+               chunks = zbud_size_to_chunks(zh_other->size) ;
+               spin_lock(&zbud_budlists_spinlock);
+               list_del_init(&zbpg->bud_list);
+               zcache_zbud_buddied_count--;
+               list_add_tail(&zbpg->bud_list, &zbud_unbuddied[chunks].list);
+               zbud_unbuddied[chunks].count++;
+               spin_unlock(&zbud_budlists_spinlock);
+               spin_unlock(&zbpg->lock);
+       }
+}
+
+static struct zbud_hdr *zbud_create(uint16_t client_id, uint16_t pool_id,
+                                       struct tmem_oid *oid,
+                                       uint32_t index, struct page *page,
+                                       void *cdata, unsigned size)
+{
+       struct zbud_hdr *zh0, *zh1, *zh = NULL;
+       struct zbud_page *zbpg = NULL, *ztmp;
+       unsigned nchunks;
+       char *to;
+       int i, found_good_buddy = 0;
+
+       nchunks = zbud_size_to_chunks(size) ;
+       for (i = MAX_CHUNK - nchunks + 1; i > 0; i--) {
+               spin_lock(&zbud_budlists_spinlock);
+               if (!list_empty(&zbud_unbuddied[i].list)) {
+                       list_for_each_entry_safe(zbpg, ztmp,
+                                   &zbud_unbuddied[i].list, bud_list) {
+                               if (spin_trylock(&zbpg->lock)) {
+                                       found_good_buddy = i;
+                                       goto found_unbuddied;
+                               }
+                       }
+               }
+               spin_unlock(&zbud_budlists_spinlock);
+       }
+       /* didn't find a good buddy, try allocating a new page */
+       zbpg = zbud_alloc_raw_page();
+       if (unlikely(zbpg == NULL))
+               goto out;
+       /* ok, have a page, now compress the data before taking locks */
+       spin_lock(&zbpg->lock);
+       spin_lock(&zbud_budlists_spinlock);
+       list_add_tail(&zbpg->bud_list, &zbud_unbuddied[nchunks].list);
+       zbud_unbuddied[nchunks].count++;
+       zh = &zbpg->buddy[0];
+       goto init_zh;
+
+found_unbuddied:
+       ASSERT_SPINLOCK(&zbpg->lock);
+       zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1];
+       BUG_ON(!((zh0->size == 0) ^ (zh1->size == 0)));
+       if (zh0->size != 0) { /* buddy0 in use, buddy1 is vacant */
+               ASSERT_SENTINEL(zh0, ZBH);
+               zh = zh1;
+       } else if (zh1->size != 0) { /* buddy1 in use, buddy0 is vacant */
+               ASSERT_SENTINEL(zh1, ZBH);
+               zh = zh0;
+       } else
+               BUG();
+       list_del_init(&zbpg->bud_list);
+       zbud_unbuddied[found_good_buddy].count--;
+       list_add_tail(&zbpg->bud_list, &zbud_buddied_list);
+       zcache_zbud_buddied_count++;
+
+init_zh:
+       SET_SENTINEL(zh, ZBH);
+       zh->size = size;
+       zh->index = index;
+       zh->oid = *oid;
+       zh->pool_id = pool_id;
+       zh->client_id = client_id;
+       /* can wait to copy the data until the list locks are dropped */
+       spin_unlock(&zbud_budlists_spinlock);
+
+       to = zbud_data(zh, size);
+       memcpy(to, cdata, size);
+       spin_unlock(&zbpg->lock);
+       zbud_cumul_chunk_counts[nchunks]++;
+       atomic_inc(&zcache_zbud_curr_zpages);
+       zcache_zbud_cumul_zpages++;
+       zcache_zbud_curr_zbytes += size;
+       zcache_zbud_cumul_zbytes += size;
+out:
+       return zh;
+}
+
+static int zbud_decompress(struct page *page, struct zbud_hdr *zh)
+{
+       struct zbud_page *zbpg;
+       unsigned budnum = zbud_budnum(zh);
+       size_t out_len = PAGE_SIZE;
+       char *to_va, *from_va;
+       unsigned size;
+       int ret = 0;
+
+       zbpg = container_of(zh, struct zbud_page, buddy[budnum]);
+       spin_lock(&zbpg->lock);
+       if (list_empty(&zbpg->bud_list)) {
+               /* ignore zombie page... see zbud_evict_pages() */
+               ret = -EINVAL;
+               goto out;
+       }
+       ASSERT_SENTINEL(zh, ZBH);
+       BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size());
+       to_va = kmap_atomic(page, KM_USER0);
+       size = zh->size;
+       from_va = zbud_data(zh, size);
+       ret = lzo1x_decompress_safe(from_va, size, to_va, &out_len);
+       BUG_ON(ret != LZO_E_OK);
+       BUG_ON(out_len != PAGE_SIZE);
+       kunmap_atomic(to_va, KM_USER0);
+out:
+       spin_unlock(&zbpg->lock);
+       return ret;
+}
+
+/*
+ * The following routines handle shrinking of ephemeral pages by evicting
+ * pages "least valuable" first.
+ */
+
+static unsigned long zcache_evicted_raw_pages;
+static unsigned long zcache_evicted_buddied_pages;
+static unsigned long zcache_evicted_unbuddied_pages;
+
+static struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id,
+                                               uint16_t poolid);
+static void zcache_put_pool(struct tmem_pool *pool);
+
+/*
+ * Flush and free all zbuds in a zbpg, then free the pageframe
+ */
+static void zbud_evict_zbpg(struct zbud_page *zbpg)
+{
+       struct zbud_hdr *zh;
+       int i, j;
+       uint32_t pool_id[ZBUD_MAX_BUDS], client_id[ZBUD_MAX_BUDS];
+       uint32_t index[ZBUD_MAX_BUDS];
+       struct tmem_oid oid[ZBUD_MAX_BUDS];
+       struct tmem_pool *pool;
+
+       ASSERT_SPINLOCK(&zbpg->lock);
+       BUG_ON(!list_empty(&zbpg->bud_list));
+       for (i = 0, j = 0; i < ZBUD_MAX_BUDS; i++) {
+               zh = &zbpg->buddy[i];
+               if (zh->size) {
+                       client_id[j] = zh->client_id;
+                       pool_id[j] = zh->pool_id;
+                       oid[j] = zh->oid;
+                       index[j] = zh->index;
+                       j++;
+                       zbud_free(zh);
+               }
+       }
+       spin_unlock(&zbpg->lock);
+       for (i = 0; i < j; i++) {
+               pool = zcache_get_pool_by_id(client_id[i], pool_id[i]);
+               if (pool != NULL) {
+                       tmem_flush_page(pool, &oid[i], index[i]);
+                       zcache_put_pool(pool);
+               }
+       }
+       ASSERT_SENTINEL(zbpg, ZBPG);
+       spin_lock(&zbpg->lock);
+       zbud_free_raw_page(zbpg);
+}
+
+/*
+ * Free nr pages.  This code is funky because we want to hold the locks
+ * protecting various lists for as short a time as possible, and in some
+ * circumstances the list may change asynchronously when the list lock is
+ * not held.  In some cases we also trylock not only to avoid waiting on a
+ * page in use by another cpu, but also to avoid potential deadlock due to
+ * lock inversion.
+ */
+static void zbud_evict_pages(int nr)
+{
+       struct zbud_page *zbpg;
+       int i;
+
+       /* first try freeing any pages on unused list */
+retry_unused_list:
+       spin_lock_bh(&zbpg_unused_list_spinlock);
+       if (!list_empty(&zbpg_unused_list)) {
+               /* can't walk list here, since it may change when unlocked */
+               zbpg = list_first_entry(&zbpg_unused_list,
+                               struct zbud_page, bud_list);
+               list_del_init(&zbpg->bud_list);
+               zcache_zbpg_unused_list_count--;
+               atomic_dec(&zcache_zbud_curr_raw_pages);
+               spin_unlock_bh(&zbpg_unused_list_spinlock);
+               zcache_free_page(zbpg);
+               zcache_evicted_raw_pages++;
+               if (--nr <= 0)
+                       goto out;
+               goto retry_unused_list;
+       }
+       spin_unlock_bh(&zbpg_unused_list_spinlock);
+
+       /* now try freeing unbuddied pages, starting with least space avail */
+       for (i = 0; i < MAX_CHUNK; i++) {
+retry_unbud_list_i:
+               spin_lock_bh(&zbud_budlists_spinlock);
+               if (list_empty(&zbud_unbuddied[i].list)) {
+                       spin_unlock_bh(&zbud_budlists_spinlock);
+                       continue;
+               }
+               list_for_each_entry(zbpg, &zbud_unbuddied[i].list, bud_list) {
+                       if (unlikely(!spin_trylock(&zbpg->lock)))
+                               continue;
+                       list_del_init(&zbpg->bud_list);
+                       zbud_unbuddied[i].count--;
+                       spin_unlock(&zbud_budlists_spinlock);
+                       zcache_evicted_unbuddied_pages++;
+                       /* want budlists unlocked when doing zbpg eviction */
+                       zbud_evict_zbpg(zbpg);
+                       local_bh_enable();
+                       if (--nr <= 0)
+                               goto out;
+                       goto retry_unbud_list_i;
+               }
+               spin_unlock_bh(&zbud_budlists_spinlock);
+       }
+
+       /* as a last resort, free buddied pages */
+retry_bud_list:
+       spin_lock_bh(&zbud_budlists_spinlock);
+       if (list_empty(&zbud_buddied_list)) {
+               spin_unlock_bh(&zbud_budlists_spinlock);
+               goto out;
+       }
+       list_for_each_entry(zbpg, &zbud_buddied_list, bud_list) {
+               if (unlikely(!spin_trylock(&zbpg->lock)))
+                       continue;
+               list_del_init(&zbpg->bud_list);
+               zcache_zbud_buddied_count--;
+               spin_unlock(&zbud_budlists_spinlock);
+               zcache_evicted_buddied_pages++;
+               /* want budlists unlocked when doing zbpg eviction */
+               zbud_evict_zbpg(zbpg);
+               local_bh_enable();
+               if (--nr <= 0)
+                       goto out;
+               goto retry_bud_list;
+       }
+       spin_unlock_bh(&zbud_budlists_spinlock);
+out:
+       return;
+}
+
+static void zbud_init(void)
+{
+       int i;
+
+       INIT_LIST_HEAD(&zbud_buddied_list);
+       zcache_zbud_buddied_count = 0;
+       for (i = 0; i < NCHUNKS; i++) {
+               INIT_LIST_HEAD(&zbud_unbuddied[i].list);
+               zbud_unbuddied[i].count = 0;
+       }
+}
+
+#ifdef CONFIG_SYSFS
+/*
+ * These sysfs routines show a nice distribution of how many zbpg's are
+ * currently (and have ever been placed) in each unbuddied list.  It's fun
+ * to watch but can probably go away before final merge.
+ */
+static int zbud_show_unbuddied_list_counts(char *buf)
+{
+       int i;
+       char *p = buf;
+
+       for (i = 0; i < NCHUNKS; i++)
+               p += sprintf(p, "%u ", zbud_unbuddied[i].count);
+       return p - buf;
+}
+
+static int zbud_show_cumul_chunk_counts(char *buf)
+{
+       unsigned long i, chunks = 0, total_chunks = 0, sum_total_chunks = 0;
+       unsigned long total_chunks_lte_21 = 0, total_chunks_lte_32 = 0;
+       unsigned long total_chunks_lte_42 = 0;
+       char *p = buf;
+
+       for (i = 0; i < NCHUNKS; i++) {
+               p += sprintf(p, "%lu ", zbud_cumul_chunk_counts[i]);
+               chunks += zbud_cumul_chunk_counts[i];
+               total_chunks += zbud_cumul_chunk_counts[i];
+               sum_total_chunks += i * zbud_cumul_chunk_counts[i];
+               if (i == 21)
+                       total_chunks_lte_21 = total_chunks;
+               if (i == 32)
+                       total_chunks_lte_32 = total_chunks;
+               if (i == 42)
+                       total_chunks_lte_42 = total_chunks;
+       }
+       p += sprintf(p, "<=21:%lu <=32:%lu <=42:%lu, mean:%lu\n",
+               total_chunks_lte_21, total_chunks_lte_32, total_chunks_lte_42,
+               chunks == 0 ? 0 : sum_total_chunks / chunks);
+       return p - buf;
+}
+#endif
+
+/**********
+ * This "zv" PAM implementation combines the TLSF-based xvMalloc
+ * with lzo1x compression to maximize the amount of data that can
+ * be packed into a physical page.
+ *
+ * Zv represents a PAM page with the index and object (plus a "size" value
+ * necessary for decompression) immediately preceding the compressed data.
+ */
+
+#define ZVH_SENTINEL  0x43214321
+
+struct zv_hdr {
+       uint32_t pool_id;
+       struct tmem_oid oid;
+       uint32_t index;
+       DECL_SENTINEL
+};
+
+/* rudimentary policy limits */
+/* total number of persistent pages may not exceed this percentage */
+static unsigned int zv_page_count_policy_percent = 75;
+/*
+ * byte count defining poor compression; pages with greater zsize will be
+ * rejected
+ */
+static unsigned int zv_max_zsize = (PAGE_SIZE / 8) * 7;
+/*
+ * byte count defining poor *mean* compression; pages with greater zsize
+ * will be rejected until sufficient better-compressed pages are accepted
+ * driving the man below this threshold
+ */
+static unsigned int zv_max_mean_zsize = (PAGE_SIZE / 8) * 5;
+
+static unsigned long zv_curr_dist_counts[NCHUNKS];
+static unsigned long zv_cumul_dist_counts[NCHUNKS];
+
+static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,
+                               struct tmem_oid *oid, uint32_t index,
+                               void *cdata, unsigned clen)
+{
+       struct page *page;
+       struct zv_hdr *zv = NULL;
+       uint32_t offset;
+       int alloc_size = clen + sizeof(struct zv_hdr);
+       int chunks = (alloc_size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
+       int ret;
+
+       BUG_ON(!irqs_disabled());
+       BUG_ON(chunks >= NCHUNKS);
+       ret = xv_malloc(xvpool, alloc_size,
+                       &page, &offset, ZCACHE_GFP_MASK);
+       if (unlikely(ret))
+               goto out;
+       zv_curr_dist_counts[chunks]++;
+       zv_cumul_dist_counts[chunks]++;
+       zv = kmap_atomic(page, KM_USER0) + offset;
+       zv->index = index;
+       zv->oid = *oid;
+       zv->pool_id = pool_id;
+       SET_SENTINEL(zv, ZVH);
+       memcpy((char *)zv + sizeof(struct zv_hdr), cdata, clen);
+       kunmap_atomic(zv, KM_USER0);
+out:
+       return zv;
+}
+
+static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv)
+{
+       unsigned long flags;
+       struct page *page;
+       uint32_t offset;
+       uint16_t size = xv_get_object_size(zv);
+       int chunks = (size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
+
+       ASSERT_SENTINEL(zv, ZVH);
+       BUG_ON(chunks >= NCHUNKS);
+       zv_curr_dist_counts[chunks]--;
+       size -= sizeof(*zv);
+       BUG_ON(size == 0);
+       INVERT_SENTINEL(zv, ZVH);
+       page = virt_to_page(zv);
+       offset = (unsigned long)zv & ~PAGE_MASK;
+       local_irq_save(flags);
+       xv_free(xvpool, page, offset);
+       local_irq_restore(flags);
+}
+
+static void zv_decompress(struct page *page, struct zv_hdr *zv)
+{
+       size_t clen = PAGE_SIZE;
+       char *to_va;
+       unsigned size;
+       int ret;
+
+       ASSERT_SENTINEL(zv, ZVH);
+       size = xv_get_object_size(zv) - sizeof(*zv);
+       BUG_ON(size == 0);
+       to_va = kmap_atomic(page, KM_USER0);
+       ret = lzo1x_decompress_safe((char *)zv + sizeof(*zv),
+                                       size, to_va, &clen);
+       kunmap_atomic(to_va, KM_USER0);
+       BUG_ON(ret != LZO_E_OK);
+       BUG_ON(clen != PAGE_SIZE);
+}
+
+#ifdef CONFIG_SYSFS
+/*
+ * show a distribution of compression stats for zv pages.
+ */
+
+static int zv_curr_dist_counts_show(char *buf)
+{
+       unsigned long i, n, chunks = 0, sum_total_chunks = 0;
+       char *p = buf;
+
+       for (i = 0; i < NCHUNKS; i++) {
+               n = zv_curr_dist_counts[i];
+               p += sprintf(p, "%lu ", n);
+               chunks += n;
+               sum_total_chunks += i * n;
+       }
+       p += sprintf(p, "mean:%lu\n",
+               chunks == 0 ? 0 : sum_total_chunks / chunks);
+       return p - buf;
+}
+
+static int zv_cumul_dist_counts_show(char *buf)
+{
+       unsigned long i, n, chunks = 0, sum_total_chunks = 0;
+       char *p = buf;
+
+       for (i = 0; i < NCHUNKS; i++) {
+               n = zv_cumul_dist_counts[i];
+               p += sprintf(p, "%lu ", n);
+               chunks += n;
+               sum_total_chunks += i * n;
+       }
+       p += sprintf(p, "mean:%lu\n",
+               chunks == 0 ? 0 : sum_total_chunks / chunks);
+       return p - buf;
+}
+
+/*
+ * setting zv_max_zsize via sysfs causes all persistent (e.g. swap)
+ * pages that don't compress to less than this value (including metadata
+ * overhead) to be rejected.  We don't allow the value to get too close
+ * to PAGE_SIZE.
+ */
+static ssize_t zv_max_zsize_show(struct kobject *kobj,
+                                   struct kobj_attribute *attr,
+                                   char *buf)
+{
+       return sprintf(buf, "%u\n", zv_max_zsize);
+}
+
+static ssize_t zv_max_zsize_store(struct kobject *kobj,
+                                   struct kobj_attribute *attr,
+                                   const char *buf, size_t count)
+{
+       unsigned long val;
+       int err;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       err = strict_strtoul(buf, 10, &val);
+       if (err || (val == 0) || (val > (PAGE_SIZE / 8) * 7))
+               return -EINVAL;
+       zv_max_zsize = val;
+       return count;
+}
+
+/*
+ * setting zv_max_mean_zsize via sysfs causes all persistent (e.g. swap)
+ * pages that don't compress to less than this value (including metadata
+ * overhead) to be rejected UNLESS the mean compression is also smaller
+ * than this value.  In other words, we are load-balancing-by-zsize the
+ * accepted pages.  Again, we don't allow the value to get too close
+ * to PAGE_SIZE.
+ */
+static ssize_t zv_max_mean_zsize_show(struct kobject *kobj,
+                                   struct kobj_attribute *attr,
+                                   char *buf)
+{
+       return sprintf(buf, "%u\n", zv_max_mean_zsize);
+}
+
+static ssize_t zv_max_mean_zsize_store(struct kobject *kobj,
+                                   struct kobj_attribute *attr,
+                                   const char *buf, size_t count)
+{
+       unsigned long val;
+       int err;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       err = strict_strtoul(buf, 10, &val);
+       if (err || (val == 0) || (val > (PAGE_SIZE / 8) * 7))
+               return -EINVAL;
+       zv_max_mean_zsize = val;
+       return count;
+}
+
+/*
+ * setting zv_page_count_policy_percent via sysfs sets an upper bound of
+ * persistent (e.g. swap) pages that will be retained according to:
+ *     (zv_page_count_policy_percent * totalram_pages) / 100)
+ * when that limit is reached, further puts will be rejected (until
+ * some pages have been flushed).  Note that, due to compression,
+ * this number may exceed 100; it defaults to 75 and we set an
+ * arbitary limit of 150.  A poor choice will almost certainly result
+ * in OOM's, so this value should only be changed prudently.
+ */
+static ssize_t zv_page_count_policy_percent_show(struct kobject *kobj,
+                                                struct kobj_attribute *attr,
+                                                char *buf)
+{
+       return sprintf(buf, "%u\n", zv_page_count_policy_percent);
+}
+
+static ssize_t zv_page_count_policy_percent_store(struct kobject *kobj,
+                                                 struct kobj_attribute *attr,
+                                                 const char *buf, size_t count)
+{
+       unsigned long val;
+       int err;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       err = strict_strtoul(buf, 10, &val);
+       if (err || (val == 0) || (val > 150))
+               return -EINVAL;
+       zv_page_count_policy_percent = val;
+       return count;
+}
+
+static struct kobj_attribute zcache_zv_max_zsize_attr = {
+               .attr = { .name = "zv_max_zsize", .mode = 0644 },
+               .show = zv_max_zsize_show,
+               .store = zv_max_zsize_store,
+};
+
+static struct kobj_attribute zcache_zv_max_mean_zsize_attr = {
+               .attr = { .name = "zv_max_mean_zsize", .mode = 0644 },
+               .show = zv_max_mean_zsize_show,
+               .store = zv_max_mean_zsize_store,
+};
+
+static struct kobj_attribute zcache_zv_page_count_policy_percent_attr = {
+               .attr = { .name = "zv_page_count_policy_percent",
+                         .mode = 0644 },
+               .show = zv_page_count_policy_percent_show,
+               .store = zv_page_count_policy_percent_store,
+};
+#endif
+
+/*
+ * zcache core code starts here
+ */
+
+/* useful stats not collected by cleancache or frontswap */
+static unsigned long zcache_flush_total;
+static unsigned long zcache_flush_found;
+static unsigned long zcache_flobj_total;
+static unsigned long zcache_flobj_found;
+static unsigned long zcache_failed_eph_puts;
+static unsigned long zcache_failed_pers_puts;
+
+/*
+ * Tmem operations assume the poolid implies the invoking client.
+ * Zcache only has one client (the kernel itself): LOCAL_CLIENT.
+ * RAMster has each client numbered by cluster node, and a KVM version
+ * of zcache would have one client per guest and each client might
+ * have a poolid==N.
+ */
+static struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id, uint16_t poolid)
+{
+       struct tmem_pool *pool = NULL;
+       struct zcache_client *cli = NULL;
+
+       if (cli_id == LOCAL_CLIENT)
+               cli = &zcache_host;
+       else {
+               if (cli_id >= MAX_CLIENTS)
+                       goto out;
+               cli = &zcache_clients[cli_id];
+               if (cli == NULL)
+                       goto out;
+               atomic_inc(&cli->refcount);
+       }
+       if (poolid < MAX_POOLS_PER_CLIENT) {
+               pool = cli->tmem_pools[poolid];
+               if (pool != NULL)
+                       atomic_inc(&pool->refcount);
+       }
+out:
+       return pool;
+}
+
+static void zcache_put_pool(struct tmem_pool *pool)
+{
+       struct zcache_client *cli = NULL;
+
+       if (pool == NULL)
+               BUG();
+       cli = pool->client;
+       atomic_dec(&pool->refcount);
+       atomic_dec(&cli->refcount);
+}
+
+int zcache_new_client(uint16_t cli_id)
+{
+       struct zcache_client *cli = NULL;
+       int ret = -1;
+
+       if (cli_id == LOCAL_CLIENT)
+               cli = &zcache_host;
+       else if ((unsigned int)cli_id < MAX_CLIENTS)
+               cli = &zcache_clients[cli_id];
+       if (cli == NULL)
+               goto out;
+       if (cli->allocated)
+               goto out;
+       cli->allocated = 1;
+#ifdef CONFIG_FRONTSWAP
+       cli->xvpool = xv_create_pool();
+       if (cli->xvpool == NULL)
+               goto out;
+#endif
+       ret = 0;
+out:
+       return ret;
+}
+
+/* counters for debugging */
+static unsigned long zcache_failed_get_free_pages;
+static unsigned long zcache_failed_alloc;
+static unsigned long zcache_put_to_flush;
+static unsigned long zcache_aborted_preload;
+static unsigned long zcache_aborted_shrink;
+
+/*
+ * Ensure that memory allocation requests in zcache don't result
+ * in direct reclaim requests via the shrinker, which would cause
+ * an infinite loop.  Maybe a GFP flag would be better?
+ */
+static DEFINE_SPINLOCK(zcache_direct_reclaim_lock);
+
+/*
+ * for now, used named slabs so can easily track usage; later can
+ * either just use kmalloc, or perhaps add a slab-like allocator
+ * to more carefully manage total memory utilization
+ */
+static struct kmem_cache *zcache_objnode_cache;
+static struct kmem_cache *zcache_obj_cache;
+static atomic_t zcache_curr_obj_count = ATOMIC_INIT(0);
+static unsigned long zcache_curr_obj_count_max;
+static atomic_t zcache_curr_objnode_count = ATOMIC_INIT(0);
+static unsigned long zcache_curr_objnode_count_max;
+
+/*
+ * to avoid memory allocation recursion (e.g. due to direct reclaim), we
+ * preload all necessary data structures so the hostops callbacks never
+ * actually do a malloc
+ */
+struct zcache_preload {
+       void *page;
+       struct tmem_obj *obj;
+       int nr;
+       struct tmem_objnode *objnodes[OBJNODE_TREE_MAX_PATH];
+};
+static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, };
+
+static int zcache_do_preload(struct tmem_pool *pool)
+{
+       struct zcache_preload *kp;
+       struct tmem_objnode *objnode;
+       struct tmem_obj *obj;
+       void *page;
+       int ret = -ENOMEM;
+
+       if (unlikely(zcache_objnode_cache == NULL))
+               goto out;
+       if (unlikely(zcache_obj_cache == NULL))
+               goto out;
+       if (!spin_trylock(&zcache_direct_reclaim_lock)) {
+               zcache_aborted_preload++;
+               goto out;
+       }
+       preempt_disable();
+       kp = &__get_cpu_var(zcache_preloads);
+       while (kp->nr < ARRAY_SIZE(kp->objnodes)) {
+               preempt_enable_no_resched();
+               objnode = kmem_cache_alloc(zcache_objnode_cache,
+                               ZCACHE_GFP_MASK);
+               if (unlikely(objnode == NULL)) {
+                       zcache_failed_alloc++;
+                       goto unlock_out;
+               }
+               preempt_disable();
+               kp = &__get_cpu_var(zcache_preloads);
+               if (kp->nr < ARRAY_SIZE(kp->objnodes))
+                       kp->objnodes[kp->nr++] = objnode;
+               else
+                       kmem_cache_free(zcache_objnode_cache, objnode);
+       }
+       preempt_enable_no_resched();
+       obj = kmem_cache_alloc(zcache_obj_cache, ZCACHE_GFP_MASK);
+       if (unlikely(obj == NULL)) {
+               zcache_failed_alloc++;
+               goto unlock_out;
+       }
+       page = (void *)__get_free_page(ZCACHE_GFP_MASK);
+       if (unlikely(page == NULL)) {
+               zcache_failed_get_free_pages++;
+               kmem_cache_free(zcache_obj_cache, obj);
+               goto unlock_out;
+       }
+       preempt_disable();
+       kp = &__get_cpu_var(zcache_preloads);
+       if (kp->obj == NULL)
+               kp->obj = obj;
+       else
+               kmem_cache_free(zcache_obj_cache, obj);
+       if (kp->page == NULL)
+               kp->page = page;
+       else
+               free_page((unsigned long)page);
+       ret = 0;
+unlock_out:
+       spin_unlock(&zcache_direct_reclaim_lock);
+out:
+       return ret;
+}
+
+static void *zcache_get_free_page(void)
+{
+       struct zcache_preload *kp;
+       void *page;
+
+       kp = &__get_cpu_var(zcache_preloads);
+       page = kp->page;
+       BUG_ON(page == NULL);
+       kp->page = NULL;
+       return page;
+}
+
+static void zcache_free_page(void *p)
+{
+       free_page((unsigned long)p);
+}
+
+/*
+ * zcache implementation for tmem host ops
+ */
+
+static struct tmem_objnode *zcache_objnode_alloc(struct tmem_pool *pool)
+{
+       struct tmem_objnode *objnode = NULL;
+       unsigned long count;
+       struct zcache_preload *kp;
+
+       kp = &__get_cpu_var(zcache_preloads);
+       if (kp->nr <= 0)
+               goto out;
+       objnode = kp->objnodes[kp->nr - 1];
+       BUG_ON(objnode == NULL);
+       kp->objnodes[kp->nr - 1] = NULL;
+       kp->nr--;
+       count = atomic_inc_return(&zcache_curr_objnode_count);
+       if (count > zcache_curr_objnode_count_max)
+               zcache_curr_objnode_count_max = count;
+out:
+       return objnode;
+}
+
+static void zcache_objnode_free(struct tmem_objnode *objnode,
+                                       struct tmem_pool *pool)
+{
+       atomic_dec(&zcache_curr_objnode_count);
+       BUG_ON(atomic_read(&zcache_curr_objnode_count) < 0);
+       kmem_cache_free(zcache_objnode_cache, objnode);
+}
+
+static struct tmem_obj *zcache_obj_alloc(struct tmem_pool *pool)
+{
+       struct tmem_obj *obj = NULL;
+       unsigned long count;
+       struct zcache_preload *kp;
+
+       kp = &__get_cpu_var(zcache_preloads);
+       obj = kp->obj;
+       BUG_ON(obj == NULL);
+       kp->obj = NULL;
+       count = atomic_inc_return(&zcache_curr_obj_count);
+       if (count > zcache_curr_obj_count_max)
+               zcache_curr_obj_count_max = count;
+       return obj;
+}
+
+static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool)
+{
+       atomic_dec(&zcache_curr_obj_count);
+       BUG_ON(atomic_read(&zcache_curr_obj_count) < 0);
+       kmem_cache_free(zcache_obj_cache, obj);
+}
+
+static struct tmem_hostops zcache_hostops = {
+       .obj_alloc = zcache_obj_alloc,
+       .obj_free = zcache_obj_free,
+       .objnode_alloc = zcache_objnode_alloc,
+       .objnode_free = zcache_objnode_free,
+};
+
+/*
+ * zcache implementations for PAM page descriptor ops
+ */
+
+static atomic_t zcache_curr_eph_pampd_count = ATOMIC_INIT(0);
+static unsigned long zcache_curr_eph_pampd_count_max;
+static atomic_t zcache_curr_pers_pampd_count = ATOMIC_INIT(0);
+static unsigned long zcache_curr_pers_pampd_count_max;
+
+/* forward reference */
+static int zcache_compress(struct page *from, void **out_va, size_t *out_len);
+
+static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph,
+                               struct tmem_pool *pool, struct tmem_oid *oid,
+                                uint32_t index)
+{
+       void *pampd = NULL, *cdata;
+       size_t clen;
+       int ret;
+       unsigned long count;
+       struct page *page = (struct page *)(data);
+       struct zcache_client *cli = pool->client;
+       uint16_t client_id = get_client_id_from_client(cli);
+       unsigned long zv_mean_zsize;
+       unsigned long curr_pers_pampd_count;
+       u64 total_zsize;
+
+       if (eph) {
+               ret = zcache_compress(page, &cdata, &clen);
+               if (ret == 0)
+                       goto out;
+               if (clen == 0 || clen > zbud_max_buddy_size()) {
+                       zcache_compress_poor++;
+                       goto out;
+               }
+               pampd = (void *)zbud_create(client_id, pool->pool_id, oid,
+                                               index, page, cdata, clen);
+               if (pampd != NULL) {
+                       count = atomic_inc_return(&zcache_curr_eph_pampd_count);
+                       if (count > zcache_curr_eph_pampd_count_max)
+                               zcache_curr_eph_pampd_count_max = count;
+               }
+       } else {
+               curr_pers_pampd_count =
+                       atomic_read(&zcache_curr_pers_pampd_count);
+               if (curr_pers_pampd_count >
+                   (zv_page_count_policy_percent * totalram_pages) / 100)
+                       goto out;
+               ret = zcache_compress(page, &cdata, &clen);
+               if (ret == 0)
+                       goto out;
+               /* reject if compression is too poor */
+               if (clen > zv_max_zsize) {
+                       zcache_compress_poor++;
+                       goto out;
+               }
+               /* reject if mean compression is too poor */
+               if ((clen > zv_max_mean_zsize) && (curr_pers_pampd_count > 0)) {
+                       total_zsize = xv_get_total_size_bytes(cli->xvpool);
+                       zv_mean_zsize = div_u64(total_zsize,
+                                               curr_pers_pampd_count);
+                       if (zv_mean_zsize > zv_max_mean_zsize) {
+                               zcache_mean_compress_poor++;
+                               goto out;
+                       }
+               }
+               pampd = (void *)zv_create(cli->xvpool, pool->pool_id,
+                                               oid, index, cdata, clen);
+               if (pampd == NULL)
+                       goto out;
+               count = atomic_inc_return(&zcache_curr_pers_pampd_count);
+               if (count > zcache_curr_pers_pampd_count_max)
+                       zcache_curr_pers_pampd_count_max = count;
+       }
+out:
+       return pampd;
+}
+
+/*
+ * fill the pageframe corresponding to the struct page with the data
+ * from the passed pampd
+ */
+static int zcache_pampd_get_data(char *data, size_t *bufsize, bool raw,
+                                       void *pampd, struct tmem_pool *pool,
+                                       struct tmem_oid *oid, uint32_t index)
+{
+       int ret = 0;
+
+       BUG_ON(is_ephemeral(pool));
+       zv_decompress((struct page *)(data), pampd);
+       return ret;
+}
+
+/*
+ * fill the pageframe corresponding to the struct page with the data
+ * from the passed pampd
+ */
+static int zcache_pampd_get_data_and_free(char *data, size_t *bufsize, bool raw,
+                                       void *pampd, struct tmem_pool *pool,
+                                       struct tmem_oid *oid, uint32_t index)
+{
+       int ret = 0;
+
+       BUG_ON(!is_ephemeral(pool));
+       zbud_decompress(virt_to_page(data), pampd);
+       zbud_free_and_delist((struct zbud_hdr *)pampd);
+       atomic_dec(&zcache_curr_eph_pampd_count);
+       return ret;
+}
+
+/*
+ * free the pampd and remove it from any zcache lists
+ * pampd must no longer be pointed to from any tmem data structures!
+ */
+static void zcache_pampd_free(void *pampd, struct tmem_pool *pool,
+                               struct tmem_oid *oid, uint32_t index)
+{
+       struct zcache_client *cli = pool->client;
+
+       if (is_ephemeral(pool)) {
+               zbud_free_and_delist((struct zbud_hdr *)pampd);
+               atomic_dec(&zcache_curr_eph_pampd_count);
+               BUG_ON(atomic_read(&zcache_curr_eph_pampd_count) < 0);
+       } else {
+               zv_free(cli->xvpool, (struct zv_hdr *)pampd);
+               atomic_dec(&zcache_curr_pers_pampd_count);
+               BUG_ON(atomic_read(&zcache_curr_pers_pampd_count) < 0);
+       }
+}
+
+static void zcache_pampd_free_obj(struct tmem_pool *pool, struct tmem_obj *obj)
+{
+}
+
+static void zcache_pampd_new_obj(struct tmem_obj *obj)
+{
+}
+
+static int zcache_pampd_replace_in_obj(void *pampd, struct tmem_obj *obj)
+{
+       return -1;
+}
+
+static bool zcache_pampd_is_remote(void *pampd)
+{
+       return 0;
+}
+
+static struct tmem_pamops zcache_pamops = {
+       .create = zcache_pampd_create,
+       .get_data = zcache_pampd_get_data,
+       .get_data_and_free = zcache_pampd_get_data_and_free,
+       .free = zcache_pampd_free,
+       .free_obj = zcache_pampd_free_obj,
+       .new_obj = zcache_pampd_new_obj,
+       .replace_in_obj = zcache_pampd_replace_in_obj,
+       .is_remote = zcache_pampd_is_remote,
+};
+
+/*
+ * zcache compression/decompression and related per-cpu stuff
+ */
+
+#define LZO_WORKMEM_BYTES LZO1X_1_MEM_COMPRESS
+#define LZO_DSTMEM_PAGE_ORDER 1
+static DEFINE_PER_CPU(unsigned char *, zcache_workmem);
+static DEFINE_PER_CPU(unsigned char *, zcache_dstmem);
+
+static int zcache_compress(struct page *from, void **out_va, size_t *out_len)
+{
+       int ret = 0;
+       unsigned char *dmem = __get_cpu_var(zcache_dstmem);
+       unsigned char *wmem = __get_cpu_var(zcache_workmem);
+       char *from_va;
+
+       BUG_ON(!irqs_disabled());
+       if (unlikely(dmem == NULL || wmem == NULL))
+               goto out;  /* no buffer, so can't compress */
+       from_va = kmap_atomic(from, KM_USER0);
+       mb();
+       ret = lzo1x_1_compress(from_va, PAGE_SIZE, dmem, out_len, wmem);
+       BUG_ON(ret != LZO_E_OK);
+       *out_va = dmem;
+       kunmap_atomic(from_va, KM_USER0);
+       ret = 1;
+out:
+       return ret;
+}
+
+
+static int zcache_cpu_notifier(struct notifier_block *nb,
+                               unsigned long action, void *pcpu)
+{
+       int cpu = (long)pcpu;
+       struct zcache_preload *kp;
+
+       switch (action) {
+       case CPU_UP_PREPARE:
+               per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages(
+                       GFP_KERNEL | __GFP_REPEAT,
+                       LZO_DSTMEM_PAGE_ORDER),
+               per_cpu(zcache_workmem, cpu) =
+                       kzalloc(LZO1X_MEM_COMPRESS,
+                               GFP_KERNEL | __GFP_REPEAT);
+               break;
+       case CPU_DEAD:
+       case CPU_UP_CANCELED:
+               free_pages((unsigned long)per_cpu(zcache_dstmem, cpu),
+                               LZO_DSTMEM_PAGE_ORDER);
+               per_cpu(zcache_dstmem, cpu) = NULL;
+               kfree(per_cpu(zcache_workmem, cpu));
+               per_cpu(zcache_workmem, cpu) = NULL;
+               kp = &per_cpu(zcache_preloads, cpu);
+               while (kp->nr) {
+                       kmem_cache_free(zcache_objnode_cache,
+                                       kp->objnodes[kp->nr - 1]);
+                       kp->objnodes[kp->nr - 1] = NULL;
+                       kp->nr--;
+               }
+               kmem_cache_free(zcache_obj_cache, kp->obj);
+               free_page((unsigned long)kp->page);
+               break;
+       default:
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block zcache_cpu_notifier_block = {
+       .notifier_call = zcache_cpu_notifier
+};
+
+#ifdef CONFIG_SYSFS
+#define ZCACHE_SYSFS_RO(_name) \
+       static ssize_t zcache_##_name##_show(struct kobject *kobj, \
+                               struct kobj_attribute *attr, char *buf) \
+       { \
+               return sprintf(buf, "%lu\n", zcache_##_name); \
+       } \
+       static struct kobj_attribute zcache_##_name##_attr = { \
+               .attr = { .name = __stringify(_name), .mode = 0444 }, \
+               .show = zcache_##_name##_show, \
+       }
+
+#define ZCACHE_SYSFS_RO_ATOMIC(_name) \
+       static ssize_t zcache_##_name##_show(struct kobject *kobj, \
+                               struct kobj_attribute *attr, char *buf) \
+       { \
+           return sprintf(buf, "%d\n", atomic_read(&zcache_##_name)); \
+       } \
+       static struct kobj_attribute zcache_##_name##_attr = { \
+               .attr = { .name = __stringify(_name), .mode = 0444 }, \
+               .show = zcache_##_name##_show, \
+       }
+
+#define ZCACHE_SYSFS_RO_CUSTOM(_name, _func) \
+       static ssize_t zcache_##_name##_show(struct kobject *kobj, \
+                               struct kobj_attribute *attr, char *buf) \
+       { \
+           return _func(buf); \
+       } \
+       static struct kobj_attribute zcache_##_name##_attr = { \
+               .attr = { .name = __stringify(_name), .mode = 0444 }, \
+               .show = zcache_##_name##_show, \
+       }
+
+ZCACHE_SYSFS_RO(curr_obj_count_max);
+ZCACHE_SYSFS_RO(curr_objnode_count_max);
+ZCACHE_SYSFS_RO(flush_total);
+ZCACHE_SYSFS_RO(flush_found);
+ZCACHE_SYSFS_RO(flobj_total);
+ZCACHE_SYSFS_RO(flobj_found);
+ZCACHE_SYSFS_RO(failed_eph_puts);
+ZCACHE_SYSFS_RO(failed_pers_puts);
+ZCACHE_SYSFS_RO(zbud_curr_zbytes);
+ZCACHE_SYSFS_RO(zbud_cumul_zpages);
+ZCACHE_SYSFS_RO(zbud_cumul_zbytes);
+ZCACHE_SYSFS_RO(zbud_buddied_count);
+ZCACHE_SYSFS_RO(zbpg_unused_list_count);
+ZCACHE_SYSFS_RO(evicted_raw_pages);
+ZCACHE_SYSFS_RO(evicted_unbuddied_pages);
+ZCACHE_SYSFS_RO(evicted_buddied_pages);
+ZCACHE_SYSFS_RO(failed_get_free_pages);
+ZCACHE_SYSFS_RO(failed_alloc);
+ZCACHE_SYSFS_RO(put_to_flush);
+ZCACHE_SYSFS_RO(aborted_preload);
+ZCACHE_SYSFS_RO(aborted_shrink);
+ZCACHE_SYSFS_RO(compress_poor);
+ZCACHE_SYSFS_RO(mean_compress_poor);
+ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_raw_pages);
+ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_zpages);
+ZCACHE_SYSFS_RO_ATOMIC(curr_obj_count);
+ZCACHE_SYSFS_RO_ATOMIC(curr_objnode_count);
+ZCACHE_SYSFS_RO_CUSTOM(zbud_unbuddied_list_counts,
+                       zbud_show_unbuddied_list_counts);
+ZCACHE_SYSFS_RO_CUSTOM(zbud_cumul_chunk_counts,
+                       zbud_show_cumul_chunk_counts);
+ZCACHE_SYSFS_RO_CUSTOM(zv_curr_dist_counts,
+                       zv_curr_dist_counts_show);
+ZCACHE_SYSFS_RO_CUSTOM(zv_cumul_dist_counts,
+                       zv_cumul_dist_counts_show);
+
+static struct attribute *zcache_attrs[] = {
+       &zcache_curr_obj_count_attr.attr,
+       &zcache_curr_obj_count_max_attr.attr,
+       &zcache_curr_objnode_count_attr.attr,
+       &zcache_curr_objnode_count_max_attr.attr,
+       &zcache_flush_total_attr.attr,
+       &zcache_flobj_total_attr.attr,
+       &zcache_flush_found_attr.attr,
+       &zcache_flobj_found_attr.attr,
+       &zcache_failed_eph_puts_attr.attr,
+       &zcache_failed_pers_puts_attr.attr,
+       &zcache_compress_poor_attr.attr,
+       &zcache_mean_compress_poor_attr.attr,
+       &zcache_zbud_curr_raw_pages_attr.attr,
+       &zcache_zbud_curr_zpages_attr.attr,
+       &zcache_zbud_curr_zbytes_attr.attr,
+       &zcache_zbud_cumul_zpages_attr.attr,
+       &zcache_zbud_cumul_zbytes_attr.attr,
+       &zcache_zbud_buddied_count_attr.attr,
+       &zcache_zbpg_unused_list_count_attr.attr,
+       &zcache_evicted_raw_pages_attr.attr,
+       &zcache_evicted_unbuddied_pages_attr.attr,
+       &zcache_evicted_buddied_pages_attr.attr,
+       &zcache_failed_get_free_pages_attr.attr,
+       &zcache_failed_alloc_attr.attr,
+       &zcache_put_to_flush_attr.attr,
+       &zcache_aborted_preload_attr.attr,
+       &zcache_aborted_shrink_attr.attr,
+       &zcache_zbud_unbuddied_list_counts_attr.attr,
+       &zcache_zbud_cumul_chunk_counts_attr.attr,
+       &zcache_zv_curr_dist_counts_attr.attr,
+       &zcache_zv_cumul_dist_counts_attr.attr,
+       &zcache_zv_max_zsize_attr.attr,
+       &zcache_zv_max_mean_zsize_attr.attr,
+       &zcache_zv_page_count_policy_percent_attr.attr,
+       NULL,
+};
+
+static struct attribute_group zcache_attr_group = {
+       .attrs = zcache_attrs,
+       .name = "zcache",
+};
+
+#endif /* CONFIG_SYSFS */
+/*
+ * When zcache is disabled ("frozen"), pools can be created and destroyed,
+ * but all puts (and thus all other operations that require memory allocation)
+ * must fail.  If zcache is unfrozen, accepts puts, then frozen again,
+ * data consistency requires all puts while frozen to be converted into
+ * flushes.
+ */
+static bool zcache_freeze;
+
+/*
+ * zcache shrinker interface (only useful for ephemeral pages, so zbud only)
+ */
+static int shrink_zcache_memory(struct shrinker *shrink,
+                               struct shrink_control *sc)
+{
+       int ret = -1;
+       int nr = sc->nr_to_scan;
+       gfp_t gfp_mask = sc->gfp_mask;
+
+       if (nr >= 0) {
+               if (!(gfp_mask & __GFP_FS))
+                       /* does this case really need to be skipped? */
+                       goto out;
+               if (spin_trylock(&zcache_direct_reclaim_lock)) {
+                       zbud_evict_pages(nr);
+                       spin_unlock(&zcache_direct_reclaim_lock);
+               } else
+                       zcache_aborted_shrink++;
+       }
+       ret = (int)atomic_read(&zcache_zbud_curr_raw_pages);
+out:
+       return ret;
+}
+
+static struct shrinker zcache_shrinker = {
+       .shrink = shrink_zcache_memory,
+       .seeks = DEFAULT_SEEKS,
+};
+
+/*
+ * zcache shims between cleancache/frontswap ops and tmem
+ */
+
+static int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp,
+                               uint32_t index, struct page *page)
+{
+       struct tmem_pool *pool;
+       int ret = -1;
+
+       BUG_ON(!irqs_disabled());
+       pool = zcache_get_pool_by_id(cli_id, pool_id);
+       if (unlikely(pool == NULL))
+               goto out;
+       if (!zcache_freeze && zcache_do_preload(pool) == 0) {
+               /* preload does preempt_disable on success */
+               ret = tmem_put(pool, oidp, index, (char *)(page),
+                               PAGE_SIZE, 0, is_ephemeral(pool));
+               if (ret < 0) {
+                       if (is_ephemeral(pool))
+                               zcache_failed_eph_puts++;
+                       else
+                               zcache_failed_pers_puts++;
+               }
+               zcache_put_pool(pool);
+               preempt_enable_no_resched();
+       } else {
+               zcache_put_to_flush++;
+               if (atomic_read(&pool->obj_count) > 0)
+                       /* the put fails whether the flush succeeds or not */
+                       (void)tmem_flush_page(pool, oidp, index);
+               zcache_put_pool(pool);
+       }
+out:
+       return ret;
+}
+
+static int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp,
+                               uint32_t index, struct page *page)
+{
+       struct tmem_pool *pool;
+       int ret = -1;
+       unsigned long flags;
+       size_t size = PAGE_SIZE;
+
+       local_irq_save(flags);
+       pool = zcache_get_pool_by_id(cli_id, pool_id);
+       if (likely(pool != NULL)) {
+               if (atomic_read(&pool->obj_count) > 0)
+                       ret = tmem_get(pool, oidp, index, (char *)(page),
+                                       &size, 0, is_ephemeral(pool));
+               zcache_put_pool(pool);
+       }
+       local_irq_restore(flags);
+       return ret;
+}
+
+static int zcache_flush_page(int cli_id, int pool_id,
+                               struct tmem_oid *oidp, uint32_t index)
+{
+       struct tmem_pool *pool;
+       int ret = -1;
+       unsigned long flags;
+
+       local_irq_save(flags);
+       zcache_flush_total++;
+       pool = zcache_get_pool_by_id(cli_id, pool_id);
+       if (likely(pool != NULL)) {
+               if (atomic_read(&pool->obj_count) > 0)
+                       ret = tmem_flush_page(pool, oidp, index);
+               zcache_put_pool(pool);
+       }
+       if (ret >= 0)
+               zcache_flush_found++;
+       local_irq_restore(flags);
+       return ret;
+}
+
+static int zcache_flush_object(int cli_id, int pool_id,
+                               struct tmem_oid *oidp)
+{
+       struct tmem_pool *pool;
+       int ret = -1;
+       unsigned long flags;
+
+       local_irq_save(flags);
+       zcache_flobj_total++;
+       pool = zcache_get_pool_by_id(cli_id, pool_id);
+       if (likely(pool != NULL)) {
+               if (atomic_read(&pool->obj_count) > 0)
+                       ret = tmem_flush_object(pool, oidp);
+               zcache_put_pool(pool);
+       }
+       if (ret >= 0)
+               zcache_flobj_found++;
+       local_irq_restore(flags);
+       return ret;
+}
+
+static int zcache_destroy_pool(int cli_id, int pool_id)
+{
+       struct tmem_pool *pool = NULL;
+       struct zcache_client *cli = NULL;
+       int ret = -1;
+
+       if (pool_id < 0)
+               goto out;
+       if (cli_id == LOCAL_CLIENT)
+               cli = &zcache_host;
+       else if ((unsigned int)cli_id < MAX_CLIENTS)
+               cli = &zcache_clients[cli_id];
+       if (cli == NULL)
+               goto out;
+       atomic_inc(&cli->refcount);
+       pool = cli->tmem_pools[pool_id];
+       if (pool == NULL)
+               goto out;
+       cli->tmem_pools[pool_id] = NULL;
+       /* wait for pool activity on other cpus to quiesce */
+       while (atomic_read(&pool->refcount) != 0)
+               ;
+       atomic_dec(&cli->refcount);
+       local_bh_disable();
+       ret = tmem_destroy_pool(pool);
+       local_bh_enable();
+       kfree(pool);
+       pr_info("zcache: destroyed pool id=%d, cli_id=%d\n",
+                       pool_id, cli_id);
+out:
+       return ret;
+}
+
+static int zcache_new_pool(uint16_t cli_id, uint32_t flags)
+{
+       int poolid = -1;
+       struct tmem_pool *pool;
+       struct zcache_client *cli = NULL;
+
+       if (cli_id == LOCAL_CLIENT)
+               cli = &zcache_host;
+       else if ((unsigned int)cli_id < MAX_CLIENTS)
+               cli = &zcache_clients[cli_id];
+       if (cli == NULL)
+               goto out;
+       atomic_inc(&cli->refcount);
+       pool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL);
+       if (pool == NULL) {
+               pr_info("zcache: pool creation failed: out of memory\n");
+               goto out;
+       }
+
+       for (poolid = 0; poolid < MAX_POOLS_PER_CLIENT; poolid++)
+               if (cli->tmem_pools[poolid] == NULL)
+                       break;
+       if (poolid >= MAX_POOLS_PER_CLIENT) {
+               pr_info("zcache: pool creation failed: max exceeded\n");
+               kfree(pool);
+               poolid = -1;
+               goto out;
+       }
+       atomic_set(&pool->refcount, 0);
+       pool->client = cli;
+       pool->pool_id = poolid;
+       tmem_new_pool(pool, flags);
+       cli->tmem_pools[poolid] = pool;
+       pr_info("zcache: created %s tmem pool, id=%d, client=%d\n",
+               flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
+               poolid, cli_id);
+out:
+       if (cli != NULL)
+               atomic_dec(&cli->refcount);
+       return poolid;
+}
+
+/**********
+ * Two kernel functionalities currently can be layered on top of tmem.
+ * These are "cleancache" which is used as a second-chance cache for clean
+ * page cache pages; and "frontswap" which is used for swap pages
+ * to avoid writes to disk.  A generic "shim" is provided here for each
+ * to translate in-kernel semantics to zcache semantics.
+ */
+
+#ifdef CONFIG_CLEANCACHE
+static void zcache_cleancache_put_page(int pool_id,
+                                       struct cleancache_filekey key,
+                                       pgoff_t index, struct page *page)
+{
+       u32 ind = (u32) index;
+       struct tmem_oid oid = *(struct tmem_oid *)&key;
+
+       if (likely(ind == index))
+               (void)zcache_put_page(LOCAL_CLIENT, pool_id, &oid, index, page);
+}
+
+static int zcache_cleancache_get_page(int pool_id,
+                                       struct cleancache_filekey key,
+                                       pgoff_t index, struct page *page)
+{
+       u32 ind = (u32) index;
+       struct tmem_oid oid = *(struct tmem_oid *)&key;
+       int ret = -1;
+
+       if (likely(ind == index))
+               ret = zcache_get_page(LOCAL_CLIENT, pool_id, &oid, index, page);
+       return ret;
+}
+
+static void zcache_cleancache_flush_page(int pool_id,
+                                       struct cleancache_filekey key,
+                                       pgoff_t index)
+{
+       u32 ind = (u32) index;
+       struct tmem_oid oid = *(struct tmem_oid *)&key;
+
+       if (likely(ind == index))
+               (void)zcache_flush_page(LOCAL_CLIENT, pool_id, &oid, ind);
+}
+
+static void zcache_cleancache_flush_inode(int pool_id,
+                                       struct cleancache_filekey key)
+{
+       struct tmem_oid oid = *(struct tmem_oid *)&key;
+
+       (void)zcache_flush_object(LOCAL_CLIENT, pool_id, &oid);
+}
+
+static void zcache_cleancache_flush_fs(int pool_id)
+{
+       if (pool_id >= 0)
+               (void)zcache_destroy_pool(LOCAL_CLIENT, pool_id);
+}
+
+static int zcache_cleancache_init_fs(size_t pagesize)
+{
+       BUG_ON(sizeof(struct cleancache_filekey) !=
+                               sizeof(struct tmem_oid));
+       BUG_ON(pagesize != PAGE_SIZE);
+       return zcache_new_pool(LOCAL_CLIENT, 0);
+}
+
+static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize)
+{
+       /* shared pools are unsupported and map to private */
+       BUG_ON(sizeof(struct cleancache_filekey) !=
+                               sizeof(struct tmem_oid));
+       BUG_ON(pagesize != PAGE_SIZE);
+       return zcache_new_pool(LOCAL_CLIENT, 0);
+}
+
+static struct cleancache_ops zcache_cleancache_ops = {
+       .put_page = zcache_cleancache_put_page,
+       .get_page = zcache_cleancache_get_page,
+       .flush_page = zcache_cleancache_flush_page,
+       .flush_inode = zcache_cleancache_flush_inode,
+       .flush_fs = zcache_cleancache_flush_fs,
+       .init_shared_fs = zcache_cleancache_init_shared_fs,
+       .init_fs = zcache_cleancache_init_fs
+};
+
+struct cleancache_ops zcache_cleancache_register_ops(void)
+{
+       struct cleancache_ops old_ops =
+               cleancache_register_ops(&zcache_cleancache_ops);
+
+       return old_ops;
+}
+#endif
+
+#ifdef CONFIG_FRONTSWAP
+/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
+static int zcache_frontswap_poolid = -1;
+
+/*
+ * Swizzling increases objects per swaptype, increasing tmem concurrency
+ * for heavy swaploads.  Later, larger nr_cpus -> larger SWIZ_BITS
+ */
+#define SWIZ_BITS              4
+#define SWIZ_MASK              ((1 << SWIZ_BITS) - 1)
+#define _oswiz(_type, _ind)    ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
+#define iswiz(_ind)            (_ind >> SWIZ_BITS)
+
+static inline struct tmem_oid oswiz(unsigned type, u32 ind)
+{
+       struct tmem_oid oid = { .oid = { 0 } };
+       oid.oid[0] = _oswiz(type, ind);
+       return oid;
+}
+
+static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
+                                  struct page *page)
+{
+       u64 ind64 = (u64)offset;
+       u32 ind = (u32)offset;
+       struct tmem_oid oid = oswiz(type, ind);
+       int ret = -1;
+       unsigned long flags;
+
+       BUG_ON(!PageLocked(page));
+       if (likely(ind64 == ind)) {
+               local_irq_save(flags);
+               ret = zcache_put_page(LOCAL_CLIENT, zcache_frontswap_poolid,
+                                       &oid, iswiz(ind), page);
+               local_irq_restore(flags);
+       }
+       return ret;
+}
+
+/* returns 0 if the page was successfully gotten from frontswap, -1 if
+ * was not present (should never happen!) */
+static int zcache_frontswap_get_page(unsigned type, pgoff_t offset,
+                                  struct page *page)
+{
+       u64 ind64 = (u64)offset;
+       u32 ind = (u32)offset;
+       struct tmem_oid oid = oswiz(type, ind);
+       int ret = -1;
+
+       BUG_ON(!PageLocked(page));
+       if (likely(ind64 == ind))
+               ret = zcache_get_page(LOCAL_CLIENT, zcache_frontswap_poolid,
+                                       &oid, iswiz(ind), page);
+       return ret;
+}
+
+/* flush a single page from frontswap */
+static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset)
+{
+       u64 ind64 = (u64)offset;
+       u32 ind = (u32)offset;
+       struct tmem_oid oid = oswiz(type, ind);
+
+       if (likely(ind64 == ind))
+               (void)zcache_flush_page(LOCAL_CLIENT, zcache_frontswap_poolid,
+                                       &oid, iswiz(ind));
+}
+
+/* flush all pages from the passed swaptype */
+static void zcache_frontswap_flush_area(unsigned type)
+{
+       struct tmem_oid oid;
+       int ind;
+
+       for (ind = SWIZ_MASK; ind >= 0; ind--) {
+               oid = oswiz(type, ind);
+               (void)zcache_flush_object(LOCAL_CLIENT,
+                                               zcache_frontswap_poolid, &oid);
+       }
+}
+
+static void zcache_frontswap_init(unsigned ignored)
+{
+       /* a single tmem poolid is used for all frontswap "types" (swapfiles) */
+       if (zcache_frontswap_poolid < 0)
+               zcache_frontswap_poolid =
+                       zcache_new_pool(LOCAL_CLIENT, TMEM_POOL_PERSIST);
+}
+
+static struct frontswap_ops zcache_frontswap_ops = {
+       .put_page = zcache_frontswap_put_page,
+       .get_page = zcache_frontswap_get_page,
+       .flush_page = zcache_frontswap_flush_page,
+       .flush_area = zcache_frontswap_flush_area,
+       .init = zcache_frontswap_init
+};
+
+struct frontswap_ops zcache_frontswap_register_ops(void)
+{
+       struct frontswap_ops old_ops =
+               frontswap_register_ops(&zcache_frontswap_ops);
+
+       return old_ops;
+}
+#endif
+
+/*
+ * zcache initialization
+ * NOTE FOR NOW zcache MUST BE PROVIDED AS A KERNEL BOOT PARAMETER OR
+ * NOTHING HAPPENS!
+ */
+
+static int zcache_enabled;
+
+static int __init enable_zcache(char *s)
+{
+       zcache_enabled = 1;
+       return 1;
+}
+__setup("zcache", enable_zcache);
+
+/* allow independent dynamic disabling of cleancache and frontswap */
+
+static int use_cleancache = 1;
+
+static int __init no_cleancache(char *s)
+{
+       use_cleancache = 0;
+       return 1;
+}
+
+__setup("nocleancache", no_cleancache);
+
+static int use_frontswap = 1;
+
+static int __init no_frontswap(char *s)
+{
+       use_frontswap = 0;
+       return 1;
+}
+
+__setup("nofrontswap", no_frontswap);
+
+static int __init zcache_init(void)
+{
+       int ret = 0;
+
+#ifdef CONFIG_SYSFS
+       ret = sysfs_create_group(mm_kobj, &zcache_attr_group);
+       if (ret) {
+               pr_err("zcache: can't create sysfs\n");
+               goto out;
+       }
+#endif /* CONFIG_SYSFS */
+#if defined(CONFIG_CLEANCACHE) || defined(CONFIG_FRONTSWAP)
+       if (zcache_enabled) {
+               unsigned int cpu;
+
+               tmem_register_hostops(&zcache_hostops);
+               tmem_register_pamops(&zcache_pamops);
+               ret = register_cpu_notifier(&zcache_cpu_notifier_block);
+               if (ret) {
+                       pr_err("zcache: can't register cpu notifier\n");
+                       goto out;
+               }
+               for_each_online_cpu(cpu) {
+                       void *pcpu = (void *)(long)cpu;
+                       zcache_cpu_notifier(&zcache_cpu_notifier_block,
+                               CPU_UP_PREPARE, pcpu);
+               }
+       }
+       zcache_objnode_cache = kmem_cache_create("zcache_objnode",
+                               sizeof(struct tmem_objnode), 0, 0, NULL);
+       zcache_obj_cache = kmem_cache_create("zcache_obj",
+                               sizeof(struct tmem_obj), 0, 0, NULL);
+       ret = zcache_new_client(LOCAL_CLIENT);
+       if (ret) {
+               pr_err("zcache: can't create client\n");
+               goto out;
+       }
+#endif
+#ifdef CONFIG_CLEANCACHE
+       if (zcache_enabled && use_cleancache) {
+               struct cleancache_ops old_ops;
+
+               zbud_init();
+               register_shrinker(&zcache_shrinker);
+               old_ops = zcache_cleancache_register_ops();
+               pr_info("zcache: cleancache enabled using kernel "
+                       "transcendent memory and compression buddies\n");
+               if (old_ops.init_fs != NULL)
+                       pr_warning("zcache: cleancache_ops overridden");
+       }
+#endif
+#ifdef CONFIG_FRONTSWAP
+       if (zcache_enabled && use_frontswap) {
+               struct frontswap_ops old_ops;
+
+               old_ops = zcache_frontswap_register_ops();
+               pr_info("zcache: frontswap enabled using kernel "
+                       "transcendent memory and xvmalloc\n");
+               if (old_ops.init != NULL)
+                       pr_warning("ktmem: frontswap_ops overridden");
+       }
+#endif
+out:
+       return ret;
+}
+
+module_init(zcache_init)
diff --git a/drivers/staging/zcache/zcache.c b/drivers/staging/zcache/zcache.c

deleted file mode 100644 (file)

index 65a81a0..0000000
--- a/drivers/staging/zcache/zcache.c
+++ /dev/null
@@ -1,1996 +0,0 @@
-/*
- * zcache.c
- *
- * Copyright (c) 2010,2011, Dan Magenheimer, Oracle Corp.
- * Copyright (c) 2010,2011, Nitin Gupta
- *
- * Zcache provides an in-kernel "host implementation" for transcendent memory
- * and, thus indirectly, for cleancache and frontswap.  Zcache includes two
- * page-accessible memory [1] interfaces, both utilizing lzo1x compression:
- * 1) "compression buddies" ("zbud") is used for ephemeral pages
- * 2) xvmalloc is used for persistent pages.
- * Xvmalloc (based on the TLSF allocator) has very low fragmentation
- * so maximizes space efficiency, while zbud allows pairs (and potentially,
- * in the future, more than a pair of) compressed pages to be closely linked
- * so that reclaiming can be done via the kernel's physical-page-oriented
- * "shrinker" interface.
- *
- * [1] For a definition of page-accessible memory (aka PAM), see:
- *   http://marc.info/?l=linux-mm&m=127811271605009
- */
-
-#include <linux/cpu.h>
-#include <linux/highmem.h>
-#include <linux/list.h>
-#include <linux/lzo.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/atomic.h>
-#include "tmem.h"
-
-#include "../zram/xvmalloc.h" /* if built in drivers/staging */
-
-#if (!defined(CONFIG_CLEANCACHE) && !defined(CONFIG_FRONTSWAP))
-#error "zcache is useless without CONFIG_CLEANCACHE or CONFIG_FRONTSWAP"
-#endif
-#ifdef CONFIG_CLEANCACHE
-#include <linux/cleancache.h>
-#endif
-#ifdef CONFIG_FRONTSWAP
-#include <linux/frontswap.h>
-#endif
-
-#if 0
-/* this is more aggressive but may cause other problems? */
-#define ZCACHE_GFP_MASK        (GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN)
-#else
-#define ZCACHE_GFP_MASK \
-       (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC)
-#endif
-
-#define MAX_POOLS_PER_CLIENT 16
-
-#define MAX_CLIENTS 16
-#define LOCAL_CLIENT ((uint16_t)-1)
-struct zcache_client {
-       struct tmem_pool *tmem_pools[MAX_POOLS_PER_CLIENT];
-       struct xv_pool *xvpool;
-       bool allocated;
-       atomic_t refcount;
-};
-
-static struct zcache_client zcache_host;
-static struct zcache_client zcache_clients[MAX_CLIENTS];
-
-static inline uint16_t get_client_id_from_client(struct zcache_client *cli)
-{
-       BUG_ON(cli == NULL);
-       if (cli == &zcache_host)
-               return LOCAL_CLIENT;
-       return cli - &zcache_clients[0];
-}
-
-static inline bool is_local_client(struct zcache_client *cli)
-{
-       return cli == &zcache_host;
-}
-
-/**********
- * Compression buddies ("zbud") provides for packing two (or, possibly
- * in the future, more) compressed ephemeral pages into a single "raw"
- * (physical) page and tracking them with data structures so that
- * the raw pages can be easily reclaimed.
- *
- * A zbud page ("zbpg") is an aligned page containing a list_head,
- * a lock, and two "zbud headers".  The remainder of the physical
- * page is divided up into aligned 64-byte "chunks" which contain
- * the compressed data for zero, one, or two zbuds.  Each zbpg
- * resides on: (1) an "unused list" if it has no zbuds; (2) a
- * "buddied" list if it is fully populated  with two zbuds; or
- * (3) one of PAGE_SIZE/64 "unbuddied" lists indexed by how many chunks
- * the one unbuddied zbud uses.  The data inside a zbpg cannot be
- * read or written unless the zbpg's lock is held.
- */
-
-#define ZBH_SENTINEL  0x43214321
-#define ZBPG_SENTINEL  0xdeadbeef
-
-#define ZBUD_MAX_BUDS 2
-
-struct zbud_hdr {
-       uint16_t client_id;
-       uint16_t pool_id;
-       struct tmem_oid oid;
-       uint32_t index;
-       uint16_t size; /* compressed size in bytes, zero means unused */
-       DECL_SENTINEL
-};
-
-struct zbud_page {
-       struct list_head bud_list;
-       spinlock_t lock;
-       struct zbud_hdr buddy[ZBUD_MAX_BUDS];
-       DECL_SENTINEL
-       /* followed by NUM_CHUNK aligned CHUNK_SIZE-byte chunks */
-};
-
-#define CHUNK_SHIFT    6
-#define CHUNK_SIZE     (1 << CHUNK_SHIFT)
-#define CHUNK_MASK     (~(CHUNK_SIZE-1))
-#define NCHUNKS                (((PAGE_SIZE - sizeof(struct zbud_page)) & \
-                               CHUNK_MASK) >> CHUNK_SHIFT)
-#define MAX_CHUNK      (NCHUNKS-1)
-
-static struct {
-       struct list_head list;
-       unsigned count;
-} zbud_unbuddied[NCHUNKS];
-/* list N contains pages with N chunks USED and NCHUNKS-N unused */
-/* element 0 is never used but optimizing that isn't worth it */
-static unsigned long zbud_cumul_chunk_counts[NCHUNKS];
-
-struct list_head zbud_buddied_list;
-static unsigned long zcache_zbud_buddied_count;
-
-/* protects the buddied list and all unbuddied lists */
-static DEFINE_SPINLOCK(zbud_budlists_spinlock);
-
-static LIST_HEAD(zbpg_unused_list);
-static unsigned long zcache_zbpg_unused_list_count;
-
-/* protects the unused page list */
-static DEFINE_SPINLOCK(zbpg_unused_list_spinlock);
-
-static atomic_t zcache_zbud_curr_raw_pages;
-static atomic_t zcache_zbud_curr_zpages;
-static unsigned long zcache_zbud_curr_zbytes;
-static unsigned long zcache_zbud_cumul_zpages;
-static unsigned long zcache_zbud_cumul_zbytes;
-static unsigned long zcache_compress_poor;
-static unsigned long zcache_mean_compress_poor;
-
-/* forward references */
-static void *zcache_get_free_page(void);
-static void zcache_free_page(void *p);
-
-/*
- * zbud helper functions
- */
-
-static inline unsigned zbud_max_buddy_size(void)
-{
-       return MAX_CHUNK << CHUNK_SHIFT;
-}
-
-static inline unsigned zbud_size_to_chunks(unsigned size)
-{
-       BUG_ON(size == 0 || size > zbud_max_buddy_size());
-       return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
-}
-
-static inline int zbud_budnum(struct zbud_hdr *zh)
-{
-       unsigned offset = (unsigned long)zh & (PAGE_SIZE - 1);
-       struct zbud_page *zbpg = NULL;
-       unsigned budnum = -1U;
-       int i;
-
-       for (i = 0; i < ZBUD_MAX_BUDS; i++)
-               if (offset == offsetof(typeof(*zbpg), buddy[i])) {
-                       budnum = i;
-                       break;
-               }
-       BUG_ON(budnum == -1U);
-       return budnum;
-}
-
-static char *zbud_data(struct zbud_hdr *zh, unsigned size)
-{
-       struct zbud_page *zbpg;
-       char *p;
-       unsigned budnum;
-
-       ASSERT_SENTINEL(zh, ZBH);
-       budnum = zbud_budnum(zh);
-       BUG_ON(size == 0 || size > zbud_max_buddy_size());
-       zbpg = container_of(zh, struct zbud_page, buddy[budnum]);
-       ASSERT_SPINLOCK(&zbpg->lock);
-       p = (char *)zbpg;
-       if (budnum == 0)
-               p += ((sizeof(struct zbud_page) + CHUNK_SIZE - 1) &
-                                                       CHUNK_MASK);
-       else if (budnum == 1)
-               p += PAGE_SIZE - ((size + CHUNK_SIZE - 1) & CHUNK_MASK);
-       return p;
-}
-
-/*
- * zbud raw page management
- */
-
-static struct zbud_page *zbud_alloc_raw_page(void)
-{
-       struct zbud_page *zbpg = NULL;
-       struct zbud_hdr *zh0, *zh1;
-       bool recycled = 0;
-
-       /* if any pages on the zbpg list, use one */
-       spin_lock(&zbpg_unused_list_spinlock);
-       if (!list_empty(&zbpg_unused_list)) {
-               zbpg = list_first_entry(&zbpg_unused_list,
-                               struct zbud_page, bud_list);
-               list_del_init(&zbpg->bud_list);
-               zcache_zbpg_unused_list_count--;
-               recycled = 1;
-       }
-       spin_unlock(&zbpg_unused_list_spinlock);
-       if (zbpg == NULL)
-               /* none on zbpg list, try to get a kernel page */
-               zbpg = zcache_get_free_page();
-       if (likely(zbpg != NULL)) {
-               INIT_LIST_HEAD(&zbpg->bud_list);
-               zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1];
-               spin_lock_init(&zbpg->lock);
-               if (recycled) {
-                       ASSERT_INVERTED_SENTINEL(zbpg, ZBPG);
-                       SET_SENTINEL(zbpg, ZBPG);
-                       BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid));
-                       BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid));
-               } else {
-                       atomic_inc(&zcache_zbud_curr_raw_pages);
-                       INIT_LIST_HEAD(&zbpg->bud_list);
-                       SET_SENTINEL(zbpg, ZBPG);
-                       zh0->size = 0; zh1->size = 0;
-                       tmem_oid_set_invalid(&zh0->oid);
-                       tmem_oid_set_invalid(&zh1->oid);
-               }
-       }
-       return zbpg;
-}
-
-static void zbud_free_raw_page(struct zbud_page *zbpg)
-{
-       struct zbud_hdr *zh0 = &zbpg->buddy[0], *zh1 = &zbpg->buddy[1];
-
-       ASSERT_SENTINEL(zbpg, ZBPG);
-       BUG_ON(!list_empty(&zbpg->bud_list));
-       ASSERT_SPINLOCK(&zbpg->lock);
-       BUG_ON(zh0->size != 0 || tmem_oid_valid(&zh0->oid));
-       BUG_ON(zh1->size != 0 || tmem_oid_valid(&zh1->oid));
-       INVERT_SENTINEL(zbpg, ZBPG);
-       spin_unlock(&zbpg->lock);
-       spin_lock(&zbpg_unused_list_spinlock);
-       list_add(&zbpg->bud_list, &zbpg_unused_list);
-       zcache_zbpg_unused_list_count++;
-       spin_unlock(&zbpg_unused_list_spinlock);
-}
-
-/*
- * core zbud handling routines
- */
-
-static unsigned zbud_free(struct zbud_hdr *zh)
-{
-       unsigned size;
-
-       ASSERT_SENTINEL(zh, ZBH);
-       BUG_ON(!tmem_oid_valid(&zh->oid));
-       size = zh->size;
-       BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size());
-       zh->size = 0;
-       tmem_oid_set_invalid(&zh->oid);
-       INVERT_SENTINEL(zh, ZBH);
-       zcache_zbud_curr_zbytes -= size;
-       atomic_dec(&zcache_zbud_curr_zpages);
-       return size;
-}
-
-static void zbud_free_and_delist(struct zbud_hdr *zh)
-{
-       unsigned chunks;
-       struct zbud_hdr *zh_other;
-       unsigned budnum = zbud_budnum(zh), size;
-       struct zbud_page *zbpg =
-               container_of(zh, struct zbud_page, buddy[budnum]);
-
-       spin_lock(&zbpg->lock);
-       if (list_empty(&zbpg->bud_list)) {
-               /* ignore zombie page... see zbud_evict_pages() */
-               spin_unlock(&zbpg->lock);
-               return;
-       }
-       size = zbud_free(zh);
-       ASSERT_SPINLOCK(&zbpg->lock);
-       zh_other = &zbpg->buddy[(budnum == 0) ? 1 : 0];
-       if (zh_other->size == 0) { /* was unbuddied: unlist and free */
-               chunks = zbud_size_to_chunks(size) ;
-               spin_lock(&zbud_budlists_spinlock);
-               BUG_ON(list_empty(&zbud_unbuddied[chunks].list));
-               list_del_init(&zbpg->bud_list);
-               zbud_unbuddied[chunks].count--;
-               spin_unlock(&zbud_budlists_spinlock);
-               zbud_free_raw_page(zbpg);
-       } else { /* was buddied: move remaining buddy to unbuddied list */
-               chunks = zbud_size_to_chunks(zh_other->size) ;
-               spin_lock(&zbud_budlists_spinlock);
-               list_del_init(&zbpg->bud_list);
-               zcache_zbud_buddied_count--;
-               list_add_tail(&zbpg->bud_list, &zbud_unbuddied[chunks].list);
-               zbud_unbuddied[chunks].count++;
-               spin_unlock(&zbud_budlists_spinlock);
-               spin_unlock(&zbpg->lock);
-       }
-}
-
-static struct zbud_hdr *zbud_create(uint16_t client_id, uint16_t pool_id,
-                                       struct tmem_oid *oid,
-                                       uint32_t index, struct page *page,
-                                       void *cdata, unsigned size)
-{
-       struct zbud_hdr *zh0, *zh1, *zh = NULL;
-       struct zbud_page *zbpg = NULL, *ztmp;
-       unsigned nchunks;
-       char *to;
-       int i, found_good_buddy = 0;
-
-       nchunks = zbud_size_to_chunks(size) ;
-       for (i = MAX_CHUNK - nchunks + 1; i > 0; i--) {
-               spin_lock(&zbud_budlists_spinlock);
-               if (!list_empty(&zbud_unbuddied[i].list)) {
-                       list_for_each_entry_safe(zbpg, ztmp,
-                                   &zbud_unbuddied[i].list, bud_list) {
-                               if (spin_trylock(&zbpg->lock)) {
-                                       found_good_buddy = i;
-                                       goto found_unbuddied;
-                               }
-                       }
-               }
-               spin_unlock(&zbud_budlists_spinlock);
-       }
-       /* didn't find a good buddy, try allocating a new page */
-       zbpg = zbud_alloc_raw_page();
-       if (unlikely(zbpg == NULL))
-               goto out;
-       /* ok, have a page, now compress the data before taking locks */
-       spin_lock(&zbpg->lock);
-       spin_lock(&zbud_budlists_spinlock);
-       list_add_tail(&zbpg->bud_list, &zbud_unbuddied[nchunks].list);
-       zbud_unbuddied[nchunks].count++;
-       zh = &zbpg->buddy[0];
-       goto init_zh;
-
-found_unbuddied:
-       ASSERT_SPINLOCK(&zbpg->lock);
-       zh0 = &zbpg->buddy[0]; zh1 = &zbpg->buddy[1];
-       BUG_ON(!((zh0->size == 0) ^ (zh1->size == 0)));
-       if (zh0->size != 0) { /* buddy0 in use, buddy1 is vacant */
-               ASSERT_SENTINEL(zh0, ZBH);
-               zh = zh1;
-       } else if (zh1->size != 0) { /* buddy1 in use, buddy0 is vacant */
-               ASSERT_SENTINEL(zh1, ZBH);
-               zh = zh0;
-       } else
-               BUG();
-       list_del_init(&zbpg->bud_list);
-       zbud_unbuddied[found_good_buddy].count--;
-       list_add_tail(&zbpg->bud_list, &zbud_buddied_list);
-       zcache_zbud_buddied_count++;
-
-init_zh:
-       SET_SENTINEL(zh, ZBH);
-       zh->size = size;
-       zh->index = index;
-       zh->oid = *oid;
-       zh->pool_id = pool_id;
-       zh->client_id = client_id;
-       /* can wait to copy the data until the list locks are dropped */
-       spin_unlock(&zbud_budlists_spinlock);
-
-       to = zbud_data(zh, size);
-       memcpy(to, cdata, size);
-       spin_unlock(&zbpg->lock);
-       zbud_cumul_chunk_counts[nchunks]++;
-       atomic_inc(&zcache_zbud_curr_zpages);
-       zcache_zbud_cumul_zpages++;
-       zcache_zbud_curr_zbytes += size;
-       zcache_zbud_cumul_zbytes += size;
-out:
-       return zh;
-}
-
-static int zbud_decompress(struct page *page, struct zbud_hdr *zh)
-{
-       struct zbud_page *zbpg;
-       unsigned budnum = zbud_budnum(zh);
-       size_t out_len = PAGE_SIZE;
-       char *to_va, *from_va;
-       unsigned size;
-       int ret = 0;
-
-       zbpg = container_of(zh, struct zbud_page, buddy[budnum]);
-       spin_lock(&zbpg->lock);
-       if (list_empty(&zbpg->bud_list)) {
-               /* ignore zombie page... see zbud_evict_pages() */
-               ret = -EINVAL;
-               goto out;
-       }
-       ASSERT_SENTINEL(zh, ZBH);
-       BUG_ON(zh->size == 0 || zh->size > zbud_max_buddy_size());
-       to_va = kmap_atomic(page, KM_USER0);
-       size = zh->size;
-       from_va = zbud_data(zh, size);
-       ret = lzo1x_decompress_safe(from_va, size, to_va, &out_len);
-       BUG_ON(ret != LZO_E_OK);
-       BUG_ON(out_len != PAGE_SIZE);
-       kunmap_atomic(to_va, KM_USER0);
-out:
-       spin_unlock(&zbpg->lock);
-       return ret;
-}
-
-/*
- * The following routines handle shrinking of ephemeral pages by evicting
- * pages "least valuable" first.
- */
-
-static unsigned long zcache_evicted_raw_pages;
-static unsigned long zcache_evicted_buddied_pages;
-static unsigned long zcache_evicted_unbuddied_pages;
-
-static struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id,
-                                               uint16_t poolid);
-static void zcache_put_pool(struct tmem_pool *pool);
-
-/*
- * Flush and free all zbuds in a zbpg, then free the pageframe
- */
-static void zbud_evict_zbpg(struct zbud_page *zbpg)
-{
-       struct zbud_hdr *zh;
-       int i, j;
-       uint32_t pool_id[ZBUD_MAX_BUDS], client_id[ZBUD_MAX_BUDS];
-       uint32_t index[ZBUD_MAX_BUDS];
-       struct tmem_oid oid[ZBUD_MAX_BUDS];
-       struct tmem_pool *pool;
-
-       ASSERT_SPINLOCK(&zbpg->lock);
-       BUG_ON(!list_empty(&zbpg->bud_list));
-       for (i = 0, j = 0; i < ZBUD_MAX_BUDS; i++) {
-               zh = &zbpg->buddy[i];
-               if (zh->size) {
-                       client_id[j] = zh->client_id;
-                       pool_id[j] = zh->pool_id;
-                       oid[j] = zh->oid;
-                       index[j] = zh->index;
-                       j++;
-                       zbud_free(zh);
-               }
-       }
-       spin_unlock(&zbpg->lock);
-       for (i = 0; i < j; i++) {
-               pool = zcache_get_pool_by_id(client_id[i], pool_id[i]);
-               if (pool != NULL) {
-                       tmem_flush_page(pool, &oid[i], index[i]);
-                       zcache_put_pool(pool);
-               }
-       }
-       ASSERT_SENTINEL(zbpg, ZBPG);
-       spin_lock(&zbpg->lock);
-       zbud_free_raw_page(zbpg);
-}
-
-/*
- * Free nr pages.  This code is funky because we want to hold the locks
- * protecting various lists for as short a time as possible, and in some
- * circumstances the list may change asynchronously when the list lock is
- * not held.  In some cases we also trylock not only to avoid waiting on a
- * page in use by another cpu, but also to avoid potential deadlock due to
- * lock inversion.
- */
-static void zbud_evict_pages(int nr)
-{
-       struct zbud_page *zbpg;
-       int i;
-
-       /* first try freeing any pages on unused list */
-retry_unused_list:
-       spin_lock_bh(&zbpg_unused_list_spinlock);
-       if (!list_empty(&zbpg_unused_list)) {
-               /* can't walk list here, since it may change when unlocked */
-               zbpg = list_first_entry(&zbpg_unused_list,
-                               struct zbud_page, bud_list);
-               list_del_init(&zbpg->bud_list);
-               zcache_zbpg_unused_list_count--;
-               atomic_dec(&zcache_zbud_curr_raw_pages);
-               spin_unlock_bh(&zbpg_unused_list_spinlock);
-               zcache_free_page(zbpg);
-               zcache_evicted_raw_pages++;
-               if (--nr <= 0)
-                       goto out;
-               goto retry_unused_list;
-       }
-       spin_unlock_bh(&zbpg_unused_list_spinlock);
-
-       /* now try freeing unbuddied pages, starting with least space avail */
-       for (i = 0; i < MAX_CHUNK; i++) {
-retry_unbud_list_i:
-               spin_lock_bh(&zbud_budlists_spinlock);
-               if (list_empty(&zbud_unbuddied[i].list)) {
-                       spin_unlock_bh(&zbud_budlists_spinlock);
-                       continue;
-               }
-               list_for_each_entry(zbpg, &zbud_unbuddied[i].list, bud_list) {
-                       if (unlikely(!spin_trylock(&zbpg->lock)))
-                               continue;
-                       list_del_init(&zbpg->bud_list);
-                       zbud_unbuddied[i].count--;
-                       spin_unlock(&zbud_budlists_spinlock);
-                       zcache_evicted_unbuddied_pages++;
-                       /* want budlists unlocked when doing zbpg eviction */
-                       zbud_evict_zbpg(zbpg);
-                       local_bh_enable();
-                       if (--nr <= 0)
-                               goto out;
-                       goto retry_unbud_list_i;
-               }
-               spin_unlock_bh(&zbud_budlists_spinlock);
-       }
-
-       /* as a last resort, free buddied pages */
-retry_bud_list:
-       spin_lock_bh(&zbud_budlists_spinlock);
-       if (list_empty(&zbud_buddied_list)) {
-               spin_unlock_bh(&zbud_budlists_spinlock);
-               goto out;
-       }
-       list_for_each_entry(zbpg, &zbud_buddied_list, bud_list) {
-               if (unlikely(!spin_trylock(&zbpg->lock)))
-                       continue;
-               list_del_init(&zbpg->bud_list);
-               zcache_zbud_buddied_count--;
-               spin_unlock(&zbud_budlists_spinlock);
-               zcache_evicted_buddied_pages++;
-               /* want budlists unlocked when doing zbpg eviction */
-               zbud_evict_zbpg(zbpg);
-               local_bh_enable();
-               if (--nr <= 0)
-                       goto out;
-               goto retry_bud_list;
-       }
-       spin_unlock_bh(&zbud_budlists_spinlock);
-out:
-       return;
-}
-
-static void zbud_init(void)
-{
-       int i;
-
-       INIT_LIST_HEAD(&zbud_buddied_list);
-       zcache_zbud_buddied_count = 0;
-       for (i = 0; i < NCHUNKS; i++) {
-               INIT_LIST_HEAD(&zbud_unbuddied[i].list);
-               zbud_unbuddied[i].count = 0;
-       }
-}
-
-#ifdef CONFIG_SYSFS
-/*
- * These sysfs routines show a nice distribution of how many zbpg's are
- * currently (and have ever been placed) in each unbuddied list.  It's fun
- * to watch but can probably go away before final merge.
- */
-static int zbud_show_unbuddied_list_counts(char *buf)
-{
-       int i;
-       char *p = buf;
-
-       for (i = 0; i < NCHUNKS; i++)
-               p += sprintf(p, "%u ", zbud_unbuddied[i].count);
-       return p - buf;
-}
-
-static int zbud_show_cumul_chunk_counts(char *buf)
-{
-       unsigned long i, chunks = 0, total_chunks = 0, sum_total_chunks = 0;
-       unsigned long total_chunks_lte_21 = 0, total_chunks_lte_32 = 0;
-       unsigned long total_chunks_lte_42 = 0;
-       char *p = buf;
-
-       for (i = 0; i < NCHUNKS; i++) {
-               p += sprintf(p, "%lu ", zbud_cumul_chunk_counts[i]);
-               chunks += zbud_cumul_chunk_counts[i];
-               total_chunks += zbud_cumul_chunk_counts[i];
-               sum_total_chunks += i * zbud_cumul_chunk_counts[i];
-               if (i == 21)
-                       total_chunks_lte_21 = total_chunks;
-               if (i == 32)
-                       total_chunks_lte_32 = total_chunks;
-               if (i == 42)
-                       total_chunks_lte_42 = total_chunks;
-       }
-       p += sprintf(p, "<=21:%lu <=32:%lu <=42:%lu, mean:%lu\n",
-               total_chunks_lte_21, total_chunks_lte_32, total_chunks_lte_42,
-               chunks == 0 ? 0 : sum_total_chunks / chunks);
-       return p - buf;
-}
-#endif
-
-/**********
- * This "zv" PAM implementation combines the TLSF-based xvMalloc
- * with lzo1x compression to maximize the amount of data that can
- * be packed into a physical page.
- *
- * Zv represents a PAM page with the index and object (plus a "size" value
- * necessary for decompression) immediately preceding the compressed data.
- */
-
-#define ZVH_SENTINEL  0x43214321
-
-struct zv_hdr {
-       uint32_t pool_id;
-       struct tmem_oid oid;
-       uint32_t index;
-       DECL_SENTINEL
-};
-
-/* rudimentary policy limits */
-/* total number of persistent pages may not exceed this percentage */
-static unsigned int zv_page_count_policy_percent = 75;
-/*
- * byte count defining poor compression; pages with greater zsize will be
- * rejected
- */
-static unsigned int zv_max_zsize = (PAGE_SIZE / 8) * 7;
-/*
- * byte count defining poor *mean* compression; pages with greater zsize
- * will be rejected until sufficient better-compressed pages are accepted
- * driving the man below this threshold
- */
-static unsigned int zv_max_mean_zsize = (PAGE_SIZE / 8) * 5;
-
-static unsigned long zv_curr_dist_counts[NCHUNKS];
-static unsigned long zv_cumul_dist_counts[NCHUNKS];
-
-static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,
-                               struct tmem_oid *oid, uint32_t index,
-                               void *cdata, unsigned clen)
-{
-       struct page *page;
-       struct zv_hdr *zv = NULL;
-       uint32_t offset;
-       int alloc_size = clen + sizeof(struct zv_hdr);
-       int chunks = (alloc_size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
-       int ret;
-
-       BUG_ON(!irqs_disabled());
-       BUG_ON(chunks >= NCHUNKS);
-       ret = xv_malloc(xvpool, alloc_size,
-                       &page, &offset, ZCACHE_GFP_MASK);
-       if (unlikely(ret))
-               goto out;
-       zv_curr_dist_counts[chunks]++;
-       zv_cumul_dist_counts[chunks]++;
-       zv = kmap_atomic(page, KM_USER0) + offset;
-       zv->index = index;
-       zv->oid = *oid;
-       zv->pool_id = pool_id;
-       SET_SENTINEL(zv, ZVH);
-       memcpy((char *)zv + sizeof(struct zv_hdr), cdata, clen);
-       kunmap_atomic(zv, KM_USER0);
-out:
-       return zv;
-}
-
-static void zv_free(struct xv_pool *xvpool, struct zv_hdr *zv)
-{
-       unsigned long flags;
-       struct page *page;
-       uint32_t offset;
-       uint16_t size = xv_get_object_size(zv);
-       int chunks = (size + (CHUNK_SIZE - 1)) >> CHUNK_SHIFT;
-
-       ASSERT_SENTINEL(zv, ZVH);
-       BUG_ON(chunks >= NCHUNKS);
-       zv_curr_dist_counts[chunks]--;
-       size -= sizeof(*zv);
-       BUG_ON(size == 0);
-       INVERT_SENTINEL(zv, ZVH);
-       page = virt_to_page(zv);
-       offset = (unsigned long)zv & ~PAGE_MASK;
-       local_irq_save(flags);
-       xv_free(xvpool, page, offset);
-       local_irq_restore(flags);
-}
-
-static void zv_decompress(struct page *page, struct zv_hdr *zv)
-{
-       size_t clen = PAGE_SIZE;
-       char *to_va;
-       unsigned size;
-       int ret;
-
-       ASSERT_SENTINEL(zv, ZVH);
-       size = xv_get_object_size(zv) - sizeof(*zv);
-       BUG_ON(size == 0);
-       to_va = kmap_atomic(page, KM_USER0);
-       ret = lzo1x_decompress_safe((char *)zv + sizeof(*zv),
-                                       size, to_va, &clen);
-       kunmap_atomic(to_va, KM_USER0);
-       BUG_ON(ret != LZO_E_OK);
-       BUG_ON(clen != PAGE_SIZE);
-}
-
-#ifdef CONFIG_SYSFS
-/*
- * show a distribution of compression stats for zv pages.
- */
-
-static int zv_curr_dist_counts_show(char *buf)
-{
-       unsigned long i, n, chunks = 0, sum_total_chunks = 0;
-       char *p = buf;
-
-       for (i = 0; i < NCHUNKS; i++) {
-               n = zv_curr_dist_counts[i];
-               p += sprintf(p, "%lu ", n);
-               chunks += n;
-               sum_total_chunks += i * n;
-       }
-       p += sprintf(p, "mean:%lu\n",
-               chunks == 0 ? 0 : sum_total_chunks / chunks);
-       return p - buf;
-}
-
-static int zv_cumul_dist_counts_show(char *buf)
-{
-       unsigned long i, n, chunks = 0, sum_total_chunks = 0;
-       char *p = buf;
-
-       for (i = 0; i < NCHUNKS; i++) {
-               n = zv_cumul_dist_counts[i];
-               p += sprintf(p, "%lu ", n);
-               chunks += n;
-               sum_total_chunks += i * n;
-       }
-       p += sprintf(p, "mean:%lu\n",
-               chunks == 0 ? 0 : sum_total_chunks / chunks);
-       return p - buf;
-}
-
-/*
- * setting zv_max_zsize via sysfs causes all persistent (e.g. swap)
- * pages that don't compress to less than this value (including metadata
- * overhead) to be rejected.  We don't allow the value to get too close
- * to PAGE_SIZE.
- */
-static ssize_t zv_max_zsize_show(struct kobject *kobj,
-                                   struct kobj_attribute *attr,
-                                   char *buf)
-{
-       return sprintf(buf, "%u\n", zv_max_zsize);
-}
-
-static ssize_t zv_max_zsize_store(struct kobject *kobj,
-                                   struct kobj_attribute *attr,
-                                   const char *buf, size_t count)
-{
-       unsigned long val;
-       int err;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EPERM;
-
-       err = strict_strtoul(buf, 10, &val);
-       if (err || (val == 0) || (val > (PAGE_SIZE / 8) * 7))
-               return -EINVAL;
-       zv_max_zsize = val;
-       return count;
-}
-
-/*
- * setting zv_max_mean_zsize via sysfs causes all persistent (e.g. swap)
- * pages that don't compress to less than this value (including metadata
- * overhead) to be rejected UNLESS the mean compression is also smaller
- * than this value.  In other words, we are load-balancing-by-zsize the
- * accepted pages.  Again, we don't allow the value to get too close
- * to PAGE_SIZE.
- */
-static ssize_t zv_max_mean_zsize_show(struct kobject *kobj,
-                                   struct kobj_attribute *attr,
-                                   char *buf)
-{
-       return sprintf(buf, "%u\n", zv_max_mean_zsize);
-}
-
-static ssize_t zv_max_mean_zsize_store(struct kobject *kobj,
-                                   struct kobj_attribute *attr,
-                                   const char *buf, size_t count)
-{
-       unsigned long val;
-       int err;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EPERM;
-
-       err = strict_strtoul(buf, 10, &val);
-       if (err || (val == 0) || (val > (PAGE_SIZE / 8) * 7))
-               return -EINVAL;
-       zv_max_mean_zsize = val;
-       return count;
-}
-
-/*
- * setting zv_page_count_policy_percent via sysfs sets an upper bound of
- * persistent (e.g. swap) pages that will be retained according to:
- *     (zv_page_count_policy_percent * totalram_pages) / 100)
- * when that limit is reached, further puts will be rejected (until
- * some pages have been flushed).  Note that, due to compression,
- * this number may exceed 100; it defaults to 75 and we set an
- * arbitary limit of 150.  A poor choice will almost certainly result
- * in OOM's, so this value should only be changed prudently.
- */
-static ssize_t zv_page_count_policy_percent_show(struct kobject *kobj,
-                                                struct kobj_attribute *attr,
-                                                char *buf)
-{
-       return sprintf(buf, "%u\n", zv_page_count_policy_percent);
-}
-
-static ssize_t zv_page_count_policy_percent_store(struct kobject *kobj,
-                                                 struct kobj_attribute *attr,
-                                                 const char *buf, size_t count)
-{
-       unsigned long val;
-       int err;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EPERM;
-
-       err = strict_strtoul(buf, 10, &val);
-       if (err || (val == 0) || (val > 150))
-               return -EINVAL;
-       zv_page_count_policy_percent = val;
-       return count;
-}
-
-static struct kobj_attribute zcache_zv_max_zsize_attr = {
-               .attr = { .name = "zv_max_zsize", .mode = 0644 },
-               .show = zv_max_zsize_show,
-               .store = zv_max_zsize_store,
-};
-
-static struct kobj_attribute zcache_zv_max_mean_zsize_attr = {
-               .attr = { .name = "zv_max_mean_zsize", .mode = 0644 },
-               .show = zv_max_mean_zsize_show,
-               .store = zv_max_mean_zsize_store,
-};
-
-static struct kobj_attribute zcache_zv_page_count_policy_percent_attr = {
-               .attr = { .name = "zv_page_count_policy_percent",
-                         .mode = 0644 },
-               .show = zv_page_count_policy_percent_show,
-               .store = zv_page_count_policy_percent_store,
-};
-#endif
-
-/*
- * zcache core code starts here
- */
-
-/* useful stats not collected by cleancache or frontswap */
-static unsigned long zcache_flush_total;
-static unsigned long zcache_flush_found;
-static unsigned long zcache_flobj_total;
-static unsigned long zcache_flobj_found;
-static unsigned long zcache_failed_eph_puts;
-static unsigned long zcache_failed_pers_puts;
-
-/*
- * Tmem operations assume the poolid implies the invoking client.
- * Zcache only has one client (the kernel itself): LOCAL_CLIENT.
- * RAMster has each client numbered by cluster node, and a KVM version
- * of zcache would have one client per guest and each client might
- * have a poolid==N.
- */
-static struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id, uint16_t poolid)
-{
-       struct tmem_pool *pool = NULL;
-       struct zcache_client *cli = NULL;
-
-       if (cli_id == LOCAL_CLIENT)
-               cli = &zcache_host;
-       else {
-               if (cli_id >= MAX_CLIENTS)
-                       goto out;
-               cli = &zcache_clients[cli_id];
-               if (cli == NULL)
-                       goto out;
-               atomic_inc(&cli->refcount);
-       }
-       if (poolid < MAX_POOLS_PER_CLIENT) {
-               pool = cli->tmem_pools[poolid];
-               if (pool != NULL)
-                       atomic_inc(&pool->refcount);
-       }
-out:
-       return pool;
-}
-
-static void zcache_put_pool(struct tmem_pool *pool)
-{
-       struct zcache_client *cli = NULL;
-
-       if (pool == NULL)
-               BUG();
-       cli = pool->client;
-       atomic_dec(&pool->refcount);
-       atomic_dec(&cli->refcount);
-}
-
-int zcache_new_client(uint16_t cli_id)
-{
-       struct zcache_client *cli = NULL;
-       int ret = -1;
-
-       if (cli_id == LOCAL_CLIENT)
-               cli = &zcache_host;
-       else if ((unsigned int)cli_id < MAX_CLIENTS)
-               cli = &zcache_clients[cli_id];
-       if (cli == NULL)
-               goto out;
-       if (cli->allocated)
-               goto out;
-       cli->allocated = 1;
-#ifdef CONFIG_FRONTSWAP
-       cli->xvpool = xv_create_pool();
-       if (cli->xvpool == NULL)
-               goto out;
-#endif
-       ret = 0;
-out:
-       return ret;
-}
-
-/* counters for debugging */
-static unsigned long zcache_failed_get_free_pages;
-static unsigned long zcache_failed_alloc;
-static unsigned long zcache_put_to_flush;
-static unsigned long zcache_aborted_preload;
-static unsigned long zcache_aborted_shrink;
-
-/*
- * Ensure that memory allocation requests in zcache don't result
- * in direct reclaim requests via the shrinker, which would cause
- * an infinite loop.  Maybe a GFP flag would be better?
- */
-static DEFINE_SPINLOCK(zcache_direct_reclaim_lock);
-
-/*
- * for now, used named slabs so can easily track usage; later can
- * either just use kmalloc, or perhaps add a slab-like allocator
- * to more carefully manage total memory utilization
- */
-static struct kmem_cache *zcache_objnode_cache;
-static struct kmem_cache *zcache_obj_cache;
-static atomic_t zcache_curr_obj_count = ATOMIC_INIT(0);
-static unsigned long zcache_curr_obj_count_max;
-static atomic_t zcache_curr_objnode_count = ATOMIC_INIT(0);
-static unsigned long zcache_curr_objnode_count_max;
-
-/*
- * to avoid memory allocation recursion (e.g. due to direct reclaim), we
- * preload all necessary data structures so the hostops callbacks never
- * actually do a malloc
- */
-struct zcache_preload {
-       void *page;
-       struct tmem_obj *obj;
-       int nr;
-       struct tmem_objnode *objnodes[OBJNODE_TREE_MAX_PATH];
-};
-static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, };
-
-static int zcache_do_preload(struct tmem_pool *pool)
-{
-       struct zcache_preload *kp;
-       struct tmem_objnode *objnode;
-       struct tmem_obj *obj;
-       void *page;
-       int ret = -ENOMEM;
-
-       if (unlikely(zcache_objnode_cache == NULL))
-               goto out;
-       if (unlikely(zcache_obj_cache == NULL))
-               goto out;
-       if (!spin_trylock(&zcache_direct_reclaim_lock)) {
-               zcache_aborted_preload++;
-               goto out;
-       }
-       preempt_disable();
-       kp = &__get_cpu_var(zcache_preloads);
-       while (kp->nr < ARRAY_SIZE(kp->objnodes)) {
-               preempt_enable_no_resched();
-               objnode = kmem_cache_alloc(zcache_objnode_cache,
-                               ZCACHE_GFP_MASK);
-               if (unlikely(objnode == NULL)) {
-                       zcache_failed_alloc++;
-                       goto unlock_out;
-               }
-               preempt_disable();
-               kp = &__get_cpu_var(zcache_preloads);
-               if (kp->nr < ARRAY_SIZE(kp->objnodes))
-                       kp->objnodes[kp->nr++] = objnode;
-               else
-                       kmem_cache_free(zcache_objnode_cache, objnode);
-       }
-       preempt_enable_no_resched();
-       obj = kmem_cache_alloc(zcache_obj_cache, ZCACHE_GFP_MASK);
-       if (unlikely(obj == NULL)) {
-               zcache_failed_alloc++;
-               goto unlock_out;
-       }
-       page = (void *)__get_free_page(ZCACHE_GFP_MASK);
-       if (unlikely(page == NULL)) {
-               zcache_failed_get_free_pages++;
-               kmem_cache_free(zcache_obj_cache, obj);
-               goto unlock_out;
-       }
-       preempt_disable();
-       kp = &__get_cpu_var(zcache_preloads);
-       if (kp->obj == NULL)
-               kp->obj = obj;
-       else
-               kmem_cache_free(zcache_obj_cache, obj);
-       if (kp->page == NULL)
-               kp->page = page;
-       else
-               free_page((unsigned long)page);
-       ret = 0;
-unlock_out:
-       spin_unlock(&zcache_direct_reclaim_lock);
-out:
-       return ret;
-}
-
-static void *zcache_get_free_page(void)
-{
-       struct zcache_preload *kp;
-       void *page;
-
-       kp = &__get_cpu_var(zcache_preloads);
-       page = kp->page;
-       BUG_ON(page == NULL);
-       kp->page = NULL;
-       return page;
-}
-
-static void zcache_free_page(void *p)
-{
-       free_page((unsigned long)p);
-}
-
-/*
- * zcache implementation for tmem host ops
- */
-
-static struct tmem_objnode *zcache_objnode_alloc(struct tmem_pool *pool)
-{
-       struct tmem_objnode *objnode = NULL;
-       unsigned long count;
-       struct zcache_preload *kp;
-
-       kp = &__get_cpu_var(zcache_preloads);
-       if (kp->nr <= 0)
-               goto out;
-       objnode = kp->objnodes[kp->nr - 1];
-       BUG_ON(objnode == NULL);
-       kp->objnodes[kp->nr - 1] = NULL;
-       kp->nr--;
-       count = atomic_inc_return(&zcache_curr_objnode_count);
-       if (count > zcache_curr_objnode_count_max)
-               zcache_curr_objnode_count_max = count;
-out:
-       return objnode;
-}
-
-static void zcache_objnode_free(struct tmem_objnode *objnode,
-                                       struct tmem_pool *pool)
-{
-       atomic_dec(&zcache_curr_objnode_count);
-       BUG_ON(atomic_read(&zcache_curr_objnode_count) < 0);
-       kmem_cache_free(zcache_objnode_cache, objnode);
-}
-
-static struct tmem_obj *zcache_obj_alloc(struct tmem_pool *pool)
-{
-       struct tmem_obj *obj = NULL;
-       unsigned long count;
-       struct zcache_preload *kp;
-
-       kp = &__get_cpu_var(zcache_preloads);
-       obj = kp->obj;
-       BUG_ON(obj == NULL);
-       kp->obj = NULL;
-       count = atomic_inc_return(&zcache_curr_obj_count);
-       if (count > zcache_curr_obj_count_max)
-               zcache_curr_obj_count_max = count;
-       return obj;
-}
-
-static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool)
-{
-       atomic_dec(&zcache_curr_obj_count);
-       BUG_ON(atomic_read(&zcache_curr_obj_count) < 0);
-       kmem_cache_free(zcache_obj_cache, obj);
-}
-
-static struct tmem_hostops zcache_hostops = {
-       .obj_alloc = zcache_obj_alloc,
-       .obj_free = zcache_obj_free,
-       .objnode_alloc = zcache_objnode_alloc,
-       .objnode_free = zcache_objnode_free,
-};
-
-/*
- * zcache implementations for PAM page descriptor ops
- */
-
-static atomic_t zcache_curr_eph_pampd_count = ATOMIC_INIT(0);
-static unsigned long zcache_curr_eph_pampd_count_max;
-static atomic_t zcache_curr_pers_pampd_count = ATOMIC_INIT(0);
-static unsigned long zcache_curr_pers_pampd_count_max;
-
-/* forward reference */
-static int zcache_compress(struct page *from, void **out_va, size_t *out_len);
-
-static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph,
-                               struct tmem_pool *pool, struct tmem_oid *oid,
-                                uint32_t index)
-{
-       void *pampd = NULL, *cdata;
-       size_t clen;
-       int ret;
-       unsigned long count;
-       struct page *page = virt_to_page(data);
-       struct zcache_client *cli = pool->client;
-       uint16_t client_id = get_client_id_from_client(cli);
-       unsigned long zv_mean_zsize;
-       unsigned long curr_pers_pampd_count;
-
-       if (eph) {
-               ret = zcache_compress(page, &cdata, &clen);
-               if (ret == 0)
-                       goto out;
-               if (clen == 0 || clen > zbud_max_buddy_size()) {
-                       zcache_compress_poor++;
-                       goto out;
-               }
-               pampd = (void *)zbud_create(client_id, pool->pool_id, oid,
-                                               index, page, cdata, clen);
-               if (pampd != NULL) {
-                       count = atomic_inc_return(&zcache_curr_eph_pampd_count);
-                       if (count > zcache_curr_eph_pampd_count_max)
-                               zcache_curr_eph_pampd_count_max = count;
-               }
-       } else {
-               curr_pers_pampd_count =
-                       atomic_read(&zcache_curr_pers_pampd_count);
-               if (curr_pers_pampd_count >
-                   (zv_page_count_policy_percent * totalram_pages) / 100)
-                       goto out;
-               ret = zcache_compress(page, &cdata, &clen);
-               if (ret == 0)
-                       goto out;
-               /* reject if compression is too poor */
-               if (clen > zv_max_zsize) {
-                       zcache_compress_poor++;
-                       goto out;
-               }
-               /* reject if mean compression is too poor */
-               if ((clen > zv_max_mean_zsize) && (curr_pers_pampd_count > 0)) {
-                       zv_mean_zsize = xv_get_total_size_bytes(cli->xvpool) /
-                                               curr_pers_pampd_count;
-                       if (zv_mean_zsize > zv_max_mean_zsize) {
-                               zcache_mean_compress_poor++;
-                               goto out;
-                       }
-               }
-               pampd = (void *)zv_create(cli->xvpool, pool->pool_id,
-                                               oid, index, cdata, clen);
-               if (pampd == NULL)
-                       goto out;
-               count = atomic_inc_return(&zcache_curr_pers_pampd_count);
-               if (count > zcache_curr_pers_pampd_count_max)
-                       zcache_curr_pers_pampd_count_max = count;
-       }
-out:
-       return pampd;
-}
-
-/*
- * fill the pageframe corresponding to the struct page with the data
- * from the passed pampd
- */
-static int zcache_pampd_get_data(char *data, size_t *bufsize, bool raw,
-                                       void *pampd, struct tmem_pool *pool,
-                                       struct tmem_oid *oid, uint32_t index)
-{
-       int ret = 0;
-
-       BUG_ON(is_ephemeral(pool));
-       zv_decompress(virt_to_page(data), pampd);
-       return ret;
-}
-
-/*
- * fill the pageframe corresponding to the struct page with the data
- * from the passed pampd
- */
-static int zcache_pampd_get_data_and_free(char *data, size_t *bufsize, bool raw,
-                                       void *pampd, struct tmem_pool *pool,
-                                       struct tmem_oid *oid, uint32_t index)
-{
-       int ret = 0;
-
-       BUG_ON(!is_ephemeral(pool));
-       zbud_decompress(virt_to_page(data), pampd);
-       zbud_free_and_delist((struct zbud_hdr *)pampd);
-       atomic_dec(&zcache_curr_eph_pampd_count);
-       return ret;
-}
-
-/*
- * free the pampd and remove it from any zcache lists
- * pampd must no longer be pointed to from any tmem data structures!
- */
-static void zcache_pampd_free(void *pampd, struct tmem_pool *pool,
-                               struct tmem_oid *oid, uint32_t index)
-{
-       struct zcache_client *cli = pool->client;
-
-       if (is_ephemeral(pool)) {
-               zbud_free_and_delist((struct zbud_hdr *)pampd);
-               atomic_dec(&zcache_curr_eph_pampd_count);
-               BUG_ON(atomic_read(&zcache_curr_eph_pampd_count) < 0);
-       } else {
-               zv_free(cli->xvpool, (struct zv_hdr *)pampd);
-               atomic_dec(&zcache_curr_pers_pampd_count);
-               BUG_ON(atomic_read(&zcache_curr_pers_pampd_count) < 0);
-       }
-}
-
-static void zcache_pampd_free_obj(struct tmem_pool *pool, struct tmem_obj *obj)
-{
-}
-
-static void zcache_pampd_new_obj(struct tmem_obj *obj)
-{
-}
-
-static int zcache_pampd_replace_in_obj(void *pampd, struct tmem_obj *obj)
-{
-       return -1;
-}
-
-static bool zcache_pampd_is_remote(void *pampd)
-{
-       return 0;
-}
-
-static struct tmem_pamops zcache_pamops = {
-       .create = zcache_pampd_create,
-       .get_data = zcache_pampd_get_data,
-       .get_data_and_free = zcache_pampd_get_data_and_free,
-       .free = zcache_pampd_free,
-       .free_obj = zcache_pampd_free_obj,
-       .new_obj = zcache_pampd_new_obj,
-       .replace_in_obj = zcache_pampd_replace_in_obj,
-       .is_remote = zcache_pampd_is_remote,
-};
-
-/*
- * zcache compression/decompression and related per-cpu stuff
- */
-
-#define LZO_WORKMEM_BYTES LZO1X_1_MEM_COMPRESS
-#define LZO_DSTMEM_PAGE_ORDER 1
-static DEFINE_PER_CPU(unsigned char *, zcache_workmem);
-static DEFINE_PER_CPU(unsigned char *, zcache_dstmem);
-
-static int zcache_compress(struct page *from, void **out_va, size_t *out_len)
-{
-       int ret = 0;
-       unsigned char *dmem = __get_cpu_var(zcache_dstmem);
-       unsigned char *wmem = __get_cpu_var(zcache_workmem);
-       char *from_va;
-
-       BUG_ON(!irqs_disabled());
-       if (unlikely(dmem == NULL || wmem == NULL))
-               goto out;  /* no buffer, so can't compress */
-       from_va = kmap_atomic(from, KM_USER0);
-       mb();
-       ret = lzo1x_1_compress(from_va, PAGE_SIZE, dmem, out_len, wmem);
-       BUG_ON(ret != LZO_E_OK);
-       *out_va = dmem;
-       kunmap_atomic(from_va, KM_USER0);
-       ret = 1;
-out:
-       return ret;
-}
-
-
-static int zcache_cpu_notifier(struct notifier_block *nb,
-                               unsigned long action, void *pcpu)
-{
-       int cpu = (long)pcpu;
-       struct zcache_preload *kp;
-
-       switch (action) {
-       case CPU_UP_PREPARE:
-               per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages(
-                       GFP_KERNEL | __GFP_REPEAT,
-                       LZO_DSTMEM_PAGE_ORDER),
-               per_cpu(zcache_workmem, cpu) =
-                       kzalloc(LZO1X_MEM_COMPRESS,
-                               GFP_KERNEL | __GFP_REPEAT);
-               break;
-       case CPU_DEAD:
-       case CPU_UP_CANCELED:
-               free_pages((unsigned long)per_cpu(zcache_dstmem, cpu),
-                               LZO_DSTMEM_PAGE_ORDER);
-               per_cpu(zcache_dstmem, cpu) = NULL;
-               kfree(per_cpu(zcache_workmem, cpu));
-               per_cpu(zcache_workmem, cpu) = NULL;
-               kp = &per_cpu(zcache_preloads, cpu);
-               while (kp->nr) {
-                       kmem_cache_free(zcache_objnode_cache,
-                                       kp->objnodes[kp->nr - 1]);
-                       kp->objnodes[kp->nr - 1] = NULL;
-                       kp->nr--;
-               }
-               kmem_cache_free(zcache_obj_cache, kp->obj);
-               free_page((unsigned long)kp->page);
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
-}
-
-static struct notifier_block zcache_cpu_notifier_block = {
-       .notifier_call = zcache_cpu_notifier
-};
-
-#ifdef CONFIG_SYSFS
-#define ZCACHE_SYSFS_RO(_name) \
-       static ssize_t zcache_##_name##_show(struct kobject *kobj, \
-                               struct kobj_attribute *attr, char *buf) \
-       { \
-               return sprintf(buf, "%lu\n", zcache_##_name); \
-       } \
-       static struct kobj_attribute zcache_##_name##_attr = { \
-               .attr = { .name = __stringify(_name), .mode = 0444 }, \
-               .show = zcache_##_name##_show, \
-       }
-
-#define ZCACHE_SYSFS_RO_ATOMIC(_name) \
-       static ssize_t zcache_##_name##_show(struct kobject *kobj, \
-                               struct kobj_attribute *attr, char *buf) \
-       { \
-           return sprintf(buf, "%d\n", atomic_read(&zcache_##_name)); \
-       } \
-       static struct kobj_attribute zcache_##_name##_attr = { \
-               .attr = { .name = __stringify(_name), .mode = 0444 }, \
-               .show = zcache_##_name##_show, \
-       }
-
-#define ZCACHE_SYSFS_RO_CUSTOM(_name, _func) \
-       static ssize_t zcache_##_name##_show(struct kobject *kobj, \
-                               struct kobj_attribute *attr, char *buf) \
-       { \
-           return _func(buf); \
-       } \
-       static struct kobj_attribute zcache_##_name##_attr = { \
-               .attr = { .name = __stringify(_name), .mode = 0444 }, \
-               .show = zcache_##_name##_show, \
-       }
-
-ZCACHE_SYSFS_RO(curr_obj_count_max);
-ZCACHE_SYSFS_RO(curr_objnode_count_max);
-ZCACHE_SYSFS_RO(flush_total);
-ZCACHE_SYSFS_RO(flush_found);
-ZCACHE_SYSFS_RO(flobj_total);
-ZCACHE_SYSFS_RO(flobj_found);
-ZCACHE_SYSFS_RO(failed_eph_puts);
-ZCACHE_SYSFS_RO(failed_pers_puts);
-ZCACHE_SYSFS_RO(zbud_curr_zbytes);
-ZCACHE_SYSFS_RO(zbud_cumul_zpages);
-ZCACHE_SYSFS_RO(zbud_cumul_zbytes);
-ZCACHE_SYSFS_RO(zbud_buddied_count);
-ZCACHE_SYSFS_RO(zbpg_unused_list_count);
-ZCACHE_SYSFS_RO(evicted_raw_pages);
-ZCACHE_SYSFS_RO(evicted_unbuddied_pages);
-ZCACHE_SYSFS_RO(evicted_buddied_pages);
-ZCACHE_SYSFS_RO(failed_get_free_pages);
-ZCACHE_SYSFS_RO(failed_alloc);
-ZCACHE_SYSFS_RO(put_to_flush);
-ZCACHE_SYSFS_RO(aborted_preload);
-ZCACHE_SYSFS_RO(aborted_shrink);
-ZCACHE_SYSFS_RO(compress_poor);
-ZCACHE_SYSFS_RO(mean_compress_poor);
-ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_raw_pages);
-ZCACHE_SYSFS_RO_ATOMIC(zbud_curr_zpages);
-ZCACHE_SYSFS_RO_ATOMIC(curr_obj_count);
-ZCACHE_SYSFS_RO_ATOMIC(curr_objnode_count);
-ZCACHE_SYSFS_RO_CUSTOM(zbud_unbuddied_list_counts,
-                       zbud_show_unbuddied_list_counts);
-ZCACHE_SYSFS_RO_CUSTOM(zbud_cumul_chunk_counts,
-                       zbud_show_cumul_chunk_counts);
-ZCACHE_SYSFS_RO_CUSTOM(zv_curr_dist_counts,
-                       zv_curr_dist_counts_show);
-ZCACHE_SYSFS_RO_CUSTOM(zv_cumul_dist_counts,
-                       zv_cumul_dist_counts_show);
-
-static struct attribute *zcache_attrs[] = {
-       &zcache_curr_obj_count_attr.attr,
-       &zcache_curr_obj_count_max_attr.attr,
-       &zcache_curr_objnode_count_attr.attr,
-       &zcache_curr_objnode_count_max_attr.attr,
-       &zcache_flush_total_attr.attr,
-       &zcache_flobj_total_attr.attr,
-       &zcache_flush_found_attr.attr,
-       &zcache_flobj_found_attr.attr,
-       &zcache_failed_eph_puts_attr.attr,
-       &zcache_failed_pers_puts_attr.attr,
-       &zcache_compress_poor_attr.attr,
-       &zcache_mean_compress_poor_attr.attr,
-       &zcache_zbud_curr_raw_pages_attr.attr,
-       &zcache_zbud_curr_zpages_attr.attr,
-       &zcache_zbud_curr_zbytes_attr.attr,
-       &zcache_zbud_cumul_zpages_attr.attr,
-       &zcache_zbud_cumul_zbytes_attr.attr,
-       &zcache_zbud_buddied_count_attr.attr,
-       &zcache_zbpg_unused_list_count_attr.attr,
-       &zcache_evicted_raw_pages_attr.attr,
-       &zcache_evicted_unbuddied_pages_attr.attr,
-       &zcache_evicted_buddied_pages_attr.attr,
-       &zcache_failed_get_free_pages_attr.attr,
-       &zcache_failed_alloc_attr.attr,
-       &zcache_put_to_flush_attr.attr,
-       &zcache_aborted_preload_attr.attr,
-       &zcache_aborted_shrink_attr.attr,
-       &zcache_zbud_unbuddied_list_counts_attr.attr,
-       &zcache_zbud_cumul_chunk_counts_attr.attr,
-       &zcache_zv_curr_dist_counts_attr.attr,
-       &zcache_zv_cumul_dist_counts_attr.attr,
-       &zcache_zv_max_zsize_attr.attr,
-       &zcache_zv_max_mean_zsize_attr.attr,
-       &zcache_zv_page_count_policy_percent_attr.attr,
-       NULL,
-};
-
-static struct attribute_group zcache_attr_group = {
-       .attrs = zcache_attrs,
-       .name = "zcache",
-};
-
-#endif /* CONFIG_SYSFS */
-/*
- * When zcache is disabled ("frozen"), pools can be created and destroyed,
- * but all puts (and thus all other operations that require memory allocation)
- * must fail.  If zcache is unfrozen, accepts puts, then frozen again,
- * data consistency requires all puts while frozen to be converted into
- * flushes.
- */
-static bool zcache_freeze;
-
-/*
- * zcache shrinker interface (only useful for ephemeral pages, so zbud only)
- */
-static int shrink_zcache_memory(struct shrinker *shrink,
-                               struct shrink_control *sc)
-{
-       int ret = -1;
-       int nr = sc->nr_to_scan;
-       gfp_t gfp_mask = sc->gfp_mask;
-
-       if (nr >= 0) {
-               if (!(gfp_mask & __GFP_FS))
-                       /* does this case really need to be skipped? */
-                       goto out;
-               if (spin_trylock(&zcache_direct_reclaim_lock)) {
-                       zbud_evict_pages(nr);
-                       spin_unlock(&zcache_direct_reclaim_lock);
-               } else
-                       zcache_aborted_shrink++;
-       }
-       ret = (int)atomic_read(&zcache_zbud_curr_raw_pages);
-out:
-       return ret;
-}
-
-static struct shrinker zcache_shrinker = {
-       .shrink = shrink_zcache_memory,
-       .seeks = DEFAULT_SEEKS,
-};
-
-/*
- * zcache shims between cleancache/frontswap ops and tmem
- */
-
-static int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp,
-                               uint32_t index, struct page *page)
-{
-       struct tmem_pool *pool;
-       int ret = -1;
-
-       BUG_ON(!irqs_disabled());
-       pool = zcache_get_pool_by_id(cli_id, pool_id);
-       if (unlikely(pool == NULL))
-               goto out;
-       if (!zcache_freeze && zcache_do_preload(pool) == 0) {
-               /* preload does preempt_disable on success */
-               ret = tmem_put(pool, oidp, index, page_address(page),
-                               PAGE_SIZE, 0, is_ephemeral(pool));
-               if (ret < 0) {
-                       if (is_ephemeral(pool))
-                               zcache_failed_eph_puts++;
-                       else
-                               zcache_failed_pers_puts++;
-               }
-               zcache_put_pool(pool);
-               preempt_enable_no_resched();
-       } else {
-               zcache_put_to_flush++;
-               if (atomic_read(&pool->obj_count) > 0)
-                       /* the put fails whether the flush succeeds or not */
-                       (void)tmem_flush_page(pool, oidp, index);
-               zcache_put_pool(pool);
-       }
-out:
-       return ret;
-}
-
-static int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp,
-                               uint32_t index, struct page *page)
-{
-       struct tmem_pool *pool;
-       int ret = -1;
-       unsigned long flags;
-       size_t size = PAGE_SIZE;
-
-       local_irq_save(flags);
-       pool = zcache_get_pool_by_id(cli_id, pool_id);
-       if (likely(pool != NULL)) {
-               if (atomic_read(&pool->obj_count) > 0)
-                       ret = tmem_get(pool, oidp, index, page_address(page),
-                                       &size, 0, is_ephemeral(pool));
-               zcache_put_pool(pool);
-       }
-       local_irq_restore(flags);
-       return ret;
-}
-
-static int zcache_flush_page(int cli_id, int pool_id,
-                               struct tmem_oid *oidp, uint32_t index)
-{
-       struct tmem_pool *pool;
-       int ret = -1;
-       unsigned long flags;
-
-       local_irq_save(flags);
-       zcache_flush_total++;
-       pool = zcache_get_pool_by_id(cli_id, pool_id);
-       if (likely(pool != NULL)) {
-               if (atomic_read(&pool->obj_count) > 0)
-                       ret = tmem_flush_page(pool, oidp, index);
-               zcache_put_pool(pool);
-       }
-       if (ret >= 0)
-               zcache_flush_found++;
-       local_irq_restore(flags);
-       return ret;
-}
-
-static int zcache_flush_object(int cli_id, int pool_id,
-                               struct tmem_oid *oidp)
-{
-       struct tmem_pool *pool;
-       int ret = -1;
-       unsigned long flags;
-
-       local_irq_save(flags);
-       zcache_flobj_total++;
-       pool = zcache_get_pool_by_id(cli_id, pool_id);
-       if (likely(pool != NULL)) {
-               if (atomic_read(&pool->obj_count) > 0)
-                       ret = tmem_flush_object(pool, oidp);
-               zcache_put_pool(pool);
-       }
-       if (ret >= 0)
-               zcache_flobj_found++;
-       local_irq_restore(flags);
-       return ret;
-}
-
-static int zcache_destroy_pool(int cli_id, int pool_id)
-{
-       struct tmem_pool *pool = NULL;
-       struct zcache_client *cli = NULL;
-       int ret = -1;
-
-       if (pool_id < 0)
-               goto out;
-       if (cli_id == LOCAL_CLIENT)
-               cli = &zcache_host;
-       else if ((unsigned int)cli_id < MAX_CLIENTS)
-               cli = &zcache_clients[cli_id];
-       if (cli == NULL)
-               goto out;
-       atomic_inc(&cli->refcount);
-       pool = cli->tmem_pools[pool_id];
-       if (pool == NULL)
-               goto out;
-       cli->tmem_pools[pool_id] = NULL;
-       /* wait for pool activity on other cpus to quiesce */
-       while (atomic_read(&pool->refcount) != 0)
-               ;
-       atomic_dec(&cli->refcount);
-       local_bh_disable();
-       ret = tmem_destroy_pool(pool);
-       local_bh_enable();
-       kfree(pool);
-       pr_info("zcache: destroyed pool id=%d, cli_id=%d\n",
-                       pool_id, cli_id);
-out:
-       return ret;
-}
-
-static int zcache_new_pool(uint16_t cli_id, uint32_t flags)
-{
-       int poolid = -1;
-       struct tmem_pool *pool;
-       struct zcache_client *cli = NULL;
-
-       if (cli_id == LOCAL_CLIENT)
-               cli = &zcache_host;
-       else if ((unsigned int)cli_id < MAX_CLIENTS)
-               cli = &zcache_clients[cli_id];
-       if (cli == NULL)
-               goto out;
-       atomic_inc(&cli->refcount);
-       pool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL);
-       if (pool == NULL) {
-               pr_info("zcache: pool creation failed: out of memory\n");
-               goto out;
-       }
-
-       for (poolid = 0; poolid < MAX_POOLS_PER_CLIENT; poolid++)
-               if (cli->tmem_pools[poolid] == NULL)
-                       break;
-       if (poolid >= MAX_POOLS_PER_CLIENT) {
-               pr_info("zcache: pool creation failed: max exceeded\n");
-               kfree(pool);
-               poolid = -1;
-               goto out;
-       }
-       atomic_set(&pool->refcount, 0);
-       pool->client = cli;
-       pool->pool_id = poolid;
-       tmem_new_pool(pool, flags);
-       cli->tmem_pools[poolid] = pool;
-       pr_info("zcache: created %s tmem pool, id=%d, client=%d\n",
-               flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
-               poolid, cli_id);
-out:
-       if (cli != NULL)
-               atomic_dec(&cli->refcount);
-       return poolid;
-}
-
-/**********
- * Two kernel functionalities currently can be layered on top of tmem.
- * These are "cleancache" which is used as a second-chance cache for clean
- * page cache pages; and "frontswap" which is used for swap pages
- * to avoid writes to disk.  A generic "shim" is provided here for each
- * to translate in-kernel semantics to zcache semantics.
- */
-
-#ifdef CONFIG_CLEANCACHE
-static void zcache_cleancache_put_page(int pool_id,
-                                       struct cleancache_filekey key,
-                                       pgoff_t index, struct page *page)
-{
-       u32 ind = (u32) index;
-       struct tmem_oid oid = *(struct tmem_oid *)&key;
-
-       if (likely(ind == index))
-               (void)zcache_put_page(LOCAL_CLIENT, pool_id, &oid, index, page);
-}
-
-static int zcache_cleancache_get_page(int pool_id,
-                                       struct cleancache_filekey key,
-                                       pgoff_t index, struct page *page)
-{
-       u32 ind = (u32) index;
-       struct tmem_oid oid = *(struct tmem_oid *)&key;
-       int ret = -1;
-
-       if (likely(ind == index))
-               ret = zcache_get_page(LOCAL_CLIENT, pool_id, &oid, index, page);
-       return ret;
-}
-
-static void zcache_cleancache_flush_page(int pool_id,
-                                       struct cleancache_filekey key,
-                                       pgoff_t index)
-{
-       u32 ind = (u32) index;
-       struct tmem_oid oid = *(struct tmem_oid *)&key;
-
-       if (likely(ind == index))
-               (void)zcache_flush_page(LOCAL_CLIENT, pool_id, &oid, ind);
-}
-
-static void zcache_cleancache_flush_inode(int pool_id,
-                                       struct cleancache_filekey key)
-{
-       struct tmem_oid oid = *(struct tmem_oid *)&key;
-
-       (void)zcache_flush_object(LOCAL_CLIENT, pool_id, &oid);
-}
-
-static void zcache_cleancache_flush_fs(int pool_id)
-{
-       if (pool_id >= 0)
-               (void)zcache_destroy_pool(LOCAL_CLIENT, pool_id);
-}
-
-static int zcache_cleancache_init_fs(size_t pagesize)
-{
-       BUG_ON(sizeof(struct cleancache_filekey) !=
-                               sizeof(struct tmem_oid));
-       BUG_ON(pagesize != PAGE_SIZE);
-       return zcache_new_pool(LOCAL_CLIENT, 0);
-}
-
-static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize)
-{
-       /* shared pools are unsupported and map to private */
-       BUG_ON(sizeof(struct cleancache_filekey) !=
-                               sizeof(struct tmem_oid));
-       BUG_ON(pagesize != PAGE_SIZE);
-       return zcache_new_pool(LOCAL_CLIENT, 0);
-}
-
-static struct cleancache_ops zcache_cleancache_ops = {
-       .put_page = zcache_cleancache_put_page,
-       .get_page = zcache_cleancache_get_page,
-       .flush_page = zcache_cleancache_flush_page,
-       .flush_inode = zcache_cleancache_flush_inode,
-       .flush_fs = zcache_cleancache_flush_fs,
-       .init_shared_fs = zcache_cleancache_init_shared_fs,
-       .init_fs = zcache_cleancache_init_fs
-};
-
-struct cleancache_ops zcache_cleancache_register_ops(void)
-{
-       struct cleancache_ops old_ops =
-               cleancache_register_ops(&zcache_cleancache_ops);
-
-       return old_ops;
-}
-#endif
-
-#ifdef CONFIG_FRONTSWAP
-/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
-static int zcache_frontswap_poolid = -1;
-
-/*
- * Swizzling increases objects per swaptype, increasing tmem concurrency
- * for heavy swaploads.  Later, larger nr_cpus -> larger SWIZ_BITS
- */
-#define SWIZ_BITS              4
-#define SWIZ_MASK              ((1 << SWIZ_BITS) - 1)
-#define _oswiz(_type, _ind)    ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
-#define iswiz(_ind)            (_ind >> SWIZ_BITS)
-
-static inline struct tmem_oid oswiz(unsigned type, u32 ind)
-{
-       struct tmem_oid oid = { .oid = { 0 } };
-       oid.oid[0] = _oswiz(type, ind);
-       return oid;
-}
-
-static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
-                                  struct page *page)
-{
-       u64 ind64 = (u64)offset;
-       u32 ind = (u32)offset;
-       struct tmem_oid oid = oswiz(type, ind);
-       int ret = -1;
-       unsigned long flags;
-
-       BUG_ON(!PageLocked(page));
-       if (likely(ind64 == ind)) {
-               local_irq_save(flags);
-               ret = zcache_put_page(LOCAL_CLIENT, zcache_frontswap_poolid,
-                                       &oid, iswiz(ind), page);
-               local_irq_restore(flags);
-       }
-       return ret;
-}
-
-/* returns 0 if the page was successfully gotten from frontswap, -1 if
- * was not present (should never happen!) */
-static int zcache_frontswap_get_page(unsigned type, pgoff_t offset,
-                                  struct page *page)
-{
-       u64 ind64 = (u64)offset;
-       u32 ind = (u32)offset;
-       struct tmem_oid oid = oswiz(type, ind);
-       int ret = -1;
-
-       BUG_ON(!PageLocked(page));
-       if (likely(ind64 == ind))
-               ret = zcache_get_page(LOCAL_CLIENT, zcache_frontswap_poolid,
-                                       &oid, iswiz(ind), page);
-       return ret;
-}
-
-/* flush a single page from frontswap */
-static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset)
-{
-       u64 ind64 = (u64)offset;
-       u32 ind = (u32)offset;
-       struct tmem_oid oid = oswiz(type, ind);
-
-       if (likely(ind64 == ind))
-               (void)zcache_flush_page(LOCAL_CLIENT, zcache_frontswap_poolid,
-                                       &oid, iswiz(ind));
-}
-
-/* flush all pages from the passed swaptype */
-static void zcache_frontswap_flush_area(unsigned type)
-{
-       struct tmem_oid oid;
-       int ind;
-
-       for (ind = SWIZ_MASK; ind >= 0; ind--) {
-               oid = oswiz(type, ind);
-               (void)zcache_flush_object(LOCAL_CLIENT,
-                                               zcache_frontswap_poolid, &oid);
-       }
-}
-
-static void zcache_frontswap_init(unsigned ignored)
-{
-       /* a single tmem poolid is used for all frontswap "types" (swapfiles) */
-       if (zcache_frontswap_poolid < 0)
-               zcache_frontswap_poolid =
-                       zcache_new_pool(LOCAL_CLIENT, TMEM_POOL_PERSIST);
-}
-
-static struct frontswap_ops zcache_frontswap_ops = {
-       .put_page = zcache_frontswap_put_page,
-       .get_page = zcache_frontswap_get_page,
-       .flush_page = zcache_frontswap_flush_page,
-       .flush_area = zcache_frontswap_flush_area,
-       .init = zcache_frontswap_init
-};
-
-struct frontswap_ops zcache_frontswap_register_ops(void)
-{
-       struct frontswap_ops old_ops =
-               frontswap_register_ops(&zcache_frontswap_ops);
-
-       return old_ops;
-}
-#endif
-
-/*
- * zcache initialization
- * NOTE FOR NOW zcache MUST BE PROVIDED AS A KERNEL BOOT PARAMETER OR
- * NOTHING HAPPENS!
- */
-
-static int zcache_enabled;
-
-static int __init enable_zcache(char *s)
-{
-       zcache_enabled = 1;
-       return 1;
-}
-__setup("zcache", enable_zcache);
-
-/* allow independent dynamic disabling of cleancache and frontswap */
-
-static int use_cleancache = 1;
-
-static int __init no_cleancache(char *s)
-{
-       use_cleancache = 0;
-       return 1;
-}
-
-__setup("nocleancache", no_cleancache);
-
-static int use_frontswap = 1;
-
-static int __init no_frontswap(char *s)
-{
-       use_frontswap = 0;
-       return 1;
-}
-
-__setup("nofrontswap", no_frontswap);
-
-static int __init zcache_init(void)
-{
-#ifdef CONFIG_SYSFS
-       int ret = 0;
-
-       ret = sysfs_create_group(mm_kobj, &zcache_attr_group);
-       if (ret) {
-               pr_err("zcache: can't create sysfs\n");
-               goto out;
-       }
-#endif /* CONFIG_SYSFS */
-#if defined(CONFIG_CLEANCACHE) || defined(CONFIG_FRONTSWAP)
-       if (zcache_enabled) {
-               unsigned int cpu;
-
-               tmem_register_hostops(&zcache_hostops);
-               tmem_register_pamops(&zcache_pamops);
-               ret = register_cpu_notifier(&zcache_cpu_notifier_block);
-               if (ret) {
-                       pr_err("zcache: can't register cpu notifier\n");
-                       goto out;
-               }
-               for_each_online_cpu(cpu) {
-                       void *pcpu = (void *)(long)cpu;
-                       zcache_cpu_notifier(&zcache_cpu_notifier_block,
-                               CPU_UP_PREPARE, pcpu);
-               }
-       }
-       zcache_objnode_cache = kmem_cache_create("zcache_objnode",
-                               sizeof(struct tmem_objnode), 0, 0, NULL);
-       zcache_obj_cache = kmem_cache_create("zcache_obj",
-                               sizeof(struct tmem_obj), 0, 0, NULL);
-       ret = zcache_new_client(LOCAL_CLIENT);
-       if (ret) {
-               pr_err("zcache: can't create client\n");
-               goto out;
-       }
-#endif
-#ifdef CONFIG_CLEANCACHE
-       if (zcache_enabled && use_cleancache) {
-               struct cleancache_ops old_ops;
-
-               zbud_init();
-               register_shrinker(&zcache_shrinker);
-               old_ops = zcache_cleancache_register_ops();
-               pr_info("zcache: cleancache enabled using kernel "
-                       "transcendent memory and compression buddies\n");
-               if (old_ops.init_fs != NULL)
-                       pr_warning("zcache: cleancache_ops overridden");
-       }
-#endif
-#ifdef CONFIG_FRONTSWAP
-       if (zcache_enabled && use_frontswap) {
-               struct frontswap_ops old_ops;
-
-               old_ops = zcache_frontswap_register_ops();
-               pr_info("zcache: frontswap enabled using kernel "
-                       "transcendent memory and xvmalloc\n");
-               if (old_ops.init != NULL)
-                       pr_warning("ktmem: frontswap_ops overridden");
-       }
-#endif
-out:
-       return ret;
-}
-
-module_init(zcache_init)
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c

index c24fb10de60be4912962f9649feb2e21a96d231b..6a4ea29c2f36733206bd49081152dbecf2fcc2df 100644 (file)
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -2243,7 +2243,6 @@ static int iscsit_handle_snack(
         case 0:
                 return iscsit_handle_recovery_datain_or_r2t(conn, buf,
                         hdr->itt, hdr->ttt, hdr->begrun, hdr->runlength);
-               return 0;
         case ISCSI_FLAG_SNACK_TYPE_STATUS:
                 return iscsit_handle_status_snack(conn, hdr->itt, hdr->ttt,
                         hdr->begrun, hdr->runlength);
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c

index f095e65b1ccf401a2a4b951d5e8c463827dcc57f..f1643dbf6a92923d9a55fcc9732ded891a7afaa6 100644 (file)
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -268,7 +268,7 @@ struct se_tpg_np *lio_target_call_addnptotpg(
                                 ISCSI_TCP);
         if (IS_ERR(tpg_np)) {
                 iscsit_put_tpg(tpg);
-               return ERR_PTR(PTR_ERR(tpg_np));
+               return ERR_CAST(tpg_np);
         }
         pr_debug("LIO_Target_ConfigFS: addnptotpg done!\n");
  
@@ -1285,7 +1285,7 @@ struct se_wwn *lio_target_call_coreaddtiqn(
  
         tiqn = iscsit_add_tiqn((unsigned char *)name);
         if (IS_ERR(tiqn))
-               return ERR_PTR(PTR_ERR(tiqn));
+               return ERR_CAST(tiqn);
         /*
          * Setup struct iscsi_wwn_stat_grps for se_wwn->fabric_stat_group.
          */
diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c

index 980650792cf699de8de826722c0973752ed79cbc..c4c68da3e5004b3fa39eeb71829bbefab4e38632 100644 (file)
--- a/drivers/target/iscsi/iscsi_target_erl1.c
+++ b/drivers/target/iscsi/iscsi_target_erl1.c
@@ -834,7 +834,7 @@ static int iscsit_attach_ooo_cmdsn(
                          */
                         list_for_each_entry(ooo_tmp, &sess->sess_ooo_cmdsn_list,
                                                 ooo_list) {
-                               while (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn)
+                               if (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn)
                                         continue;
  
                                 list_add(&ooo_cmdsn->ooo_list,
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c

index bcaf82f470375e59caa6875da5c92e9f912730c5..daad362a93cecebeca5c9bcce26233da3b202df1 100644 (file)
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -1013,19 +1013,9 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
                                         ISCSI_LOGIN_STATUS_TARGET_ERROR);
                         goto new_sess_out;
                 }
-#if 0
-               if (!iscsi_ntop6((const unsigned char *)
-                               &sock_in6.sin6_addr.in6_u,
-                               (char *)&conn->ipv6_login_ip[0],
-                               IPV6_ADDRESS_SPACE)) {
-                       pr_err("iscsi_ntop6() failed\n");
-                       iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
-                                       ISCSI_LOGIN_STATUS_TARGET_ERROR);
-                       goto new_sess_out;
-               }
-#else
-               pr_debug("Skipping iscsi_ntop6()\n");
-#endif
+               snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI6c",
+                               &sock_in6.sin6_addr.in6_u);
+               conn->login_port = ntohs(sock_in6.sin6_port);
         } else {
                 memset(&sock_in, 0, sizeof(struct sockaddr_in));
  
diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c

index 252e246cf51e54b8ce5babe19507d32d1f3284b7..5b773160200fd0f6eb8f510bd8b9ed212d46ef99 100644 (file)
--- a/drivers/target/iscsi/iscsi_target_parameters.c
+++ b/drivers/target/iscsi/iscsi_target_parameters.c
@@ -545,13 +545,13 @@ int iscsi_copy_param_list(
         struct iscsi_param_list *src_param_list,
         int leading)
  {
-       struct iscsi_param *new_param = NULL, *param = NULL;
+       struct iscsi_param *param = NULL;
+       struct iscsi_param *new_param = NULL;
         struct iscsi_param_list *param_list = NULL;
  
         param_list = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL);
         if (!param_list) {
-               pr_err("Unable to allocate memory for"
-                               " struct iscsi_param_list.\n");
+               pr_err("Unable to allocate memory for struct iscsi_param_list.\n");
                 goto err_out;
         }
         INIT_LIST_HEAD(&param_list->param_list);
@@ -567,8 +567,17 @@ int iscsi_copy_param_list(
  
                 new_param = kzalloc(sizeof(struct iscsi_param), GFP_KERNEL);
                 if (!new_param) {
-                       pr_err("Unable to allocate memory for"
-                               " struct iscsi_param.\n");
+                       pr_err("Unable to allocate memory for struct iscsi_param.\n");
+                       goto err_out;
+               }
+
+               new_param->name = kstrdup(param->name, GFP_KERNEL);
+               new_param->value = kstrdup(param->value, GFP_KERNEL);
+               if (!new_param->value || !new_param->name) {
+                       kfree(new_param->value);
+                       kfree(new_param->name);
+                       kfree(new_param);
+                       pr_err("Unable to allocate memory for parameter name/value.\n");
                         goto err_out;
                 }
  
@@ -580,32 +589,12 @@ int iscsi_copy_param_list(
                 new_param->use = param->use;
                 new_param->type_range = param->type_range;
  
-               new_param->name = kzalloc(strlen(param->name) + 1, GFP_KERNEL);
-               if (!new_param->name) {
-                       pr_err("Unable to allocate memory for"
-                               " parameter name.\n");
-                       goto err_out;
-               }
-
-               new_param->value = kzalloc(strlen(param->value) + 1,
-                               GFP_KERNEL);
-               if (!new_param->value) {
-                       pr_err("Unable to allocate memory for"
-                               " parameter value.\n");
-                       goto err_out;
-               }
-
-               memcpy(new_param->name, param->name, strlen(param->name));
-               new_param->name[strlen(param->name)] = '\0';
-               memcpy(new_param->value, param->value, strlen(param->value));
-               new_param->value[strlen(param->value)] = '\0';
-
                 list_add_tail(&new_param->p_list, &param_list->param_list);
         }
  
-       if (!list_empty(&param_list->param_list))
+       if (!list_empty(&param_list->param_list)) {
                 *dst_param_list = param_list;
-       else {
+       } else {
                 pr_err("No parameters allocated.\n");
                 goto err_out;
         }
@@ -1441,7 +1430,7 @@ static int iscsi_enforce_integrity_rules(
         u8 DataSequenceInOrder = 0;
         u8 ErrorRecoveryLevel = 0, SessionType = 0;
         u8 IFMarker = 0, OFMarker = 0;
-       u8 IFMarkInt_Reject = 0, OFMarkInt_Reject = 0;
+       u8 IFMarkInt_Reject = 1, OFMarkInt_Reject = 1;
         u32 FirstBurstLength = 0, MaxBurstLength = 0;
         struct iscsi_param *param = NULL;
  
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c

index a1acb0167902cd20d1a29de688c17d0b0e202139..f00137f377b295f5b1fc0c0873c03c6e60b81e43 100644 (file)
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -243,7 +243,7 @@ struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr(
         if (!cmd->tmr_req) {
                 pr_err("Unable to allocate memory for"
                         " Task Management command!\n");
-               return NULL;
+               goto out;
         }
         /*
          * TASK_REASSIGN for ERL=2 / connection stays inside of
@@ -298,8 +298,6 @@ struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr(
         return cmd;
  out:
         iscsit_release_cmd(cmd);
-       if (se_cmd)
-               transport_free_se_cmd(se_cmd);
         return NULL;
  }
  
@@ -876,40 +874,6 @@ void iscsit_inc_session_usage_count(struct iscsi_session *sess)
         spin_unlock_bh(&sess->session_usage_lock);
  }
  
-/*
- *     Used before iscsi_do[rx,tx]_data() to determine iov and [rx,tx]_marker
- *     array counts needed for sync and steering.
- */
-static int iscsit_determine_sync_and_steering_counts(
-       struct iscsi_conn *conn,
-       struct iscsi_data_count *count)
-{
-       u32 length = count->data_length;
-       u32 marker, markint;
-
-       count->sync_and_steering = 1;
-
-       marker = (count->type == ISCSI_RX_DATA) ?
-                       conn->of_marker : conn->if_marker;
-       markint = (count->type == ISCSI_RX_DATA) ?
-                       (conn->conn_ops->OFMarkInt * 4) :
-                       (conn->conn_ops->IFMarkInt * 4);
-       count->ss_iov_count = count->iov_count;
-
-       while (length > 0) {
-               if (length >= marker) {
-                       count->ss_iov_count += 3;
-                       count->ss_marker_count += 2;
-
-                       length -= marker;
-                       marker = markint;
-               } else
-                       length = 0;
-       }
-
-       return 0;
-}
-
  /*
   *     Setup conn->if_marker and conn->of_marker values based upon
   *     the initial marker-less interval. (see iSCSI v19 A.2)
@@ -1292,7 +1256,7 @@ int iscsit_fe_sendpage_sg(
         struct kvec iov;
         u32 tx_hdr_size, data_len;
         u32 offset = cmd->first_data_sg_off;
-       int tx_sent;
+       int tx_sent, iov_off;
  
  send_hdr:
         tx_hdr_size = ISCSI_HDR_LEN;
@@ -1312,9 +1276,19 @@ send_hdr:
         }
  
         data_len = cmd->tx_size - tx_hdr_size - cmd->padding;
-       if (conn->conn_ops->DataDigest)
+       /*
+        * Set iov_off used by padding and data digest tx_data() calls below
+        * in order to determine proper offset into cmd->iov_data[]
+        */
+       if (conn->conn_ops->DataDigest) {
                 data_len -= ISCSI_CRC_LEN;
-
+               if (cmd->padding)
+                       iov_off = (cmd->iov_data_count - 2);
+               else
+                       iov_off = (cmd->iov_data_count - 1);
+       } else {
+               iov_off = (cmd->iov_data_count - 1);
+       }
         /*
          * Perform sendpage() for each page in the scatterlist
          */
@@ -1343,8 +1317,7 @@ send_pg:
  
  send_padding:
         if (cmd->padding) {
-               struct kvec *iov_p =
-                       &cmd->iov_data[cmd->iov_data_count-1];
+               struct kvec *iov_p = &cmd->iov_data[iov_off++];
  
                 tx_sent = tx_data(conn, iov_p, 1, cmd->padding);
                 if (cmd->padding != tx_sent) {
@@ -1358,8 +1331,7 @@ send_padding:
  
  send_datacrc:
         if (conn->conn_ops->DataDigest) {
-               struct kvec *iov_d =
-                       &cmd->iov_data[cmd->iov_data_count];
+               struct kvec *iov_d = &cmd->iov_data[iov_off];
  
                 tx_sent = tx_data(conn, iov_d, 1, ISCSI_CRC_LEN);
                 if (ISCSI_CRC_LEN != tx_sent) {
@@ -1433,8 +1405,7 @@ static int iscsit_do_rx_data(
         struct iscsi_data_count *count)
  {
         int data = count->data_length, rx_loop = 0, total_rx = 0, iov_len;
-       u32 rx_marker_val[count->ss_marker_count], rx_marker_iov = 0;
-       struct kvec iov[count->ss_iov_count], *iov_p;
+       struct kvec *iov_p;
         struct msghdr msg;
  
         if (!conn || !conn->sock || !conn->conn_ops)
@@ -1442,93 +1413,8 @@ static int iscsit_do_rx_data(
  
         memset(&msg, 0, sizeof(struct msghdr));
  
-       if (count->sync_and_steering) {
-               int size = 0;
-               u32 i, orig_iov_count = 0;
-               u32 orig_iov_len = 0, orig_iov_loc = 0;
-               u32 iov_count = 0, per_iov_bytes = 0;
-               u32 *rx_marker, old_rx_marker = 0;
-               struct kvec *iov_record;
-
-               memset(&rx_marker_val, 0,
-                               count->ss_marker_count * sizeof(u32));
-               memset(&iov, 0, count->ss_iov_count * sizeof(struct kvec));
-
-               iov_record = count->iov;
-               orig_iov_count = count->iov_count;
-               rx_marker = &conn->of_marker;
-
-               i = 0;
-               size = data;
-               orig_iov_len = iov_record[orig_iov_loc].iov_len;
-               while (size > 0) {
-                       pr_debug("rx_data: #1 orig_iov_len %u,"
-                       " orig_iov_loc %u\n", orig_iov_len, orig_iov_loc);
-                       pr_debug("rx_data: #2 rx_marker %u, size"
-                               " %u\n", *rx_marker, size);
-
-                       if (orig_iov_len >= *rx_marker) {
-                               iov[iov_count].iov_len = *rx_marker;
-                               iov[iov_count++].iov_base =
-                                       (iov_record[orig_iov_loc].iov_base +
-                                               per_iov_bytes);
-
-                               iov[iov_count].iov_len = (MARKER_SIZE / 2);
-                               iov[iov_count++].iov_base =
-                                       &rx_marker_val[rx_marker_iov++];
-                               iov[iov_count].iov_len = (MARKER_SIZE / 2);
-                               iov[iov_count++].iov_base =
-                                       &rx_marker_val[rx_marker_iov++];
-                               old_rx_marker = *rx_marker;
-
-                               /*
-                                * OFMarkInt is in 32-bit words.
-                                */
-                               *rx_marker = (conn->conn_ops->OFMarkInt * 4);
-                               size -= old_rx_marker;
-                               orig_iov_len -= old_rx_marker;
-                               per_iov_bytes += old_rx_marker;
-
-                               pr_debug("rx_data: #3 new_rx_marker"
-                                       " %u, size %u\n", *rx_marker, size);
-                       } else {
-                               iov[iov_count].iov_len = orig_iov_len;
-                               iov[iov_count++].iov_base =
-                                       (iov_record[orig_iov_loc].iov_base +
-                                               per_iov_bytes);
-
-                               per_iov_bytes = 0;
-                               *rx_marker -= orig_iov_len;
-                               size -= orig_iov_len;
-
-                               if (size)
-                                       orig_iov_len =
-                                       iov_record[++orig_iov_loc].iov_len;
-
-                               pr_debug("rx_data: #4 new_rx_marker"
-                                       " %u, size %u\n", *rx_marker, size);
-                       }
-               }
-               data += (rx_marker_iov * (MARKER_SIZE / 2));
-
-               iov_p   = &iov[0];
-               iov_len = iov_count;
-
-               if (iov_count > count->ss_iov_count) {
-                       pr_err("iov_count: %d, count->ss_iov_count:"
-                               " %d\n", iov_count, count->ss_iov_count);
-                       return -1;
-               }
-               if (rx_marker_iov > count->ss_marker_count) {
-                       pr_err("rx_marker_iov: %d, count->ss_marker"
-                               "_count: %d\n", rx_marker_iov,
-                               count->ss_marker_count);
-                       return -1;
-               }
-       } else {
-               iov_p = count->iov;
-               iov_len = count->iov_count;
-       }
+       iov_p = count->iov;
+       iov_len = count->iov_count;
  
         while (total_rx < data) {
                 rx_loop = kernel_recvmsg(conn->sock, &msg, iov_p, iov_len,
@@ -1543,16 +1429,6 @@ static int iscsit_do_rx_data(
                                 rx_loop, total_rx, data);
         }
  
-       if (count->sync_and_steering) {
-               int j;
-               for (j = 0; j < rx_marker_iov; j++) {
-                       pr_debug("rx_data: #5 j: %d, offset: %d\n",
-                               j, rx_marker_val[j]);
-                       conn->of_marker_offset = rx_marker_val[j];
-               }
-               total_rx -= (rx_marker_iov * (MARKER_SIZE / 2));
-       }
-
         return total_rx;
  }
  
@@ -1561,8 +1437,7 @@ static int iscsit_do_tx_data(
         struct iscsi_data_count *count)
  {
         int data = count->data_length, total_tx = 0, tx_loop = 0, iov_len;
-       u32 tx_marker_val[count->ss_marker_count], tx_marker_iov = 0;
-       struct kvec iov[count->ss_iov_count], *iov_p;
+       struct kvec *iov_p;
         struct msghdr msg;
  
         if (!conn || !conn->sock || !conn->conn_ops)
@@ -1575,98 +1450,8 @@ static int iscsit_do_tx_data(
  
         memset(&msg, 0, sizeof(struct msghdr));
  
-       if (count->sync_and_steering) {
-               int size = 0;
-               u32 i, orig_iov_count = 0;
-               u32 orig_iov_len = 0, orig_iov_loc = 0;
-               u32 iov_count = 0, per_iov_bytes = 0;
-               u32 *tx_marker, old_tx_marker = 0;
-               struct kvec *iov_record;
-
-               memset(&tx_marker_val, 0,
-                       count->ss_marker_count * sizeof(u32));
-               memset(&iov, 0, count->ss_iov_count * sizeof(struct kvec));
-
-               iov_record = count->iov;
-               orig_iov_count = count->iov_count;
-               tx_marker = &conn->if_marker;
-
-               i = 0;
-               size = data;
-               orig_iov_len = iov_record[orig_iov_loc].iov_len;
-               while (size > 0) {
-                       pr_debug("tx_data: #1 orig_iov_len %u,"
-                       " orig_iov_loc %u\n", orig_iov_len, orig_iov_loc);
-                       pr_debug("tx_data: #2 tx_marker %u, size"
-                               " %u\n", *tx_marker, size);
-
-                       if (orig_iov_len >= *tx_marker) {
-                               iov[iov_count].iov_len = *tx_marker;
-                               iov[iov_count++].iov_base =
-                                       (iov_record[orig_iov_loc].iov_base +
-                                               per_iov_bytes);
-
-                               tx_marker_val[tx_marker_iov] =
-                                               (size - *tx_marker);
-                               iov[iov_count].iov_len = (MARKER_SIZE / 2);
-                               iov[iov_count++].iov_base =
-                                       &tx_marker_val[tx_marker_iov++];
-                               iov[iov_count].iov_len = (MARKER_SIZE / 2);
-                               iov[iov_count++].iov_base =
-                                       &tx_marker_val[tx_marker_iov++];
-                               old_tx_marker = *tx_marker;
-
-                               /*
-                                * IFMarkInt is in 32-bit words.
-                                */
-                               *tx_marker = (conn->conn_ops->IFMarkInt * 4);
-                               size -= old_tx_marker;
-                               orig_iov_len -= old_tx_marker;
-                               per_iov_bytes += old_tx_marker;
-
-                               pr_debug("tx_data: #3 new_tx_marker"
-                                       " %u, size %u\n", *tx_marker, size);
-                               pr_debug("tx_data: #4 offset %u\n",
-                                       tx_marker_val[tx_marker_iov-1]);
-                       } else {
-                               iov[iov_count].iov_len = orig_iov_len;
-                               iov[iov_count++].iov_base
-                                       = (iov_record[orig_iov_loc].iov_base +
-                                               per_iov_bytes);
-
-                               per_iov_bytes = 0;
-                               *tx_marker -= orig_iov_len;
-                               size -= orig_iov_len;
-
-                               if (size)
-                                       orig_iov_len =
-                                       iov_record[++orig_iov_loc].iov_len;
-
-                               pr_debug("tx_data: #5 new_tx_marker"
-                                       " %u, size %u\n", *tx_marker, size);
-                       }
-               }
-
-               data += (tx_marker_iov * (MARKER_SIZE / 2));
-
-               iov_p = &iov[0];
-               iov_len = iov_count;
-
-               if (iov_count > count->ss_iov_count) {
-                       pr_err("iov_count: %d, count->ss_iov_count:"
-                               " %d\n", iov_count, count->ss_iov_count);
-                       return -1;
-               }
-               if (tx_marker_iov > count->ss_marker_count) {
-                       pr_err("tx_marker_iov: %d, count->ss_marker"
-                               "_count: %d\n", tx_marker_iov,
-                               count->ss_marker_count);
-                       return -1;
-               }
-       } else {
-               iov_p = count->iov;
-               iov_len = count->iov_count;
-       }
+       iov_p = count->iov;
+       iov_len = count->iov_count;
  
         while (total_tx < data) {
                 tx_loop = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len,
@@ -1681,9 +1466,6 @@ static int iscsit_do_tx_data(
                                         tx_loop, total_tx, data);
         }
  
-       if (count->sync_and_steering)
-               total_tx -= (tx_marker_iov * (MARKER_SIZE / 2));
-
         return total_tx;
  }
  
@@ -1704,12 +1486,6 @@ int rx_data(
         c.data_length = data;
         c.type = ISCSI_RX_DATA;
  
-       if (conn->conn_ops->OFMarker &&
-          (conn->conn_state >= TARG_CONN_STATE_LOGGED_IN)) {
-               if (iscsit_determine_sync_and_steering_counts(conn, &c) < 0)
-                       return -1;
-       }
-
         return iscsit_do_rx_data(conn, &c);
  }
  
@@ -1730,12 +1506,6 @@ int tx_data(
         c.data_length = data;
         c.type = ISCSI_TX_DATA;
  
-       if (conn->conn_ops->IFMarker &&
-          (conn->conn_state >= TARG_CONN_STATE_LOGGED_IN)) {
-               if (iscsit_determine_sync_and_steering_counts(conn, &c) < 0)
-                       return -1;
-       }
-
         return iscsit_do_tx_data(conn, &c);
  }
  
diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c

index 8ae09a1bdf74a8744b4884df7cddb792af6f8f0f..f04d4ef99dca00893761a3fe7c3b93c9dd6ba519 100644 (file)
--- a/drivers/target/target_core_cdb.c
+++ b/drivers/target/target_core_cdb.c
@@ -24,6 +24,7 @@
   */
  
  #include <linux/kernel.h>
+#include <linux/ctype.h>
  #include <asm/unaligned.h>
  #include <scsi/scsi.h>
  
@@ -67,6 +68,7 @@ target_emulate_inquiry_std(struct se_cmd *cmd)
  {
         struct se_lun *lun = cmd->se_lun;
         struct se_device *dev = cmd->se_dev;
+       struct se_portal_group *tpg = lun->lun_sep->sep_tpg;
         unsigned char *buf;
  
         /*
@@ -81,9 +83,13 @@ target_emulate_inquiry_std(struct se_cmd *cmd)
  
         buf = transport_kmap_first_data_page(cmd);
  
-       buf[0] = dev->transport->get_device_type(dev);
-       if (buf[0] == TYPE_TAPE)
-               buf[1] = 0x80;
+       if (dev == tpg->tpg_virt_lun0.lun_se_dev) {
+               buf[0] = 0x3f; /* Not connected */
+       } else {
+               buf[0] = dev->transport->get_device_type(dev);
+               if (buf[0] == TYPE_TAPE)
+                       buf[1] = 0x80;
+       }
         buf[2] = dev->transport->get_device_rev(dev);
  
         /*
@@ -149,6 +155,37 @@ target_emulate_evpd_80(struct se_cmd *cmd, unsigned char *buf)
         return 0;
  }
  
+static void
+target_parse_naa_6h_vendor_specific(struct se_device *dev, unsigned char *buf_off)
+{
+       unsigned char *p = &dev->se_sub_dev->t10_wwn.unit_serial[0];
+       unsigned char *buf = buf_off;
+       int cnt = 0, next = 1;
+       /*
+        * Generate up to 36 bits of VENDOR SPECIFIC IDENTIFIER starting on
+        * byte 3 bit 3-0 for NAA IEEE Registered Extended DESIGNATOR field
+        * format, followed by 64 bits of VENDOR SPECIFIC IDENTIFIER EXTENSION
+        * to complete the payload.  These are based from VPD=0x80 PRODUCT SERIAL
+        * NUMBER set via vpd_unit_serial in target_core_configfs.c to ensure
+        * per device uniqeness.
+        */
+       while (*p != '\0') {
+               if (cnt >= 13)
+                       break;
+               if (!isxdigit(*p)) {
+                       p++;
+                       continue;
+               }
+               if (next != 0) {
+                       buf[cnt++] |= hex_to_bin(*p++);
+                       next = 0;
+               } else {
+                       buf[cnt] = hex_to_bin(*p++) << 4;
+                       next = 1;
+               }
+       }
+}
+
  /*
   * Device identification VPD, for a complete list of
   * DESIGNATOR TYPEs see spc4r17 Table 459.
@@ -214,8 +251,7 @@ target_emulate_evpd_83(struct se_cmd *cmd, unsigned char *buf)
          * VENDOR_SPECIFIC_IDENTIFIER and
          * VENDOR_SPECIFIC_IDENTIFIER_EXTENTION
          */
-       buf[off++] |= hex_to_bin(dev->se_sub_dev->t10_wwn.unit_serial[0]);
-       hex2bin(&buf[off], &dev->se_sub_dev->t10_wwn.unit_serial[1], 12);
+       target_parse_naa_6h_vendor_specific(dev, &buf[off]);
  
         len = 20;
         off = (len + 4);
@@ -915,8 +951,8 @@ target_emulate_modesense(struct se_cmd *cmd, int ten)
                 length += target_modesense_control(dev, &buf[offset+length]);
                 break;
         default:
-               pr_err("Got Unknown Mode Page: 0x%02x\n",
-                               cdb[2] & 0x3f);
+               pr_err("MODE SENSE: unimplemented page/subpage: 0x%02x/0x%02x\n",
+                      cdb[2] & 0x3f, cdb[3]);
                 return PYX_TRANSPORT_UNKNOWN_MODE_PAGE;
         }
         offset += length;
@@ -1072,8 +1108,6 @@ target_emulate_unmap(struct se_task *task)
                 size -= 16;
         }
  
-       task->task_scsi_status = GOOD;
-       transport_complete_task(task, 1);
  err:
         transport_kunmap_first_data_page(cmd);
  
@@ -1085,24 +1119,17 @@ err:
   * Note this is not used for TCM/pSCSI passthrough
   */
  static int
-target_emulate_write_same(struct se_task *task, int write_same32)
+target_emulate_write_same(struct se_task *task, u32 num_blocks)
  {
         struct se_cmd *cmd = task->task_se_cmd;
         struct se_device *dev = cmd->se_dev;
         sector_t range;
         sector_t lba = cmd->t_task_lba;
-       unsigned int num_blocks;
         int ret;
         /*
-        * Extract num_blocks from the WRITE_SAME_* CDB.  Then use the explict
-        * range when non zero is supplied, otherwise calculate the remaining
-        * range based on ->get_blocks() - starting LBA.
+        * Use the explicit range when non zero is supplied, otherwise calculate
+        * the remaining range based on ->get_blocks() - starting LBA.
          */
-       if (write_same32)
-               num_blocks = get_unaligned_be32(&cmd->t_task_cdb[28]);
-       else
-               num_blocks = get_unaligned_be32(&cmd->t_task_cdb[10]);
-
         if (num_blocks != 0)
                 range = num_blocks;
         else
@@ -1117,8 +1144,6 @@ target_emulate_write_same(struct se_task *task, int write_same32)
                 return ret;
         }
  
-       task->task_scsi_status = GOOD;
-       transport_complete_task(task, 1);
         return 0;
  }
  
@@ -1165,13 +1190,23 @@ transport_emulate_control_cdb(struct se_task *task)
                 }
                 ret = target_emulate_unmap(task);
                 break;
+       case WRITE_SAME:
+               if (!dev->transport->do_discard) {
+                       pr_err("WRITE_SAME emulation not supported"
+                                       " for: %s\n", dev->transport->name);
+                       return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE;
+               }
+               ret = target_emulate_write_same(task,
+                               get_unaligned_be16(&cmd->t_task_cdb[7]));
+               break;
         case WRITE_SAME_16:
                 if (!dev->transport->do_discard) {
                         pr_err("WRITE_SAME_16 emulation not supported"
                                         " for: %s\n", dev->transport->name);
                         return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE;
                 }
-               ret = target_emulate_write_same(task, 0);
+               ret = target_emulate_write_same(task,
+                               get_unaligned_be32(&cmd->t_task_cdb[10]));
                 break;
         case VARIABLE_LENGTH_CMD:
                 service_action =
@@ -1184,7 +1219,8 @@ transport_emulate_control_cdb(struct se_task *task)
                                         dev->transport->name);
                                 return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE;
                         }
-                       ret = target_emulate_write_same(task, 1);
+                       ret = target_emulate_write_same(task,
+                               get_unaligned_be32(&cmd->t_task_cdb[28]));
                         break;
                 default:
                         pr_err("Unsupported VARIABLE_LENGTH_CMD SA:"
@@ -1219,8 +1255,14 @@ transport_emulate_control_cdb(struct se_task *task)
  
         if (ret < 0)
                 return ret;
-       task->task_scsi_status = GOOD;
-       transport_complete_task(task, 1);
+       /*
+        * Handle the successful completion here unless a caller
+        * has explictly requested an asychronous completion.
+        */
+       if (!(cmd->se_cmd_flags & SCF_EMULATE_CDB_ASYNC)) {
+               task->task_scsi_status = GOOD;
+               transport_complete_task(task, 1);
+       }
  
         return PYX_TRANSPORT_SENT_TO_TRANSPORT;
  }
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c

index b38b6c993e6555855be8cd830451d41a0ab6e1d5..ca6e4a4df134e3b8b64ed093338ef7f11749e499 100644 (file)
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -472,9 +472,9 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg)
         struct se_dev_entry *deve;
         u32 i;
  
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
         list_for_each_entry(nacl, &tpg->acl_node_list, acl_list) {
-               spin_unlock_bh(&tpg->acl_node_lock);
+               spin_unlock_irq(&tpg->acl_node_lock);
  
                 spin_lock_irq(&nacl->device_list_lock);
                 for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
@@ -491,9 +491,9 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg)
                 }
                 spin_unlock_irq(&nacl->device_list_lock);
  
-               spin_lock_bh(&tpg->acl_node_lock);
+               spin_lock_irq(&tpg->acl_node_lock);
         }
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
  }
  
  static struct se_port *core_alloc_port(struct se_device *dev)
@@ -839,6 +839,24 @@ int se_dev_check_shutdown(struct se_device *dev)
         return ret;
  }
  
+u32 se_dev_align_max_sectors(u32 max_sectors, u32 block_size)
+{
+       u32 tmp, aligned_max_sectors;
+       /*
+        * Limit max_sectors to a PAGE_SIZE aligned value for modern
+        * transport_allocate_data_tasks() operation.
+        */
+       tmp = rounddown((max_sectors * block_size), PAGE_SIZE);
+       aligned_max_sectors = (tmp / block_size);
+       if (max_sectors != aligned_max_sectors) {
+               printk(KERN_INFO "Rounding down aligned max_sectors from %u"
+                               " to %u\n", max_sectors, aligned_max_sectors);
+               return aligned_max_sectors;
+       }
+
+       return max_sectors;
+}
+
  void se_dev_set_default_attribs(
         struct se_device *dev,
         struct se_dev_limits *dev_limits)
@@ -878,6 +896,11 @@ void se_dev_set_default_attribs(
          * max_sectors is based on subsystem plugin dependent requirements.
          */
         dev->se_sub_dev->se_dev_attrib.hw_max_sectors = limits->max_hw_sectors;
+       /*
+        * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks()
+        */
+       limits->max_sectors = se_dev_align_max_sectors(limits->max_sectors,
+                                               limits->logical_block_size);
         dev->se_sub_dev->se_dev_attrib.max_sectors = limits->max_sectors;
         /*
          * Set optimal_sectors from max_sectors, which can be lowered via
@@ -1242,6 +1265,11 @@ int se_dev_set_max_sectors(struct se_device *dev, u32 max_sectors)
                         return -EINVAL;
                 }
         }
+       /*
+        * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks()
+        */
+       max_sectors = se_dev_align_max_sectors(max_sectors,
+                               dev->se_sub_dev->se_dev_attrib.block_size);
  
         dev->se_sub_dev->se_dev_attrib.max_sectors = max_sectors;
         pr_debug("dev[%p]: SE Device max_sectors changed to %u\n",
@@ -1344,15 +1372,17 @@ struct se_lun *core_dev_add_lun(
          */
         if (tpg->se_tpg_tfo->tpg_check_demo_mode(tpg)) {
                 struct se_node_acl *acl;
-               spin_lock_bh(&tpg->acl_node_lock);
+               spin_lock_irq(&tpg->acl_node_lock);
                 list_for_each_entry(acl, &tpg->acl_node_list, acl_list) {
-                       if (acl->dynamic_node_acl) {
-                               spin_unlock_bh(&tpg->acl_node_lock);
+                       if (acl->dynamic_node_acl &&
+                           (!tpg->se_tpg_tfo->tpg_check_demo_mode_login_only ||
+                            !tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg))) {
+                               spin_unlock_irq(&tpg->acl_node_lock);
                                 core_tpg_add_node_to_devs(acl, tpg);
-                               spin_lock_bh(&tpg->acl_node_lock);
+                               spin_lock_irq(&tpg->acl_node_lock);
                         }
                 }
-               spin_unlock_bh(&tpg->acl_node_lock);
+               spin_unlock_irq(&tpg->acl_node_lock);
         }
  
         return lun_p;
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c

index f1654694f4ea4e3b33c1ef3e5793b6e372b70156..55bbe0847a6d351cbb2d7ee940fef3f1d194fd10 100644 (file)
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -481,7 +481,7 @@ static struct config_group *target_fabric_make_nodeacl(
  
         se_nacl = tf->tf_ops.fabric_make_nodeacl(se_tpg, group, name);
         if (IS_ERR(se_nacl))
-               return ERR_PTR(PTR_ERR(se_nacl));
+               return ERR_CAST(se_nacl);
  
         nacl_cg = &se_nacl->acl_group;
         nacl_cg->default_groups = se_nacl->acl_default_groups;
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c

index 1c1b849cd4fb9c24799ce95d399b07012c328a53..7fd3a161f7cc61cb8b95b6aca9f6af76972ba44c 100644 (file)
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -1598,14 +1598,14 @@ static int core_scsi3_decode_spec_i_port(
                          * from the decoded fabric module specific TransportID
                          * at *i_str.
                          */
-                       spin_lock_bh(&tmp_tpg->acl_node_lock);
+                       spin_lock_irq(&tmp_tpg->acl_node_lock);
                         dest_node_acl = __core_tpg_get_initiator_node_acl(
                                                 tmp_tpg, i_str);
                         if (dest_node_acl) {
                                 atomic_inc(&dest_node_acl->acl_pr_ref_count);
                                 smp_mb__after_atomic_inc();
                         }
-                       spin_unlock_bh(&tmp_tpg->acl_node_lock);
+                       spin_unlock_irq(&tmp_tpg->acl_node_lock);
  
                         if (!dest_node_acl) {
                                 core_scsi3_tpg_undepend_item(tmp_tpg);
@@ -3496,14 +3496,14 @@ after_iport_check:
         /*
          * Locate the destination struct se_node_acl from the received Transport ID
          */
-       spin_lock_bh(&dest_se_tpg->acl_node_lock);
+       spin_lock_irq(&dest_se_tpg->acl_node_lock);
         dest_node_acl = __core_tpg_get_initiator_node_acl(dest_se_tpg,
                                 initiator_str);
         if (dest_node_acl) {
                 atomic_inc(&dest_node_acl->acl_pr_ref_count);
                 smp_mb__after_atomic_inc();
         }
-       spin_unlock_bh(&dest_se_tpg->acl_node_lock);
+       spin_unlock_irq(&dest_se_tpg->acl_node_lock);
  
         if (!dest_node_acl) {
                 pr_err("Unable to locate %s dest_node_acl for"
diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c

index 3dd81d24d9a914169f80372550fc6bf4f77edbff..e567e129c69746b197ab0c2c4e6addfc008a3630 100644 (file)
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c
@@ -390,12 +390,10 @@ static int rd_MEMCPY_read(struct rd_request *req)
                                 length = req->rd_size;
  
                         dst = sg_virt(&sg_d[i++]) + dst_offset;
-                       if (!dst)
-                               BUG();
+                       BUG_ON(!dst);
  
                         src = sg_virt(&sg_s[j]) + src_offset;
-                       if (!src)
-                               BUG();
+                       BUG_ON(!src);
  
                         dst_offset = 0;
                         src_offset = length;
@@ -415,8 +413,7 @@ static int rd_MEMCPY_read(struct rd_request *req)
                                 length = req->rd_size;
  
                         dst = sg_virt(&sg_d[i]) + dst_offset;
-                       if (!dst)
-                               BUG();
+                       BUG_ON(!dst);
  
                         if (sg_d[i].length == length) {
                                 i++;
@@ -425,8 +422,7 @@ static int rd_MEMCPY_read(struct rd_request *req)
                                 dst_offset = length;
  
                         src = sg_virt(&sg_s[j++]) + src_offset;
-                       if (!src)
-                               BUG();
+                       BUG_ON(!src);
  
                         src_offset = 0;
                         page_end = 1;
@@ -510,12 +506,10 @@ static int rd_MEMCPY_write(struct rd_request *req)
                                 length = req->rd_size;
  
                         src = sg_virt(&sg_s[i++]) + src_offset;
-                       if (!src)
-                               BUG();
+                       BUG_ON(!src);
  
                         dst = sg_virt(&sg_d[j]) + dst_offset;
-                       if (!dst)
-                               BUG();
+                       BUG_ON(!dst);
  
                         src_offset = 0;
                         dst_offset = length;
@@ -535,8 +529,7 @@ static int rd_MEMCPY_write(struct rd_request *req)
                                 length = req->rd_size;
  
                         src = sg_virt(&sg_s[i]) + src_offset;
-                       if (!src)
-                               BUG();
+                       BUG_ON(!src);
  
                         if (sg_s[i].length == length) {
                                 i++;
@@ -545,8 +538,7 @@ static int rd_MEMCPY_write(struct rd_request *req)
                                 src_offset = length;
  
                         dst = sg_virt(&sg_d[j++]) + dst_offset;
-                       if (!dst)
-                               BUG();
+                       BUG_ON(!dst);
  
                         dst_offset = 0;
                         page_end = 1;
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c

index 4f1ba4c5ef1196e43e0d5ff4b52836c02e0dc9fe..162b736c73427c44d45f0c56fc59cb5bbf5c35d8 100644 (file)
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -137,15 +137,15 @@ struct se_node_acl *core_tpg_get_initiator_node_acl(
  {
         struct se_node_acl *acl;
  
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
         list_for_each_entry(acl, &tpg->acl_node_list, acl_list) {
                 if (!strcmp(acl->initiatorname, initiatorname) &&
                     !acl->dynamic_node_acl) {
-                       spin_unlock_bh(&tpg->acl_node_lock);
+                       spin_unlock_irq(&tpg->acl_node_lock);
                         return acl;
                 }
         }
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
  
         return NULL;
  }
@@ -298,13 +298,21 @@ struct se_node_acl *core_tpg_check_initiator_node_acl(
                 tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl);
                 return NULL;
         }
+       /*
+        * Here we only create demo-mode MappedLUNs from the active
+        * TPG LUNs if the fabric is not explictly asking for
+        * tpg_check_demo_mode_login_only() == 1.
+        */
+       if ((tpg->se_tpg_tfo->tpg_check_demo_mode_login_only != NULL) &&
+           (tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg) == 1))
+               do { ; } while (0);
+       else
+               core_tpg_add_node_to_devs(acl, tpg);
  
-       core_tpg_add_node_to_devs(acl, tpg);
-
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
         list_add_tail(&acl->acl_list, &tpg->acl_node_list);
         tpg->num_node_acls++;
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
  
         pr_debug("%s_TPG[%u] - Added DYNAMIC ACL with TCQ Depth: %d for %s"
                 " Initiator Node: %s\n", tpg->se_tpg_tfo->get_fabric_name(),
@@ -354,7 +362,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
  {
         struct se_node_acl *acl = NULL;
  
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
         acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname);
         if (acl) {
                 if (acl->dynamic_node_acl) {
@@ -362,7 +370,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
                         pr_debug("%s_TPG[%u] - Replacing dynamic ACL"
                                 " for %s\n", tpg->se_tpg_tfo->get_fabric_name(),
                                 tpg->se_tpg_tfo->tpg_get_tag(tpg), initiatorname);
-                       spin_unlock_bh(&tpg->acl_node_lock);
+                       spin_unlock_irq(&tpg->acl_node_lock);
                         /*
                          * Release the locally allocated struct se_node_acl
                          * because * core_tpg_add_initiator_node_acl() returned
@@ -378,10 +386,10 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
                         " Node %s already exists for TPG %u, ignoring"
                         " request.\n",  tpg->se_tpg_tfo->get_fabric_name(),
                         initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg));
-               spin_unlock_bh(&tpg->acl_node_lock);
+               spin_unlock_irq(&tpg->acl_node_lock);
                 return ERR_PTR(-EEXIST);
         }
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
  
         if (!se_nacl) {
                 pr_err("struct se_node_acl pointer is NULL\n");
@@ -418,10 +426,10 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(
                 return ERR_PTR(-EINVAL);
         }
  
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
         list_add_tail(&acl->acl_list, &tpg->acl_node_list);
         tpg->num_node_acls++;
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
  
  done:
         pr_debug("%s_TPG[%hu] - Added ACL with TCQ Depth: %d for %s"
@@ -445,14 +453,14 @@ int core_tpg_del_initiator_node_acl(
         struct se_session *sess, *sess_tmp;
         int dynamic_acl = 0;
  
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
         if (acl->dynamic_node_acl) {
                 acl->dynamic_node_acl = 0;
                 dynamic_acl = 1;
         }
         list_del(&acl->acl_list);
         tpg->num_node_acls--;
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
  
         spin_lock_bh(&tpg->session_lock);
         list_for_each_entry_safe(sess, sess_tmp,
@@ -503,21 +511,21 @@ int core_tpg_set_initiator_node_queue_depth(
         struct se_node_acl *acl;
         int dynamic_acl = 0;
  
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
         acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname);
         if (!acl) {
                 pr_err("Access Control List entry for %s Initiator"
                         " Node %s does not exists for TPG %hu, ignoring"
                         " request.\n", tpg->se_tpg_tfo->get_fabric_name(),
                         initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg));
-               spin_unlock_bh(&tpg->acl_node_lock);
+               spin_unlock_irq(&tpg->acl_node_lock);
                 return -ENODEV;
         }
         if (acl->dynamic_node_acl) {
                 acl->dynamic_node_acl = 0;
                 dynamic_acl = 1;
         }
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
  
         spin_lock_bh(&tpg->session_lock);
         list_for_each_entry(sess, &tpg->tpg_sess_list, sess_list) {
@@ -533,10 +541,10 @@ int core_tpg_set_initiator_node_queue_depth(
                                 tpg->se_tpg_tfo->get_fabric_name(), initiatorname);
                         spin_unlock_bh(&tpg->session_lock);
  
-                       spin_lock_bh(&tpg->acl_node_lock);
+                       spin_lock_irq(&tpg->acl_node_lock);
                         if (dynamic_acl)
                                 acl->dynamic_node_acl = 1;
-                       spin_unlock_bh(&tpg->acl_node_lock);
+                       spin_unlock_irq(&tpg->acl_node_lock);
                         return -EEXIST;
                 }
                 /*
@@ -571,10 +579,10 @@ int core_tpg_set_initiator_node_queue_depth(
                 if (init_sess)
                         tpg->se_tpg_tfo->close_session(init_sess);
  
-               spin_lock_bh(&tpg->acl_node_lock);
+               spin_lock_irq(&tpg->acl_node_lock);
                 if (dynamic_acl)
                         acl->dynamic_node_acl = 1;
-               spin_unlock_bh(&tpg->acl_node_lock);
+               spin_unlock_irq(&tpg->acl_node_lock);
                 return -EINVAL;
         }
         spin_unlock_bh(&tpg->session_lock);
@@ -590,10 +598,10 @@ int core_tpg_set_initiator_node_queue_depth(
                 initiatorname, tpg->se_tpg_tfo->get_fabric_name(),
                 tpg->se_tpg_tfo->tpg_get_tag(tpg));
  
-       spin_lock_bh(&tpg->acl_node_lock);
+       spin_lock_irq(&tpg->acl_node_lock);
         if (dynamic_acl)
                 acl->dynamic_node_acl = 1;
-       spin_unlock_bh(&tpg->acl_node_lock);
+       spin_unlock_irq(&tpg->acl_node_lock);
  
         return 0;
  }
@@ -717,20 +725,20 @@ int core_tpg_deregister(struct se_portal_group *se_tpg)
          * not been released because of TFO->tpg_check_demo_mode_cache() == 1
          * in transport_deregister_session().
          */
-       spin_lock_bh(&se_tpg->acl_node_lock);
+       spin_lock_irq(&se_tpg->acl_node_lock);
         list_for_each_entry_safe(nacl, nacl_tmp, &se_tpg->acl_node_list,
                         acl_list) {
                 list_del(&nacl->acl_list);
                 se_tpg->num_node_acls--;
-               spin_unlock_bh(&se_tpg->acl_node_lock);
+               spin_unlock_irq(&se_tpg->acl_node_lock);
  
                 core_tpg_wait_for_nacl_pr_ref(nacl);
                 core_free_device_list_for_node(nacl, se_tpg);
                 se_tpg->se_tpg_tfo->tpg_release_fabric_acl(se_tpg, nacl);
  
-               spin_lock_bh(&se_tpg->acl_node_lock);
+               spin_lock_irq(&se_tpg->acl_node_lock);
         }
-       spin_unlock_bh(&se_tpg->acl_node_lock);
+       spin_unlock_irq(&se_tpg->acl_node_lock);
  
         if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL)
                 core_tpg_release_virtual_lun0(se_tpg);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c

index 89760329d5d0d292c0c8648c8aff16495a1822b4..a4b0a8d27f259abd9d57092b814884f55efa3541 100644 (file)
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -389,17 +389,18 @@ void transport_deregister_session(struct se_session *se_sess)
  {
         struct se_portal_group *se_tpg = se_sess->se_tpg;
         struct se_node_acl *se_nacl;
+       unsigned long flags;
  
         if (!se_tpg) {
                 transport_free_session(se_sess);
                 return;
         }
  
-       spin_lock_bh(&se_tpg->session_lock);
+       spin_lock_irqsave(&se_tpg->session_lock, flags);
         list_del(&se_sess->sess_list);
         se_sess->se_tpg = NULL;
         se_sess->fabric_sess_ptr = NULL;
-       spin_unlock_bh(&se_tpg->session_lock);
+       spin_unlock_irqrestore(&se_tpg->session_lock, flags);
  
         /*
          * Determine if we need to do extra work for this initiator node's
@@ -407,22 +408,22 @@ void transport_deregister_session(struct se_session *se_sess)
          */
         se_nacl = se_sess->se_node_acl;
         if (se_nacl) {
-               spin_lock_bh(&se_tpg->acl_node_lock);
+               spin_lock_irqsave(&se_tpg->acl_node_lock, flags);
                 if (se_nacl->dynamic_node_acl) {
                         if (!se_tpg->se_tpg_tfo->tpg_check_demo_mode_cache(
                                         se_tpg)) {
                                 list_del(&se_nacl->acl_list);
                                 se_tpg->num_node_acls--;
-                               spin_unlock_bh(&se_tpg->acl_node_lock);
+                               spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags);
  
                                 core_tpg_wait_for_nacl_pr_ref(se_nacl);
                                 core_free_device_list_for_node(se_nacl, se_tpg);
                                 se_tpg->se_tpg_tfo->tpg_release_fabric_acl(se_tpg,
                                                 se_nacl);
-                               spin_lock_bh(&se_tpg->acl_node_lock);
+                               spin_lock_irqsave(&se_tpg->acl_node_lock, flags);
                         }
                 }
-               spin_unlock_bh(&se_tpg->acl_node_lock);
+               spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags);
         }
  
         transport_free_session(se_sess);
@@ -976,15 +977,17 @@ static void target_qf_do_work(struct work_struct *work)
  {
         struct se_device *dev = container_of(work, struct se_device,
                                         qf_work_queue);
+       LIST_HEAD(qf_cmd_list);
         struct se_cmd *cmd, *cmd_tmp;
  
         spin_lock_irq(&dev->qf_cmd_lock);
-       list_for_each_entry_safe(cmd, cmd_tmp, &dev->qf_cmd_list, se_qf_node) {
+       list_splice_init(&dev->qf_cmd_list, &qf_cmd_list);
+       spin_unlock_irq(&dev->qf_cmd_lock);
  
+       list_for_each_entry_safe(cmd, cmd_tmp, &qf_cmd_list, se_qf_node) {
                 list_del(&cmd->se_qf_node);
                 atomic_dec(&dev->dev_qf_count);
                 smp_mb__after_atomic_dec();
-               spin_unlock_irq(&dev->qf_cmd_lock);
  
                 pr_debug("Processing %s cmd: %p QUEUE_FULL in work queue"
                         " context: %s\n", cmd->se_tfo->get_fabric_name(), cmd,
@@ -996,10 +999,7 @@ static void target_qf_do_work(struct work_struct *work)
                  * has been added to head of queue
                  */
                 transport_add_cmd_to_queue(cmd, cmd->t_state);
-
-               spin_lock_irq(&dev->qf_cmd_lock);
         }
-       spin_unlock_irq(&dev->qf_cmd_lock);
  }
  
  unsigned char *transport_dump_cmd_direction(struct se_cmd *cmd)
@@ -2053,8 +2053,14 @@ static void transport_generic_request_failure(
                 cmd->scsi_sense_reason = TCM_UNSUPPORTED_SCSI_OPCODE;
                 break;
         }
-
-       if (!sc)
+       /*
+        * If a fabric does not define a cmd->se_tfo->new_cmd_map caller,
+        * make the call to transport_send_check_condition_and_sense()
+        * directly.  Otherwise expect the fabric to make the call to
+        * transport_send_check_condition_and_sense() after handling
+        * possible unsoliticied write data payloads.
+        */
+       if (!sc && !cmd->se_tfo->new_cmd_map)
                 transport_new_cmd_failure(cmd);
         else {
                 ret = transport_send_check_condition_and_sense(cmd,
@@ -2847,12 +2853,42 @@ static int transport_cmd_get_valid_sectors(struct se_cmd *cmd)
                         " transport_dev_end_lba(): %llu\n",
                         cmd->t_task_lba, sectors,
                         transport_dev_end_lba(dev));
-               pr_err("  We should return CHECK_CONDITION"
-                      " but we don't yet\n");
-               return 0;
+               return -EINVAL;
         }
  
-       return sectors;
+       return 0;
+}
+
+static int target_check_write_same_discard(unsigned char *flags, struct se_device *dev)
+{
+       /*
+        * Determine if the received WRITE_SAME is used to for direct
+        * passthrough into Linux/SCSI with struct request via TCM/pSCSI
+        * or we are signaling the use of internal WRITE_SAME + UNMAP=1
+        * emulation for -> Linux/BLOCK disbard with TCM/IBLOCK code.
+        */
+       int passthrough = (dev->transport->transport_type ==
+                               TRANSPORT_PLUGIN_PHBA_PDEV);
+
+       if (!passthrough) {
+               if ((flags[0] & 0x04) || (flags[0] & 0x02)) {
+                       pr_err("WRITE_SAME PBDATA and LBDATA"
+                               " bits not supported for Block Discard"
+                               " Emulation\n");
+                       return -ENOSYS;
+               }
+               /*
+                * Currently for the emulated case we only accept
+                * tpws with the UNMAP=1 bit set.
+                */
+               if (!(flags[0] & 0x08)) {
+                       pr_err("WRITE_SAME w/o UNMAP bit not"
+                               " supported for Block Discard Emulation\n");
+                       return -ENOSYS;
+               }
+       }
+
+       return 0;
  }
  
  /*     transport_generic_cmd_sequencer():
@@ -3065,7 +3101,7 @@ static int transport_generic_cmd_sequencer(
                                 goto out_unsupported_cdb;
  
                         if (sectors)
-                               size = transport_get_size(sectors, cdb, cmd);
+                               size = transport_get_size(1, cdb, cmd);
                         else {
                                 pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not"
                                        " supported\n");
@@ -3075,27 +3111,9 @@ static int transport_generic_cmd_sequencer(
                         cmd->t_task_lba = get_unaligned_be64(&cdb[12]);
                         cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB;
  
-                       /*
-                        * Skip the remaining assignments for TCM/PSCSI passthrough
-                        */
-                       if (passthrough)
-                               break;
-
-                       if ((cdb[10] & 0x04) || (cdb[10] & 0x02)) {
-                               pr_err("WRITE_SAME PBDATA and LBDATA"
-                                       " bits not supported for Block Discard"
-                                       " Emulation\n");
+                       if (target_check_write_same_discard(&cdb[10], dev) < 0)
                                 goto out_invalid_cdb_field;
-                       }
-                       /*
-                        * Currently for the emulated case we only accept
-                        * tpws with the UNMAP=1 bit set.
-                        */
-                       if (!(cdb[10] & 0x08)) {
-                               pr_err("WRITE_SAME w/o UNMAP bit not"
-                                       " supported for Block Discard Emulation\n");
-                               goto out_invalid_cdb_field;
-                       }
+
                         break;
                 default:
                         pr_err("VARIABLE_LENGTH_CMD service action"
@@ -3330,10 +3348,12 @@ static int transport_generic_cmd_sequencer(
                 cmd->se_cmd_flags |= SCF_EMULATE_CDB_ASYNC;
                 /*
                  * Check to ensure that LBA + Range does not exceed past end of
-                * device.
+                * device for IBLOCK and FILEIO ->do_sync_cache() backend calls
                  */
-               if (!transport_cmd_get_valid_sectors(cmd))
-                       goto out_invalid_cdb_field;
+               if ((cmd->t_task_lba != 0) || (sectors != 0)) {
+                       if (transport_cmd_get_valid_sectors(cmd) < 0)
+                               goto out_invalid_cdb_field;
+               }
                 break;
         case UNMAP:
                 size = get_unaligned_be16(&cdb[7]);
@@ -3345,40 +3365,38 @@ static int transport_generic_cmd_sequencer(
                         goto out_unsupported_cdb;
  
                 if (sectors)
-                       size = transport_get_size(sectors, cdb, cmd);
+                       size = transport_get_size(1, cdb, cmd);
                 else {
                         pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n");
                         goto out_invalid_cdb_field;
                 }
  
                 cmd->t_task_lba = get_unaligned_be64(&cdb[2]);
-               passthrough = (dev->transport->transport_type ==
-                               TRANSPORT_PLUGIN_PHBA_PDEV);
-               /*
-                * Determine if the received WRITE_SAME_16 is used to for direct
-                * passthrough into Linux/SCSI with struct request via TCM/pSCSI
-                * or we are signaling the use of internal WRITE_SAME + UNMAP=1
-                * emulation for -> Linux/BLOCK disbard with TCM/IBLOCK and
-                * TCM/FILEIO subsystem plugin backstores.
-                */
-               if (!passthrough) {
-                       if ((cdb[1] & 0x04) || (cdb[1] & 0x02)) {
-                               pr_err("WRITE_SAME PBDATA and LBDATA"
-                                       " bits not supported for Block Discard"
-                                       " Emulation\n");
-                               goto out_invalid_cdb_field;
-                       }
-                       /*
-                        * Currently for the emulated case we only accept
-                        * tpws with the UNMAP=1 bit set.
-                        */
-                       if (!(cdb[1] & 0x08)) {
-                               pr_err("WRITE_SAME w/o UNMAP bit not "
-                                       " supported for Block Discard Emulation\n");
-                               goto out_invalid_cdb_field;
-                       }
+               cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB;
+
+               if (target_check_write_same_discard(&cdb[1], dev) < 0)
+                       goto out_invalid_cdb_field;
+               break;
+       case WRITE_SAME:
+               sectors = transport_get_sectors_10(cdb, cmd, &sector_ret);
+               if (sector_ret)
+                       goto out_unsupported_cdb;
+
+               if (sectors)
+                       size = transport_get_size(1, cdb, cmd);
+               else {
+                       pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n");
+                       goto out_invalid_cdb_field;
                 }
+
+               cmd->t_task_lba = get_unaligned_be32(&cdb[2]);
                 cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB;
+               /*
+                * Follow sbcr26 with WRITE_SAME (10) and check for the existence
+                * of byte 1 bit 3 UNMAP instead of original reserved field
+                */
+               if (target_check_write_same_discard(&cdb[1], dev) < 0)
+                       goto out_invalid_cdb_field;
                 break;
         case ALLOW_MEDIUM_REMOVAL:
         case GPCMD_CLOSE_TRACK:
@@ -3873,9 +3891,7 @@ EXPORT_SYMBOL(transport_generic_map_mem_to_cmd);
  static int transport_new_cmd_obj(struct se_cmd *cmd)
  {
         struct se_device *dev = cmd->se_dev;
-       u32 task_cdbs;
-       u32 rc;
-       int set_counts = 1;
+       int set_counts = 1, rc, task_cdbs;
  
         /*
          * Setup any BIDI READ tasks and memory from
@@ -3893,7 +3909,7 @@ static int transport_new_cmd_obj(struct se_cmd *cmd)
                         cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
                         cmd->scsi_sense_reason =
                                 TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-                       return PYX_TRANSPORT_LU_COMM_FAILURE;
+                       return -EINVAL;
                 }
                 atomic_inc(&cmd->t_fe_count);
                 atomic_inc(&cmd->t_se_count);
@@ -3912,7 +3928,7 @@ static int transport_new_cmd_obj(struct se_cmd *cmd)
                 cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
                 cmd->scsi_sense_reason =
                         TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-               return PYX_TRANSPORT_LU_COMM_FAILURE;
+               return -EINVAL;
         }
  
         if (set_counts) {
@@ -4028,8 +4044,6 @@ void transport_do_task_sg_chain(struct se_cmd *cmd)
                 if (!task->task_sg)
                         continue;
  
-               BUG_ON(!task->task_padded_sg);
-
                 if (!sg_first) {
                         sg_first = task->task_sg;
                         chained_nents = task->task_sg_nents;
@@ -4037,9 +4051,19 @@ void transport_do_task_sg_chain(struct se_cmd *cmd)
                         sg_chain(sg_prev, sg_prev_nents, task->task_sg);
                         chained_nents += task->task_sg_nents;
                 }
+               /*
+                * For the padded tasks, use the extra SGL vector allocated
+                * in transport_allocate_data_tasks() for the sg_prev_nents
+                * offset into sg_chain() above..  The last task of a
+                * multi-task list, or a single task will not have
+                * task->task_sg_padded set..
+                */
+               if (task->task_padded_sg)
+                       sg_prev_nents = (task->task_sg_nents + 1);
+               else
+                       sg_prev_nents = task->task_sg_nents;
  
                 sg_prev = task->task_sg;
-               sg_prev_nents = task->task_sg_nents;
         }
         /*
          * Setup the starting pointer and total t_tasks_sg_linked_no including
@@ -4091,7 +4115,7 @@ static int transport_allocate_data_tasks(
         
         cmd_sg = sgl;
         for (i = 0; i < task_count; i++) {
-               unsigned int task_size;
+               unsigned int task_size, task_sg_nents_padded;
                 int count;
  
                 task = transport_generic_get_task(cmd, data_direction);
@@ -4110,30 +4134,33 @@ static int transport_allocate_data_tasks(
  
                 /* Update new cdb with updated lba/sectors */
                 cmd->transport_split_cdb(task->task_lba, task->task_sectors, cdb);
-
+               /*
+                * This now assumes that passed sg_ents are in PAGE_SIZE chunks
+                * in order to calculate the number per task SGL entries
+                */
+               task->task_sg_nents = DIV_ROUND_UP(task->task_size, PAGE_SIZE);
                 /*
                  * Check if the fabric module driver is requesting that all
                  * struct se_task->task_sg[] be chained together..  If so,
                  * then allocate an extra padding SG entry for linking and
-                * marking the end of the chained SGL.
-                * Possibly over-allocate task sgl size by using cmd sgl size.
-                * It's so much easier and only a waste when task_count > 1.
-                * That is extremely rare.
+                * marking the end of the chained SGL for every task except
+                * the last one for (task_count > 1) operation, or skipping
+                * the extra padding for the (task_count == 1) case.
                  */
-               task->task_sg_nents = sgl_nents;
-               if (cmd->se_tfo->task_sg_chaining) {
-                       task->task_sg_nents++;
+               if (cmd->se_tfo->task_sg_chaining && (i < (task_count - 1))) {
+                       task_sg_nents_padded = (task->task_sg_nents + 1);
                         task->task_padded_sg = 1;
-               }
+               } else
+                       task_sg_nents_padded = task->task_sg_nents;
  
                 task->task_sg = kmalloc(sizeof(struct scatterlist) *
-                                       task->task_sg_nents, GFP_KERNEL);
+                                       task_sg_nents_padded, GFP_KERNEL);
                 if (!task->task_sg) {
                         cmd->se_dev->transport->free_task(task);
                         return -ENOMEM;
                 }
  
-               sg_init_table(task->task_sg, task->task_sg_nents);
+               sg_init_table(task->task_sg, task_sg_nents_padded);
  
                 task_size = task->task_size;
  
@@ -4230,10 +4257,13 @@ static u32 transport_allocate_tasks(
         struct scatterlist *sgl,
         unsigned int sgl_nents)
  {
-       if (cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB)
+       if (cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB) {
+               if (transport_cmd_get_valid_sectors(cmd) < 0)
+                       return -EINVAL;
+
                 return transport_allocate_data_tasks(cmd, lba, data_direction,
                                                      sgl, sgl_nents);
-       else
+       } else
                 return transport_allocate_control_task(cmd);
  
  }
@@ -4726,6 +4756,13 @@ int transport_send_check_condition_and_sense(
          */
         switch (reason) {
         case TCM_NON_EXISTENT_LUN:
+               /* CURRENT ERROR */
+               buffer[offset] = 0x70;
+               /* ILLEGAL REQUEST */
+               buffer[offset+SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST;
+               /* LOGICAL UNIT NOT SUPPORTED */
+               buffer[offset+SPC_ASC_KEY_OFFSET] = 0x25;
+               break;
         case TCM_UNSUPPORTED_SCSI_OPCODE:
         case TCM_SECTOR_COUNT_TOO_MANY:
                 /* CURRENT ERROR */
diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h

index bd4fe21a23b889d8083cb57c32ae4fa588b2ed69..3749d8b4b4233115a3e1fb1bede71e15104f8a01 100644 (file)
--- a/drivers/target/tcm_fc/tcm_fc.h
+++ b/drivers/target/tcm_fc/tcm_fc.h
@@ -98,8 +98,7 @@ struct ft_tpg {
         struct list_head list;          /* linkage in ft_lport_acl tpg_list */
         struct list_head lun_list;      /* head of LUNs */
         struct se_portal_group se_tpg;
-       struct task_struct *thread;     /* processing thread */
-       struct se_queue_obj qobj;       /* queue for processing thread */
+       struct workqueue_struct *workqueue;
  };
  
  struct ft_lport_acl {
@@ -110,16 +109,10 @@ struct ft_lport_acl {
         struct se_wwn fc_lport_wwn;
  };
  
-enum ft_cmd_state {
-       FC_CMD_ST_NEW = 0,
-       FC_CMD_ST_REJ
-};
-
  /*
   * Commands
   */
  struct ft_cmd {
-       enum ft_cmd_state state;
         u32 lun;                        /* LUN from request */
         struct ft_sess *sess;           /* session held for cmd */
         struct fc_seq *seq;             /* sequence in exchange mgr */
@@ -127,7 +120,7 @@ struct ft_cmd {
         struct fc_frame *req_frame;
         unsigned char *cdb;             /* pointer to CDB inside frame */
         u32 write_data_len;             /* data received on writes */
-       struct se_queue_req se_req;
+       struct work_struct work;
         /* Local sense buffer */
         unsigned char ft_sense_buffer[TRANSPORT_SENSE_BUFFER];
         u32 was_ddp_setup:1;            /* Set only if ddp is setup */
@@ -177,7 +170,6 @@ int ft_is_state_remove(struct se_cmd *);
  /*
   * other internal functions.
   */
-int ft_thread(void *);
  void ft_recv_req(struct ft_sess *, struct fc_frame *);
  struct ft_tpg *ft_lport_find_tpg(struct fc_lport *);
  struct ft_node_acl *ft_acl_get(struct ft_tpg *, struct fc_rport_priv *);
diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c

index 5654dc22f7aef0adb2cbedb301057d9e5cc4ca69..80fbcde00cb694db7a628d3e0c0362a3e47650cd 100644 (file)
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c
@@ -62,8 +62,8 @@ void ft_dump_cmd(struct ft_cmd *cmd, const char *caller)
         int count;
  
         se_cmd = &cmd->se_cmd;
-       pr_debug("%s: cmd %p state %d sess %p seq %p se_cmd %p\n",
-               caller, cmd, cmd->state, cmd->sess, cmd->seq, se_cmd);
+       pr_debug("%s: cmd %p sess %p seq %p se_cmd %p\n",
+               caller, cmd, cmd->sess, cmd->seq, se_cmd);
         pr_debug("%s: cmd %p cdb %p\n",
                 caller, cmd, cmd->cdb);
         pr_debug("%s: cmd %p lun %d\n", caller, cmd, cmd->lun);
@@ -90,38 +90,6 @@ void ft_dump_cmd(struct ft_cmd *cmd, const char *caller)
                 16, 4, cmd->cdb, MAX_COMMAND_SIZE, 0);
  }
  
-static void ft_queue_cmd(struct ft_sess *sess, struct ft_cmd *cmd)
-{
-       struct ft_tpg *tpg = sess->tport->tpg;
-       struct se_queue_obj *qobj = &tpg->qobj;
-       unsigned long flags;
-
-       qobj = &sess->tport->tpg->qobj;
-       spin_lock_irqsave(&qobj->cmd_queue_lock, flags);
-       list_add_tail(&cmd->se_req.qr_list, &qobj->qobj_list);
-       atomic_inc(&qobj->queue_cnt);
-       spin_unlock_irqrestore(&qobj->cmd_queue_lock, flags);
-
-       wake_up_process(tpg->thread);
-}
-
-static struct ft_cmd *ft_dequeue_cmd(struct se_queue_obj *qobj)
-{
-       unsigned long flags;
-       struct se_queue_req *qr;
-
-       spin_lock_irqsave(&qobj->cmd_queue_lock, flags);
-       if (list_empty(&qobj->qobj_list)) {
-               spin_unlock_irqrestore(&qobj->cmd_queue_lock, flags);
-               return NULL;
-       }
-       qr = list_first_entry(&qobj->qobj_list, struct se_queue_req, qr_list);
-       list_del(&qr->qr_list);
-       atomic_dec(&qobj->queue_cnt);
-       spin_unlock_irqrestore(&qobj->cmd_queue_lock, flags);
-       return container_of(qr, struct ft_cmd, se_req);
-}
-
  static void ft_free_cmd(struct ft_cmd *cmd)
  {
         struct fc_frame *fp;
@@ -282,9 +250,7 @@ u32 ft_get_task_tag(struct se_cmd *se_cmd)
  
  int ft_get_cmd_state(struct se_cmd *se_cmd)
  {
-       struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd);
-
-       return cmd->state;
+       return 0;
  }
  
  int ft_is_state_remove(struct se_cmd *se_cmd)
@@ -505,6 +471,8 @@ int ft_queue_tm_resp(struct se_cmd *se_cmd)
         return 0;
  }
  
+static void ft_send_work(struct work_struct *work);
+
  /*
   * Handle incoming FCP command.
   */
@@ -523,7 +491,9 @@ static void ft_recv_cmd(struct ft_sess *sess, struct fc_frame *fp)
                 goto busy;
         }
         cmd->req_frame = fp;            /* hold frame during cmd */
-       ft_queue_cmd(sess, cmd);
+
+       INIT_WORK(&cmd->work, ft_send_work);
+       queue_work(sess->tport->tpg->workqueue, &cmd->work);
         return;
  
  busy:
@@ -563,12 +533,13 @@ void ft_recv_req(struct ft_sess *sess, struct fc_frame *fp)
  /*
   * Send new command to target.
   */
-static void ft_send_cmd(struct ft_cmd *cmd)
+static void ft_send_work(struct work_struct *work)
  {
+       struct ft_cmd *cmd = container_of(work, struct ft_cmd, work);
         struct fc_frame_header *fh = fc_frame_header_get(cmd->req_frame);
         struct se_cmd *se_cmd;
         struct fcp_cmnd *fcp;
-       int data_dir;
+       int data_dir = 0;
         u32 data_len;
         int task_attr;
         int ret;
@@ -675,42 +646,3 @@ static void ft_send_cmd(struct ft_cmd *cmd)
  err:
         ft_send_resp_code_and_free(cmd, FCP_CMND_FIELDS_INVALID);
  }
-
-/*
- * Handle request in the command thread.
- */
-static void ft_exec_req(struct ft_cmd *cmd)
-{
-       pr_debug("cmd state %x\n", cmd->state);
-       switch (cmd->state) {
-       case FC_CMD_ST_NEW:
-               ft_send_cmd(cmd);
-               break;
-       default:
-               break;
-       }
-}
-
-/*
- * Processing thread.
- * Currently one thread per tpg.
- */
-int ft_thread(void *arg)
-{
-       struct ft_tpg *tpg = arg;
-       struct se_queue_obj *qobj = &tpg->qobj;
-       struct ft_cmd *cmd;
-
-       while (!kthread_should_stop()) {
-               schedule_timeout_interruptible(MAX_SCHEDULE_TIMEOUT);
-               if (kthread_should_stop())
-                       goto out;
-
-               cmd = ft_dequeue_cmd(qobj);
-               if (cmd)
-                       ft_exec_req(cmd);
-       }
-
-out:
-       return 0;
-}
diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c

index 8781d1e423df31629d2d5eaa19e2db0e409059d8..8fa39b74f22c4a178ed714f123131521f0d78e84 100644 (file)
--- a/drivers/target/tcm_fc/tfc_conf.c
+++ b/drivers/target/tcm_fc/tfc_conf.c
@@ -256,7 +256,7 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata)
         struct se_portal_group *se_tpg = &tpg->se_tpg;
         struct se_node_acl *se_acl;
  
-       spin_lock_bh(&se_tpg->acl_node_lock);
+       spin_lock_irq(&se_tpg->acl_node_lock);
         list_for_each_entry(se_acl, &se_tpg->acl_node_list, acl_list) {
                 acl = container_of(se_acl, struct ft_node_acl, se_node_acl);
                 pr_debug("acl %p port_name %llx\n",
@@ -270,7 +270,7 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata)
                         break;
                 }
         }
-       spin_unlock_bh(&se_tpg->acl_node_lock);
+       spin_unlock_irq(&se_tpg->acl_node_lock);
         return found;
  }
  
@@ -327,7 +327,6 @@ static struct se_portal_group *ft_add_tpg(
         tpg->index = index;
         tpg->lport_acl = lacl;
         INIT_LIST_HEAD(&tpg->lun_list);
-       transport_init_queue_obj(&tpg->qobj);
  
         ret = core_tpg_register(&ft_configfs->tf_ops, wwn, &tpg->se_tpg,
                                 tpg, TRANSPORT_TPG_TYPE_NORMAL);
@@ -336,8 +335,8 @@ static struct se_portal_group *ft_add_tpg(
                 return NULL;
         }
  
-       tpg->thread = kthread_run(ft_thread, tpg, "ft_tpg%lu", index);
-       if (IS_ERR(tpg->thread)) {
+       tpg->workqueue = alloc_workqueue("tcm_fc", 0, 1);
+       if (!tpg->workqueue) {
                 kfree(tpg);
                 return NULL;
         }
@@ -356,7 +355,7 @@ static void ft_del_tpg(struct se_portal_group *se_tpg)
         pr_debug("del tpg %s\n",
                     config_item_name(&tpg->se_tpg.tpg_group.cg_item));
  
-       kthread_stop(tpg->thread);
+       destroy_workqueue(tpg->workqueue);
  
         /* Wait for sessions to be freed thru RCU, for BUG_ON below */
         synchronize_rcu();
@@ -655,9 +654,7 @@ static void __exit ft_exit(void)
         synchronize_rcu();
  }
  
-#ifdef MODULE
  MODULE_DESCRIPTION("FC TCM fabric driver " FT_VERSION);
  MODULE_LICENSE("GPL");
  module_init(ft_init);
  module_exit(ft_exit);
-#endif /* MODULE */
diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c

index c37f4cd96452e8bf6ab40d99ee45f7df84a49bcb..d35ea5a3d56c159d96eaaa533abe3fd8f34f9843 100644 (file)
--- a/drivers/target/tcm_fc/tfc_io.c
+++ b/drivers/target/tcm_fc/tfc_io.c
@@ -219,43 +219,41 @@ void ft_recv_write_data(struct ft_cmd *cmd, struct fc_frame *fp)
         if (cmd->was_ddp_setup) {
                 BUG_ON(!ep);
                 BUG_ON(!lport);
-       }
-
-       /*
-        * Doesn't expect payload if DDP is setup. Payload
-        * is expected to be copied directly to user buffers
-        * due to DDP (Large Rx offload),
-        */
-       buf = fc_frame_payload_get(fp, 1);
-       if (buf)
-               pr_err("%s: xid 0x%x, f_ctl 0x%x, cmd->sg %p, "
+               /*
+                * Since DDP (Large Rx offload) was setup for this request,
+                * payload is expected to be copied directly to user buffers.
+                */
+               buf = fc_frame_payload_get(fp, 1);
+               if (buf)
+                       pr_err("%s: xid 0x%x, f_ctl 0x%x, cmd->sg %p, "
                                 "cmd->sg_cnt 0x%x. DDP was setup"
                                 " hence not expected to receive frame with "
-                               "payload, Frame will be dropped if "
-                               "'Sequence Initiative' bit in f_ctl is "
+                               "payload, Frame will be dropped if"
+                               "'Sequence Initiative' bit in f_ctl is"
                                 "not set\n", __func__, ep->xid, f_ctl,
                                 cmd->sg, cmd->sg_cnt);
-       /*
-        * Invalidate HW DDP context if it was setup for respective
-        * command. Invalidation of HW DDP context is requited in both
-        * situation (success and error). 
-        */
-       ft_invl_hw_context(cmd);
+               /*
+                * Invalidate HW DDP context if it was setup for respective
+                * command. Invalidation of HW DDP context is requited in both
+                * situation (success and error).
+                */
+               ft_invl_hw_context(cmd);
  
-       /*
-        * If "Sequence Initiative (TSI)" bit set in f_ctl, means last
-        * write data frame is received successfully where payload is
-        * posted directly to user buffer and only the last frame's
-        * header is posted in receive queue.
-        *
-        * If "Sequence Initiative (TSI)" bit is not set, means error
-        * condition w.r.t. DDP, hence drop the packet and let explict
-        * ABORTS from other end of exchange timer trigger the recovery.
-        */
-       if (f_ctl & FC_FC_SEQ_INIT)
-               goto last_frame;
-       else
-               goto drop;
+               /*
+                * If "Sequence Initiative (TSI)" bit set in f_ctl, means last
+                * write data frame is received successfully where payload is
+                * posted directly to user buffer and only the last frame's
+                * header is posted in receive queue.
+                *
+                * If "Sequence Initiative (TSI)" bit is not set, means error
+                * condition w.r.t. DDP, hence drop the packet and let explict
+                * ABORTS from other end of exchange timer trigger the recovery.
+                */
+               if (f_ctl & FC_FC_SEQ_INIT)
+                       goto last_frame;
+               else
+                       goto drop;
+       }
  
         rel_off = ntohl(fh->fh_parm_offset);
         frame_len = fr_len(fp);
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c

index 98b6e3bdb000bda730a69a05680d76b8584f1715..e809e9d4683c6d82197cbd569c12b1375df8be99 100644 (file)
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -446,8 +446,19 @@ static inline void legacy_pty_init(void) { }
  int pty_limit = NR_UNIX98_PTY_DEFAULT;
  static int pty_limit_min;
  static int pty_limit_max = NR_UNIX98_PTY_MAX;
+static int tty_count;
  static int pty_count;
  
+static inline void pty_inc_count(void)
+{
+       pty_count = (++tty_count) / 2;
+}
+
+static inline void pty_dec_count(void)
+{
+       pty_count = (--tty_count) / 2;
+}
+
  static struct cdev ptmx_cdev;
  
  static struct ctl_table pty_table[] = {
@@ -542,6 +553,7 @@ static struct tty_struct *pts_unix98_lookup(struct tty_driver *driver,
  
  static void pty_unix98_shutdown(struct tty_struct *tty)
  {
+       tty_driver_remove_tty(tty->driver, tty);
         /* We have our own method as we don't use the tty index */
         kfree(tty->termios);
  }
@@ -588,7 +600,8 @@ static int pty_unix98_install(struct tty_driver *driver, struct tty_struct *tty)
          */
         tty_driver_kref_get(driver);
         tty->count++;
-       pty_count++;
+       pty_inc_count(); /* tty */
+       pty_inc_count(); /* tty->link */
         return 0;
  err_free_mem:
         deinitialize_tty_struct(o_tty);
@@ -602,7 +615,7 @@ err_free_tty:
  
  static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
  {
-       pty_count--;
+       pty_dec_count();
  }
  
  static const struct tty_operations ptm_unix98_ops = {
diff --git a/drivers/tty/serial/8250.c b/drivers/tty/serial/8250.c

index f2dfec82faf85d19f78d3ea4d755ae730b336911..7f50999eebc22f00fb482d2fda62d62f11262be5 100644 (file)
--- a/drivers/tty/serial/8250.c
+++ b/drivers/tty/serial/8250.c
@@ -1819,6 +1819,8 @@ static void serial8250_backup_timeout(unsigned long data)
         unsigned int iir, ier = 0, lsr;
         unsigned long flags;
  
+       spin_lock_irqsave(&up->port.lock, flags);
+
         /*
          * Must disable interrupts or else we risk racing with the interrupt
          * based handler.
@@ -1836,10 +1838,8 @@ static void serial8250_backup_timeout(unsigned long data)
          * the "Diva" UART used on the management processor on many HP
          * ia64 and parisc boxes.
          */
-       spin_lock_irqsave(&up->port.lock, flags);
         lsr = serial_in(up, UART_LSR);
         up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
-       spin_unlock_irqrestore(&up->port.lock, flags);
         if ((iir & UART_IIR_NO_INT) && (up->ier & UART_IER_THRI) &&
             (!uart_circ_empty(&up->port.state->xmit) || up->port.x_char) &&
             (lsr & UART_LSR_THRE)) {
@@ -1848,11 +1848,13 @@ static void serial8250_backup_timeout(unsigned long data)
         }
  
         if (!(iir & UART_IIR_NO_INT))
-               serial8250_handle_port(up);
+               transmit_chars(up);
  
         if (is_real_interrupt(up->port.irq))
                 serial_out(up, UART_IER, ier);
  
+       spin_unlock_irqrestore(&up->port.lock, flags);
+
         /* Standard timer interval plus 0.2s to keep the port running */
         mod_timer(&up->timer,
                 jiffies + uart_poll_timeout(&up->port) + HZ / 5);
diff --git a/drivers/tty/serial/8250_pci.c b/drivers/tty/serial/8250_pci.c

index 6b887d90a20554683975d921c7ca492bba3ccf68..3abeca2a2a1bb08a85c8e5d76fe37afc22da5f4e 100644 (file)
--- a/drivers/tty/serial/8250_pci.c
+++ b/drivers/tty/serial/8250_pci.c
@@ -1599,11 +1599,6 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
                 .device         = 0x800D,
                 .init           = pci_eg20t_init,
         },
-       {
-               .vendor         = 0x10DB,
-               .device         = 0x800D,
-               .init           = pci_eg20t_init,
-       },
         /*
          * Cronyx Omega PCI (PLX-chip based)
          */
@@ -4021,13 +4016,17 @@ static struct pci_device_id serial_pci_tbl[] = {
                 0, 0, pbn_NETMOS9900_2s_115200 },
  
         /*
-        * Best Connectivity PCI Multi I/O cards
+        * Best Connectivity and Rosewill PCI Multi I/O cards
          */
  
         {       PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865,
                 0xA000, 0x1000,
                 0, 0, pbn_b0_1_115200 },
  
+       {       PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865,
+               0xA000, 0x3002,
+               0, 0, pbn_b0_bt_2_115200 },
+
         {       PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865,
                 0xA000, 0x3004,
                 0, 0, pbn_b0_bt_4_115200 },
diff --git a/drivers/tty/serial/8250_pnp.c b/drivers/tty/serial/8250_pnp.c

index fc301f6722e1bf4d2e9788f01e702cee6e711ddd..a2f236510ff1cd3f7fc92524f6096dc8678946f0 100644 (file)
--- a/drivers/tty/serial/8250_pnp.c
+++ b/drivers/tty/serial/8250_pnp.c
@@ -109,6 +109,9 @@ static const struct pnp_device_id pnp_dev_table[] = {
         /* IBM */
         /* IBM Thinkpad 701 Internal Modem Voice */
         {       "IBM0033",              0       },
+       /* Intermec */
+       /* Intermec CV60 touchscreen port */
+       {       "PNP4972",              0       },
         /* Intertex */
         /* Intertex 28k8 33k6 Voice EXT PnP */
         {       "IXDC801",              0       },
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c

index af9b7814965a461921d337047c068aed9bb5d051..b922f5d2e61e0cc52d4aea4ba9c81fca0b7fcc52 100644 (file)
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -1609,9 +1609,11 @@ static struct console atmel_console = {
  static int __init atmel_console_init(void)
  {
         if (atmel_default_console_device) {
-               add_preferred_console(ATMEL_DEVICENAME,
-                                     atmel_default_console_device->id, NULL);
-               atmel_init_port(&atmel_ports[atmel_default_console_device->id],
+               struct atmel_uart_data *pdata =
+                       atmel_default_console_device->dev.platform_data;
+
+               add_preferred_console(ATMEL_DEVICENAME, pdata->num, NULL);
+               atmel_init_port(&atmel_ports[pdata->num],
                                 atmel_default_console_device);
                 register_console(&atmel_console);
         }
diff --git a/drivers/tty/serial/crisv10.c b/drivers/tty/serial/crisv10.c

index 225123b37f190de6ea448cba50be732ea20c3885..58be715913cdcc5d0e7f23a1a2a8c1128c118947 100644 (file)
--- a/drivers/tty/serial/crisv10.c
+++ b/drivers/tty/serial/crisv10.c
@@ -4450,7 +4450,7 @@ static int __init rs_init(void)
  
  #if defined(CONFIG_ETRAX_RS485)
  #if defined(CONFIG_ETRAX_RS485_ON_PA)
-       if (cris_io_interface_allocate_pins(if_ser0, 'a', rs485_pa_bit,
+       if (cris_io_interface_allocate_pins(if_serial_0, 'a', rs485_pa_bit,
                         rs485_pa_bit)) {
                 printk(KERN_CRIT "ETRAX100LX serial: Could not allocate "
                         "RS485 pin\n");
@@ -4459,7 +4459,7 @@ static int __init rs_init(void)
         }
  #endif
  #if defined(CONFIG_ETRAX_RS485_ON_PORT_G)
-       if (cris_io_interface_allocate_pins(if_ser0, 'g', rs485_pa_bit,
+       if (cris_io_interface_allocate_pins(if_serial_0, 'g', rs485_pa_bit,
                         rs485_port_g_bit)) {
                 printk(KERN_CRIT "ETRAX100LX serial: Could not allocate "
                         "RS485 pin\n");
diff --git a/drivers/tty/serial/max3107-aava.c b/drivers/tty/serial/max3107-aava.c

index a1fe304f2f5204adf43c2a26171886198719ac91..d73aadd7a9ad6614385b62f92582c5c7b87c7b61 100644 (file)
--- a/drivers/tty/serial/max3107-aava.c
+++ b/drivers/tty/serial/max3107-aava.c
@@ -340,5 +340,5 @@ module_exit(max3107_exit);
  
  MODULE_DESCRIPTION("MAX3107 driver");
  MODULE_AUTHOR("Aavamobile");
-MODULE_ALIAS("aava-max3107-spi");
+MODULE_ALIAS("spi:aava-max3107");
  MODULE_LICENSE("GPL v2");
diff --git a/drivers/tty/serial/max3107.c b/drivers/tty/serial/max3107.c

index 750b4f627315e465e7c9eed0f7d9fd172f869c13..a8164601c0ead50c841f4a3a662f90df315e18ca 100644 (file)
--- a/drivers/tty/serial/max3107.c
+++ b/drivers/tty/serial/max3107.c
@@ -1209,5 +1209,5 @@ module_exit(max3107_exit);
  
  MODULE_DESCRIPTION("MAX3107 driver");
  MODULE_AUTHOR("Aavamobile");
-MODULE_ALIAS("max3107-spi");
+MODULE_ALIAS("spi:max3107");
  MODULE_LICENSE("GPL v2");
diff --git a/drivers/tty/serial/mrst_max3110.c b/drivers/tty/serial/mrst_max3110.c

index a764bf99743b0b5c6be4d46fed2952cbca278d94..23bc743f2a22f864674750fb2ea4d4f442ca174f 100644 (file)
--- a/drivers/tty/serial/mrst_max3110.c
+++ b/drivers/tty/serial/mrst_max3110.c
@@ -917,4 +917,4 @@ module_init(serial_m3110_init);
  module_exit(serial_m3110_exit);
  
  MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("max3110-uart");
+MODULE_ALIAS("spi:max3110-uart");
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c

index c37df8d0fa2819261dffccc5bc4d0180b9531f49..5e713d3ef1f47c49eaa93f8d4ced09ce87b89c1e 100644 (file)
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -806,8 +806,7 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios,
  
         serial_omap_set_mctrl(&up->port, up->port.mctrl);
         /* Software Flow Control Configuration */
-       if (termios->c_iflag & (IXON | IXOFF))
-               serial_omap_configure_xonxoff(up, termios);
+       serial_omap_configure_xonxoff(up, termios);
  
         spin_unlock_irqrestore(&up->port.lock, flags);
         dev_dbg(up->port.dev, "serial_omap_set_termios+%d\n", up->pdev->id);
diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c

index 846dfcd3ce0d28a6ee882278a90779c3c6427e21..b46218d679e21e4561657b8cf5508e7138d5e03e 100644 (file)
--- a/drivers/tty/serial/pch_uart.c
+++ b/drivers/tty/serial/pch_uart.c
@@ -598,7 +598,8 @@ static void pch_request_dma(struct uart_port *port)
         dma_cap_zero(mask);
         dma_cap_set(DMA_SLAVE, mask);
  
-       dma_dev = pci_get_bus_and_slot(2, PCI_DEVFN(0xa, 0)); /* Get DMA's dev
+       dma_dev = pci_get_bus_and_slot(priv->pdev->bus->number,
+                                      PCI_DEVFN(0xa, 0)); /* Get DMA's dev
                                                                 information */
         /* Set Tx DMA */
         param = &priv->param_tx;
diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c

index afc629423152bfa606b27bb11094713dd7936988..6edafb5ace183bfb0c30eff6f00f7f673ea022c9 100644 (file)
--- a/drivers/tty/serial/samsung.c
+++ b/drivers/tty/serial/samsung.c
@@ -1225,15 +1225,19 @@ static const struct dev_pm_ops s3c24xx_serial_pm_ops = {
         .suspend = s3c24xx_serial_suspend,
         .resume = s3c24xx_serial_resume,
  };
+#define SERIAL_SAMSUNG_PM_OPS  (&s3c24xx_serial_pm_ops)
+
  #else /* !CONFIG_PM_SLEEP */
-#define s3c24xx_serial_pm_ops  NULL
+
+#define SERIAL_SAMSUNG_PM_OPS  NULL
  #endif /* CONFIG_PM_SLEEP */
  
  int s3c24xx_serial_init(struct platform_driver *drv,
                         struct s3c24xx_uart_info *info)
  {
         dbg("s3c24xx_serial_init(%p,%p)\n", drv, info);
-       drv->driver.pm = &s3c24xx_serial_pm_ops;
+
+       drv->driver.pm = SERIAL_SAMSUNG_PM_OPS;
  
         return platform_driver_register(drv);
  }
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c

index db7912cb7ae041a71a901437d3c835ee242530a9..a3efbea5dbba6b88f0068470c4e0ad019fa3cd5d 100644 (file)
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -200,6 +200,11 @@ static int uart_startup(struct tty_struct *tty, struct uart_state *state, int in
                 clear_bit(TTY_IO_ERROR, &tty->flags);
         }
  
+       /*
+        * This is to allow setserial on this port. People may want to set
+        * port/irq/type and then reconfigure the port properly if it failed
+        * now.
+        */
         if (retval && capable(CAP_SYS_ADMIN))
                 retval = 0;
  
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c

index 2ec57b2fb2783a232df5aad9df1a276a6f2edd1e..5ea6ec3442e64d2affe5f54d9871f1561624c2f9 100644 (file)
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -47,6 +47,7 @@
  #include <linux/ctype.h>
  #include <linux/err.h>
  #include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
  #include <linux/scatterlist.h>
  #include <linux/slab.h>
  
@@ -95,6 +96,12 @@ struct sci_port {
  #endif
  
         struct notifier_block           freq_transition;
+
+#ifdef CONFIG_SERIAL_SH_SCI_CONSOLE
+       unsigned short saved_smr;
+       unsigned short saved_fcr;
+       unsigned char saved_brr;
+#endif
  };
  
  /* Function prototypes */
@@ -1076,7 +1083,7 @@ static unsigned int sci_get_mctrl(struct uart_port *port)
         /* This routine is used for getting signals of: DTR, DCD, DSR, RI,
            and CTS/RTS */
  
-       return TIOCM_DTR | TIOCM_RTS | TIOCM_DSR;
+       return TIOCM_DTR | TIOCM_RTS | TIOCM_CTS | TIOCM_DSR;
  }
  
  #ifdef CONFIG_SERIAL_SH_SCI_DMA
@@ -1633,11 +1640,25 @@ static unsigned int sci_scbrr_calc(unsigned int algo_id, unsigned int bps,
         return ((freq + 16 * bps) / (32 * bps) - 1);
  }
  
+static void sci_reset(struct uart_port *port)
+{
+       unsigned int status;
+
+       do {
+               status = sci_in(port, SCxSR);
+       } while (!(status & SCxSR_TEND(port)));
+
+       sci_out(port, SCSCR, 0x00);     /* TE=0, RE=0, CKE1=0 */
+
+       if (port->type != PORT_SCI)
+               sci_out(port, SCFCR, SCFCR_RFRST | SCFCR_TFRST);
+}
+
  static void sci_set_termios(struct uart_port *port, struct ktermios *termios,
                             struct ktermios *old)
  {
         struct sci_port *s = to_sci_port(port);
-       unsigned int status, baud, smr_val, max_baud;
+       unsigned int baud, smr_val, max_baud;
         int t = -1;
         u16 scfcr = 0;
  
@@ -1657,14 +1678,7 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios,
  
         sci_port_enable(s);
  
-       do {
-               status = sci_in(port, SCxSR);
-       } while (!(status & SCxSR_TEND(port)));
-
-       sci_out(port, SCSCR, 0x00);     /* TE=0, RE=0, CKE1=0 */
-
-       if (port->type != PORT_SCI)
-               sci_out(port, SCFCR, scfcr | SCFCR_RFRST | SCFCR_TFRST);
+       sci_reset(port);
  
         smr_val = sci_in(port, SCSMR) & 3;
  
@@ -1913,6 +1927,7 @@ static int __devinit sci_init_single(struct platform_device *dev,
  
                 port->dev = &dev->dev;
  
+               pm_runtime_irq_safe(&dev->dev);
                 pm_runtime_enable(&dev->dev);
         }
  
@@ -2036,7 +2051,8 @@ static int __devinit serial_console_setup(struct console *co, char *options)
         if (options)
                 uart_parse_options(options, &baud, &parity, &bits, &flow);
  
-       /* TODO: disable clock */
+       sci_port_disable(sci_port);
+
         return uart_set_options(port, co, baud, parity, bits, flow);
  }
  
@@ -2079,6 +2095,36 @@ static int __devinit sci_probe_earlyprintk(struct platform_device *pdev)
         return 0;
  }
  
+#define uart_console(port)     ((port)->cons->index == (port)->line)
+
+static int sci_runtime_suspend(struct device *dev)
+{
+       struct sci_port *sci_port = dev_get_drvdata(dev);
+       struct uart_port *port = &sci_port->port;
+
+       if (uart_console(port)) {
+               sci_port->saved_smr = sci_in(port, SCSMR);
+               sci_port->saved_brr = sci_in(port, SCBRR);
+               sci_port->saved_fcr = sci_in(port, SCFCR);
+       }
+       return 0;
+}
+
+static int sci_runtime_resume(struct device *dev)
+{
+       struct sci_port *sci_port = dev_get_drvdata(dev);
+       struct uart_port *port = &sci_port->port;
+
+       if (uart_console(port)) {
+               sci_reset(port);
+               sci_out(port, SCSMR, sci_port->saved_smr);
+               sci_out(port, SCBRR, sci_port->saved_brr);
+               sci_out(port, SCFCR, sci_port->saved_fcr);
+               sci_out(port, SCSCR, sci_port->cfg->scscr);
+       }
+       return 0;
+}
+
  #define SCI_CONSOLE    (&serial_console)
  
  #else
@@ -2088,6 +2134,8 @@ static inline int __devinit sci_probe_earlyprintk(struct platform_device *pdev)
  }
  
  #define SCI_CONSOLE    NULL
+#define sci_runtime_suspend    NULL
+#define sci_runtime_resume     NULL
  
  #endif /* CONFIG_SERIAL_SH_SCI_CONSOLE */
  
@@ -2203,6 +2251,8 @@ static int sci_resume(struct device *dev)
  }
  
  static const struct dev_pm_ops sci_dev_pm_ops = {
+       .runtime_suspend = sci_runtime_suspend,
+       .runtime_resume = sci_runtime_resume,
         .suspend        = sci_suspend,
         .resume         = sci_resume,
  };
diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c

index c327218cad44c91ab680cf0dc46fcd4933cc8347..9af9f0879a24542860ab8f934d025acf705174ca 100644 (file)
--- a/drivers/tty/serial/ucc_uart.c
+++ b/drivers/tty/serial/ucc_uart.c
@@ -235,7 +235,7 @@ static inline void *qe2cpu_addr(dma_addr_t addr, struct uart_qe_port *qe_port)
                 return qe_port->bd_virt + (addr - qe_port->bd_dma_addr);
  
         /* something nasty happened */
-       printk(KERN_ERR "%s: addr=%x\n", __func__, addr);
+       printk(KERN_ERR "%s: addr=%llx\n", __func__, (u64)addr);
         BUG();
         return NULL;
  }
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c

index 150e4f747c7dc4bc822a706a76d06afefb80424d..4f1fc81112e6b5ae6521e39cf19295e1f3a0c6b5 100644 (file)
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -1295,8 +1295,7 @@ static int tty_driver_install_tty(struct tty_driver *driver,
   *
   *     Locking: tty_mutex for now
   */
-static void tty_driver_remove_tty(struct tty_driver *driver,
-                                               struct tty_struct *tty)
+void tty_driver_remove_tty(struct tty_driver *driver, struct tty_struct *tty)
  {
         if (driver->ops->remove)
                 driver->ops->remove(driver, tty);
diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c

index 385acb895ab3518bcbd712ccf91e669220e0a5ea..3f94ac34dce31b80440689ae7f0d319a7d2f908c 100644 (file)
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -268,7 +268,7 @@ usbtmc_abort_bulk_in_status:
                                 dev_err(dev, "usb_bulk_msg returned %d\n", rv);
                                 goto exit;
                         }
-               } while ((actual = max_size) &&
+               } while ((actual == max_size) &&
                          (n < USBTMC_MAX_READS_TO_CLEAR_BULK_IN));
  
         if (actual == max_size) {
diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c

index c962608b4b9abc8b7d403eefb567f2018f9ba98d..26678cadfb215340e35a9b56f5fe23954a13cdd9 100644 (file)
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -123,10 +123,11 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
         }
  
         if (usb_endpoint_xfer_isoc(&ep->desc))
-               max_tx = ep->desc.wMaxPacketSize * (desc->bMaxBurst + 1) *
-                       (desc->bmAttributes + 1);
+               max_tx = (desc->bMaxBurst + 1) * (desc->bmAttributes + 1) *
+                       le16_to_cpu(ep->desc.wMaxPacketSize);
         else if (usb_endpoint_xfer_int(&ep->desc))
-               max_tx = ep->desc.wMaxPacketSize * (desc->bMaxBurst + 1);
+               max_tx = le16_to_cpu(ep->desc.wMaxPacketSize) *
+                       (desc->bMaxBurst + 1);
         else
                 max_tx = 999999;
         if (le16_to_cpu(desc->wBytesPerInterval) > max_tx) {
@@ -134,10 +135,10 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
                                 "config %d interface %d altsetting %d ep %d: "
                                 "setting to %d\n",
                                 usb_endpoint_xfer_isoc(&ep->desc) ? "Isoc" : "Int",
-                               desc->wBytesPerInterval,
+                               le16_to_cpu(desc->wBytesPerInterval),
                                 cfgno, inum, asnum, ep->desc.bEndpointAddress,
                                 max_tx);
-               ep->ss_ep_comp.wBytesPerInterval = max_tx;
+               ep->ss_ep_comp.wBytesPerInterval = cpu_to_le16(max_tx);
         }
  }
  
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c

index 8669ba3fe79486ffe487e6b6b266ff4767afe670..73cbbd85219fa5525b2968443d61a5c6f5086abd 100644 (file)
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1775,6 +1775,8 @@ int usb_hcd_alloc_bandwidth(struct usb_device *udev,
                 struct usb_interface *iface = usb_ifnum_to_if(udev,
                                 cur_alt->desc.bInterfaceNumber);
  
+               if (!iface)
+                       return -EINVAL;
                 if (iface->resetting_device) {
                         /*
                          * The USB core just reset the device, so the xHCI host
diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig

index 44b6b40aafb425d157d85622593d0ef959f5ff2b..5a084b9cfa3c67971bfe86628cbf38de8c49ec3b 100644 (file)
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -310,7 +310,7 @@ config USB_PXA_U2O
  # musb builds in ../musb along with host support
  config USB_GADGET_MUSB_HDRC
         tristate "Inventra HDRC USB Peripheral (TI, ADI, ...)"
-       depends on USB_MUSB_HDRC && (USB_MUSB_PERIPHERAL || USB_MUSB_OTG)
+       depends on USB_MUSB_HDRC
         select USB_GADGET_DUALSPEED
         help
           This OTG-capable silicon IP is used in dual designs including
diff --git a/drivers/usb/gadget/at91_udc.c b/drivers/usb/gadget/at91_udc.c

index 98cbc06c30fda338e74530636383f574c42df2bd..ddb118a76807a9f101e6ce34e3b59116eca04f76 100644 (file)
--- a/drivers/usb/gadget/at91_udc.c
+++ b/drivers/usb/gadget/at91_udc.c
@@ -35,6 +35,7 @@
  #include <linux/list.h>
  #include <linux/interrupt.h>
  #include <linux/proc_fs.h>
+#include <linux/prefetch.h>
  #include <linux/clk.h>
  #include <linux/usb/ch9.h>
  #include <linux/usb/gadget.h>
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c

index 5ef87794fd328743f635d07fd9b56d3429451d7c..aef47414f5d5cb621408d5c95a8cfe63d5d924c8 100644 (file)
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1079,10 +1079,12 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
                         cdev->desc.bMaxPacketSize0 =
                                 cdev->gadget->ep0->maxpacket;
                         if (gadget_is_superspeed(gadget)) {
-                               if (gadget->speed >= USB_SPEED_SUPER)
+                               if (gadget->speed >= USB_SPEED_SUPER) {
                                         cdev->desc.bcdUSB = cpu_to_le16(0x0300);
-                               else
+                                       cdev->desc.bMaxPacketSize0 = 9;
+                               } else {
                                         cdev->desc.bcdUSB = cpu_to_le16(0x0210);
+                               }
                         }
  
                         value = min(w_length, (u16) sizeof cdev->desc);
diff --git a/drivers/usb/gadget/f_hid.c b/drivers/usb/gadget/f_hid.c

index 403a48bcf5609befe4db39dc9fe35a5c3685a8fc..83a266bdb40ef1a5888650654c30bdcb70aa32c4 100644 (file)
--- a/drivers/usb/gadget/f_hid.c
+++ b/drivers/usb/gadget/f_hid.c
@@ -367,6 +367,13 @@ static int hidg_setup(struct usb_function *f,
         case ((USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_INTERFACE) << 8
                   | USB_REQ_GET_DESCRIPTOR):
                 switch (value >> 8) {
+               case HID_DT_HID:
+                       VDBG(cdev, "USB_REQ_GET_DESCRIPTOR: HID\n");
+                       length = min_t(unsigned short, length,
+                                                  hidg_desc.bLength);
+                       memcpy(req->buf, &hidg_desc, length);
+                       goto respond;
+                       break;
                 case HID_DT_REPORT:
                         VDBG(cdev, "USB_REQ_GET_DESCRIPTOR: REPORT\n");
                         length = min_t(unsigned short, length,
diff --git a/drivers/usb/gadget/f_phonet.c b/drivers/usb/gadget/f_phonet.c

index 8f8d3f6cd89edf22fb9d96a67f89bc3fb56b0d5f..8f3eab1af885847fa69ec343b0039a0df4faae39 100644 (file)
--- a/drivers/usb/gadget/f_phonet.c
+++ b/drivers/usb/gadget/f_phonet.c
@@ -434,6 +434,7 @@ static int pn_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
                             config_ep_by_speed(gadget, f, fp->out_ep)) {
                                 fp->in_ep->desc = NULL;
                                 fp->out_ep->desc = NULL;
+                               spin_unlock(&port->lock);
                                 return -EINVAL;
                         }
                         usb_ep_enable(fp->out_ep);
diff --git a/drivers/usb/gadget/fusb300_udc.c b/drivers/usb/gadget/fusb300_udc.c

index 24a924330c81f0d23bd70e5270c31ca97f0488c3..4ec888f900029ac823f2a136ef47ab18f03b5826 100644 (file)
--- a/drivers/usb/gadget/fusb300_udc.c
+++ b/drivers/usb/gadget/fusb300_udc.c
@@ -609,107 +609,6 @@ void fusb300_rdcxf(struct fusb300 *fusb300,
         }
  }
  
-#if 0
-static void fusb300_dbg_fifo(struct fusb300_ep *ep,
-                               u8 entry, u16 length)
-{
-       u32 reg;
-       u32 i = 0;
-       u32 j = 0;
-
-       reg = ioread32(ep->fusb300->reg + FUSB300_OFFSET_GTM);
-       reg &= ~(FUSB300_GTM_TST_EP_ENTRY(0xF) |
-               FUSB300_GTM_TST_EP_NUM(0xF) | FUSB300_GTM_TST_FIFO_DEG);
-       reg |= (FUSB300_GTM_TST_EP_ENTRY(entry) |
-               FUSB300_GTM_TST_EP_NUM(ep->epnum) | FUSB300_GTM_TST_FIFO_DEG);
-       iowrite32(reg, ep->fusb300->reg + FUSB300_OFFSET_GTM);
-
-       for (i = 0; i < (length >> 2); i++) {
-               if (i * 4 == 1024)
-                       break;
-               reg = ioread32(ep->fusb300->reg +
-                       FUSB300_OFFSET_BUFDBG_START + i * 4);
-               printk(KERN_DEBUG"  0x%-8x", reg);
-               j++;
-               if ((j % 4)  == 0)
-                       printk(KERN_DEBUG "\n");
-       }
-
-       if (length % 4) {
-               reg = ioread32(ep->fusb300->reg +
-                       FUSB300_OFFSET_BUFDBG_START + i * 4);
-               printk(KERN_DEBUG "  0x%x\n", reg);
-       }
-
-       if ((j % 4)  != 0)
-               printk(KERN_DEBUG "\n");
-
-       fusb300_disable_bit(ep->fusb300, FUSB300_OFFSET_GTM,
-               FUSB300_GTM_TST_FIFO_DEG);
-}
-
-static void fusb300_cmp_dbg_fifo(struct fusb300_ep *ep,
-                               u8 entry, u16 length, u8 *golden)
-{
-       u32 reg;
-       u32 i = 0;
-       u32 golden_value;
-       u8 *tmp;
-
-       tmp = golden;
-
-       printk(KERN_DEBUG "fusb300_cmp_dbg_fifo (entry %d) : start\n", entry);
-
-       reg = ioread32(ep->fusb300->reg + FUSB300_OFFSET_GTM);
-       reg &= ~(FUSB300_GTM_TST_EP_ENTRY(0xF) |
-               FUSB300_GTM_TST_EP_NUM(0xF) | FUSB300_GTM_TST_FIFO_DEG);
-       reg |= (FUSB300_GTM_TST_EP_ENTRY(entry) |
-               FUSB300_GTM_TST_EP_NUM(ep->epnum) | FUSB300_GTM_TST_FIFO_DEG);
-       iowrite32(reg, ep->fusb300->reg + FUSB300_OFFSET_GTM);
-
-       for (i = 0; i < (length >> 2); i++) {
-               if (i * 4 == 1024)
-                       break;
-               golden_value = *tmp | *(tmp + 1) << 8 |
-                               *(tmp + 2) << 16 | *(tmp + 3) << 24;
-
-               reg = ioread32(ep->fusb300->reg +
-                       FUSB300_OFFSET_BUFDBG_START + i*4);
-
-               if (reg != golden_value) {
-                       printk(KERN_DEBUG "0x%x  :  ", (u32)(ep->fusb300->reg +
-                               FUSB300_OFFSET_BUFDBG_START + i*4));
-                       printk(KERN_DEBUG "    golden = 0x%x, reg = 0x%x\n",
-                               golden_value, reg);
-               }
-               tmp += 4;
-       }
-
-       switch (length % 4) {
-       case 1:
-               golden_value = *tmp;
-       case 2:
-               golden_value = *tmp | *(tmp + 1) << 8;
-       case 3:
-               golden_value = *tmp | *(tmp + 1) << 8 | *(tmp + 2) << 16;
-       default:
-               break;
-
-       reg = ioread32(ep->fusb300->reg + FUSB300_OFFSET_BUFDBG_START + i*4);
-       if (reg != golden_value) {
-               printk(KERN_DEBUG "0x%x:", (u32)(ep->fusb300->reg +
-                       FUSB300_OFFSET_BUFDBG_START + i*4));
-               printk(KERN_DEBUG "  golden = 0x%x, reg = 0x%x\n",
-                       golden_value, reg);
-       }
-       }
-
-       printk(KERN_DEBUG "fusb300_cmp_dbg_fifo : end\n");
-       fusb300_disable_bit(ep->fusb300, FUSB300_OFFSET_GTM,
-               FUSB300_GTM_TST_FIFO_DEG);
-}
-#endif
-
  static void fusb300_rdfifo(struct fusb300_ep *ep,
                           struct fusb300_request *req,
                           u32 length)
diff --git a/drivers/usb/gadget/net2272.c b/drivers/usb/gadget/net2272.c

index 7c7b0e120d8892e118bc078ddf1baa655902bc00..ab98ea926a11b6e227cfe4b800e4b94dd50c27f2 100644 (file)
--- a/drivers/usb/gadget/net2272.c
+++ b/drivers/usb/gadget/net2272.c
@@ -27,13 +27,13 @@
  #include <linux/interrupt.h>
  #include <linux/io.h>
  #include <linux/ioport.h>
-#include <linux/irq.h>
  #include <linux/kernel.h>
  #include <linux/list.h>
  #include <linux/module.h>
  #include <linux/moduleparam.h>
  #include <linux/pci.h>
  #include <linux/platform_device.h>
+#include <linux/prefetch.h>
  #include <linux/sched.h>
  #include <linux/slab.h>
  #include <linux/timer.h>
diff --git a/drivers/usb/gadget/s3c2410_udc.c b/drivers/usb/gadget/s3c2410_udc.c

index 85c1b0d66293c72fe1577ea506faeb1b7cfb1d41..8d31848aab091534ae998e588f7803c88ea121c9 100644 (file)
--- a/drivers/usb/gadget/s3c2410_udc.c
+++ b/drivers/usb/gadget/s3c2410_udc.c
@@ -2060,6 +2060,7 @@ static int s3c2410_udc_resume(struct platform_device *pdev)
  static const struct platform_device_id s3c_udc_ids[] = {
         { "s3c2410-usbgadget", },
         { "s3c2440-usbgadget", },
+       { }
  };
  MODULE_DEVICE_TABLE(platform, s3c_udc_ids);
  
diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c

index bf2c8f65e1aefc6c4d89e9dfe15a9f4a804f090e..4c32cb19b405f7f5c4fc589e37acc3455ab15448 100644 (file)
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -343,7 +343,7 @@ static int ehci_bus_resume (struct usb_hcd *hcd)
         u32                     temp;
         u32                     power_okay;
         int                     i;
-       u8                      resume_needed = 0;
+       unsigned long           resume_needed = 0;
  
         if (time_before (jiffies, ehci->next_statechange))
                 msleep(5);
@@ -416,7 +416,7 @@ static int ehci_bus_resume (struct usb_hcd *hcd)
                 if (test_bit(i, &ehci->bus_suspended) &&
                                 (temp & PORT_SUSPEND)) {
                         temp |= PORT_RESUME;
-                       resume_needed = 1;
+                       set_bit(i, &resume_needed);
                 }
                 ehci_writel(ehci, temp, &ehci->regs->port_status [i]);
         }
@@ -431,8 +431,7 @@ static int ehci_bus_resume (struct usb_hcd *hcd)
         i = HCS_N_PORTS (ehci->hcs_params);
         while (i--) {
                 temp = ehci_readl(ehci, &ehci->regs->port_status [i]);
-               if (test_bit(i, &ehci->bus_suspended) &&
-                               (temp & PORT_SUSPEND)) {
+               if (test_bit(i, &resume_needed)) {
                         temp &= ~(PORT_RWC_BITS | PORT_RESUME);
                         ehci_writel(ehci, temp, &ehci->regs->port_status [i]);
                         ehci_vdbg (ehci, "resumed port %d\n", i + 1);
@@ -1046,7 +1045,19 @@ static int ehci_hub_control (
                         if (!selector || selector > 5)
                                 goto error;
                         ehci_quiesce(ehci);
+
+                       /* Put all enabled ports into suspend */
+                       while (ports--) {
+                               u32 __iomem *sreg =
+                                               &ehci->regs->port_status[ports];
+
+                               temp = ehci_readl(ehci, sreg) & ~PORT_RWC_BITS;
+                               if (temp & PORT_PE)
+                                       ehci_writel(ehci, temp | PORT_SUSPEND,
+                                                       sreg);
+                       }
                         ehci_halt(ehci);
+                       temp = ehci_readl(ehci, status_reg);
                         temp |= selector << 16;
                         ehci_writel(ehci, temp, status_reg);
                         break;
diff --git a/drivers/usb/host/ehci-mxc.c b/drivers/usb/host/ehci-mxc.c

index 0c058be35a3887d5a36b9ffa005873f6824e9457..555a73c864b57567229629e8e3432f7a710a7816 100644 (file)
--- a/drivers/usb/host/ehci-mxc.c
+++ b/drivers/usb/host/ehci-mxc.c
@@ -24,6 +24,7 @@
  #include <linux/usb/ulpi.h>
  #include <linux/slab.h>
  
+#include <mach/hardware.h>
  #include <mach/mxc_ehci.h>
  
  #include <asm/mach-types.h>
diff --git a/drivers/usb/host/ehci-omap.c b/drivers/usb/host/ehci-omap.c

index 55a57c23dd0feac7a07ce10ff5208900564d696d..45240321ca09e774ccf41f58e8f13a58b989d5e1 100644 (file)
--- a/drivers/usb/host/ehci-omap.c
+++ b/drivers/usb/host/ehci-omap.c
@@ -98,6 +98,18 @@ static void omap_ehci_soft_phy_reset(struct platform_device *pdev, u8 port)
         }
  }
  
+static void disable_put_regulator(
+               struct ehci_hcd_omap_platform_data *pdata)
+{
+       int i;
+
+       for (i = 0 ; i < OMAP3_HS_USB_PORTS ; i++) {
+               if (pdata->regulator[i]) {
+                       regulator_disable(pdata->regulator[i]);
+                       regulator_put(pdata->regulator[i]);
+               }
+       }
+}
  
  /* configure so an HC device and id are always provided */
  /* always called with process context; sleeping is OK */
@@ -231,9 +243,11 @@ err_add_hcd:
         omap_usbhs_disable(dev);
  
  err_enable:
+       disable_put_regulator(pdata);
         usb_put_hcd(hcd);
  
  err_io:
+       iounmap(regs);
         return ret;
  }
  
@@ -253,6 +267,8 @@ static int ehci_hcd_omap_remove(struct platform_device *pdev)
  
         usb_remove_hcd(hcd);
         omap_usbhs_disable(dev);
+       disable_put_regulator(dev->platform_data);
+       iounmap(hcd->regs);
         usb_put_hcd(hcd);
         return 0;
  }
diff --git a/drivers/usb/host/ehci-s5p.c b/drivers/usb/host/ehci-s5p.c

index b3958b3d31634f59c9c067712c7f4cf7c2959c0a..9e77f1c8bdbdc1d62e31565f9c8303e292f8026f 100644 (file)
--- a/drivers/usb/host/ehci-s5p.c
+++ b/drivers/usb/host/ehci-s5p.c
@@ -86,6 +86,7 @@ static int __devinit s5p_ehci_probe(struct platform_device *pdev)
                 goto fail_hcd;
         }
  
+       s5p_ehci->hcd = hcd;
         s5p_ehci->clk = clk_get(&pdev->dev, "usbhost");
  
         if (IS_ERR(s5p_ehci->clk)) {
diff --git a/drivers/usb/host/isp1760-hcd.c b/drivers/usb/host/isp1760-hcd.c

index 55d3d5859ac5667da2332a3a782588e4541bbdc9..840beda66dd94aa378c506918ffad093e1ba4052 100644 (file)
--- a/drivers/usb/host/isp1760-hcd.c
+++ b/drivers/usb/host/isp1760-hcd.c
@@ -1583,6 +1583,9 @@ static int isp1760_urb_dequeue(struct usb_hcd *hcd, struct urb *urb,
         int retval = 0;
  
         spin_lock_irqsave(&priv->lock, spinflags);
+       retval = usb_hcd_check_unlink_urb(hcd, urb, status);
+       if (retval)
+               goto out;
  
         qh = urb->ep->hcpriv;
         if (!qh) {
diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c

index a9d315906e3dede9caa39571742a3c34c76b55ee..629a96813fd66a9ebde456a2232a572c13f13dba 100644 (file)
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -535,7 +535,7 @@ static void __devinit quirk_usb_handoff_ohci(struct pci_dev *pdev)
         iounmap(base);
  }
  
-static const struct dmi_system_id __initconst ehci_dmi_nohandoff_table[] = {
+static const struct dmi_system_id __devinitconst ehci_dmi_nohandoff_table[] = {
         {
                 /*  Pegatron Lucid (ExoPC) */
                 .matches = {
@@ -817,7 +817,7 @@ static void __devinit quirk_usb_handoff_xhci(struct pci_dev *pdev)
  
         /* If the BIOS owns the HC, signal that the OS wants it, and wait */
         if (val & XHCI_HC_BIOS_OWNED) {
-               writel(val & XHCI_HC_OS_OWNED, base + ext_cap_offset);
+               writel(val | XHCI_HC_OS_OWNED, base + ext_cap_offset);
  
                 /* Wait for 5 seconds with 10 microsecond polling interval */
                 timeout = handshake(base + ext_cap_offset, XHCI_HC_BIOS_OWNED,
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c

index 0be788cc2fdbe6e40920e5f77efa7590475bffa8..723f8231193d95cb92cad93adcc3c47e917b8933 100644 (file)
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -463,11 +463,12 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                                         && (temp & PORT_POWER))
                                 status |= USB_PORT_STAT_SUSPEND;
                 }
-               if ((temp & PORT_PLS_MASK) == XDEV_RESUME) {
+               if ((temp & PORT_PLS_MASK) == XDEV_RESUME &&
+                               !DEV_SUPERSPEED(temp)) {
                         if ((temp & PORT_RESET) || !(temp & PORT_PE))
                                 goto error;
-                       if (!DEV_SUPERSPEED(temp) && time_after_eq(jiffies,
-                                               bus_state->resume_done[wIndex])) {
+                       if (time_after_eq(jiffies,
+                                       bus_state->resume_done[wIndex])) {
                                 xhci_dbg(xhci, "Resume USB2 port %d\n",
                                         wIndex + 1);
                                 bus_state->resume_done[wIndex] = 0;
@@ -487,6 +488,14 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                                 xhci_ring_device(xhci, slot_id);
                                 bus_state->port_c_suspend |= 1 << wIndex;
                                 bus_state->suspended_ports &= ~(1 << wIndex);
+                       } else {
+                               /*
+                                * The resume has been signaling for less than
+                                * 20ms. Report the port status as SUSPEND,
+                                * let the usbcore check port status again
+                                * and clear resume signaling later.
+                                */
+                               status |= USB_PORT_STAT_SUSPEND;
                         }
                 }
                 if ((temp & PORT_PLS_MASK) == XDEV_U0
@@ -664,7 +673,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                         xhci_dbg(xhci, "PORTSC %04x\n", temp);
                         if (temp & PORT_RESET)
                                 goto error;
-                       if (temp & XDEV_U3) {
+                       if ((temp & PORT_PLS_MASK) == XDEV_U3) {
                                 if ((temp & PORT_PE) == 0)
                                         goto error;
  
@@ -752,7 +761,7 @@ int xhci_hub_status_data(struct usb_hcd *hcd, char *buf)
         memset(buf, 0, retval);
         status = 0;
  
-       mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC;
+       mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC;
  
         spin_lock_irqsave(&xhci->lock, flags);
         /* For each port, did anything change?  If so, set that bit in buf. */
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c

index 7113d16e2d3a40f1febae01cedd8bf2dd2308980..952e2ded61af50737c0627d85560b0fe179528f0 100644 (file)
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -514,8 +514,12 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
                         (unsigned long long) addr);
  }
  
+/* flip_cycle means flip the cycle bit of all but the first and last TRB.
+ * (The last TRB actually points to the ring enqueue pointer, which is not part
+ * of this TD.)  This is used to remove partially enqueued isoc TDs from a ring.
+ */
  static void td_to_noop(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
-               struct xhci_td *cur_td)
+               struct xhci_td *cur_td, bool flip_cycle)
  {
         struct xhci_segment *cur_seg;
         union xhci_trb *cur_trb;
@@ -528,6 +532,12 @@ static void td_to_noop(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
                          * leave the pointers intact.
                          */
                         cur_trb->generic.field[3] &= cpu_to_le32(~TRB_CHAIN);
+                       /* Flip the cycle bit (link TRBs can't be the first
+                        * or last TRB).
+                        */
+                       if (flip_cycle)
+                               cur_trb->generic.field[3] ^=
+                                       cpu_to_le32(TRB_CYCLE);
                         xhci_dbg(xhci, "Cancel (unchain) link TRB\n");
                         xhci_dbg(xhci, "Address = %p (0x%llx dma); "
                                         "in seg %p (0x%llx dma)\n",
@@ -541,6 +551,11 @@ static void td_to_noop(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
                         cur_trb->generic.field[2] = 0;
                         /* Preserve only the cycle bit of this TRB */
                         cur_trb->generic.field[3] &= cpu_to_le32(TRB_CYCLE);
+                       /* Flip the cycle bit except on the first or last TRB */
+                       if (flip_cycle && cur_trb != cur_td->first_trb &&
+                                       cur_trb != cur_td->last_trb)
+                               cur_trb->generic.field[3] ^=
+                                       cpu_to_le32(TRB_CYCLE);
                         cur_trb->generic.field[3] |= cpu_to_le32(
                                 TRB_TYPE(TRB_TR_NOOP));
                         xhci_dbg(xhci, "Cancel TRB %p (0x%llx dma) "
@@ -719,14 +734,14 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci,
                                         cur_td->urb->stream_id,
                                         cur_td, &deq_state);
                 else
-                       td_to_noop(xhci, ep_ring, cur_td);
+                       td_to_noop(xhci, ep_ring, cur_td, false);
  remove_finished_td:
                 /*
                  * The event handler won't see a completion for this TD anymore,
                  * so remove it from the endpoint ring's TD list.  Keep it in
                  * the cancelled TD list for URB completion later.
                  */
-               list_del(&cur_td->td_list);
+               list_del_init(&cur_td->td_list);
         }
         last_unlinked_td = cur_td;
         xhci_stop_watchdog_timer_in_irq(xhci, ep);
@@ -754,7 +769,7 @@ remove_finished_td:
         do {
                 cur_td = list_entry(ep->cancelled_td_list.next,
                                 struct xhci_td, cancelled_td_list);
-               list_del(&cur_td->cancelled_td_list);
+               list_del_init(&cur_td->cancelled_td_list);
  
                 /* Clean up the cancelled URB */
                 /* Doesn't matter what we pass for status, since the core will
@@ -862,9 +877,9 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg)
                                 cur_td = list_first_entry(&ring->td_list,
                                                 struct xhci_td,
                                                 td_list);
-                               list_del(&cur_td->td_list);
+                               list_del_init(&cur_td->td_list);
                                 if (!list_empty(&cur_td->cancelled_td_list))
-                                       list_del(&cur_td->cancelled_td_list);
+                                       list_del_init(&cur_td->cancelled_td_list);
                                 xhci_giveback_urb_in_irq(xhci, cur_td,
                                                 -ESHUTDOWN, "killed");
                         }
@@ -873,7 +888,7 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg)
                                                 &temp_ep->cancelled_td_list,
                                                 struct xhci_td,
                                                 cancelled_td_list);
-                               list_del(&cur_td->cancelled_td_list);
+                               list_del_init(&cur_td->cancelled_td_list);
                                 xhci_giveback_urb_in_irq(xhci, cur_td,
                                                 -ESHUTDOWN, "killed");
                         }
@@ -1565,10 +1580,10 @@ td_cleanup:
                         else
                                 *status = 0;
                 }
-               list_del(&td->td_list);
+               list_del_init(&td->td_list);
                 /* Was this TD slated to be cancelled but completed anyway? */
                 if (!list_empty(&td->cancelled_td_list))
-                       list_del(&td->cancelled_td_list);
+                       list_del_init(&td->cancelled_td_list);
  
                 urb_priv->td_cnt++;
                 /* Giveback the urb when all the tds are completed */
@@ -1919,8 +1934,10 @@ static int handle_tx_event(struct xhci_hcd *xhci,
         int status = -EINPROGRESS;
         struct urb_priv *urb_priv;
         struct xhci_ep_ctx *ep_ctx;
+       struct list_head *tmp;
         u32 trb_comp_code;
         int ret = 0;
+       int td_num = 0;
  
         slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags));
         xdev = xhci->devs[slot_id];
@@ -1942,6 +1959,12 @@ static int handle_tx_event(struct xhci_hcd *xhci,
                 return -ENODEV;
         }
  
+       /* Count current td numbers if ep->skip is set */
+       if (ep->skip) {
+               list_for_each(tmp, &ep_ring->td_list)
+                       td_num++;
+       }
+
         event_dma = le64_to_cpu(event->buffer);
         trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
         /* Look for common error cases */
@@ -2053,7 +2076,18 @@ static int handle_tx_event(struct xhci_hcd *xhci,
                         goto cleanup;
                 }
  
+               /* We've skipped all the TDs on the ep ring when ep->skip set */
+               if (ep->skip && td_num == 0) {
+                       ep->skip = false;
+                       xhci_dbg(xhci, "All tds on the ep_ring skipped. "
+                                               "Clear skip flag.\n");
+                       ret = 0;
+                       goto cleanup;
+               }
+
                 td = list_entry(ep_ring->td_list.next, struct xhci_td, td_list);
+               if (ep->skip)
+                       td_num--;
  
                 /* Is this a TRB in the currently executing TD? */
                 event_seg = trb_in_td(ep_ring->deq_seg, ep_ring->dequeue,
@@ -2500,11 +2534,8 @@ static int prepare_transfer(struct xhci_hcd *xhci,
  
         if (td_index == 0) {
                 ret = usb_hcd_link_urb_to_ep(bus_to_hcd(urb->dev->bus), urb);
-               if (unlikely(ret)) {
-                       xhci_urb_free_priv(xhci, urb_priv);
-                       urb->hcpriv = NULL;
+               if (unlikely(ret))
                         return ret;
-               }
         }
  
         td->urb = urb;
@@ -2672,6 +2703,10 @@ static u32 xhci_v1_0_td_remainder(int running_total, int trb_buff_len,
  {
         int packets_transferred;
  
+       /* One TRB with a zero-length data packet. */
+       if (running_total == 0 && trb_buff_len == 0)
+               return 0;
+
         /* All the TRB queueing functions don't count the current TRB in
          * running_total.
          */
@@ -3113,20 +3148,15 @@ static int count_isoc_trbs_needed(struct xhci_hcd *xhci,
                 struct urb *urb, int i)
  {
         int num_trbs = 0;
-       u64 addr, td_len, running_total;
+       u64 addr, td_len;
  
         addr = (u64) (urb->transfer_dma + urb->iso_frame_desc[i].offset);
         td_len = urb->iso_frame_desc[i].length;
  
-       running_total = TRB_MAX_BUFF_SIZE - (addr & (TRB_MAX_BUFF_SIZE - 1));
-       running_total &= TRB_MAX_BUFF_SIZE - 1;
-       if (running_total != 0)
-               num_trbs++;
-
-       while (running_total < td_len) {
+       num_trbs = DIV_ROUND_UP(td_len + (addr & (TRB_MAX_BUFF_SIZE - 1)),
+                       TRB_MAX_BUFF_SIZE);
+       if (num_trbs == 0)
                 num_trbs++;
-               running_total += TRB_MAX_BUFF_SIZE;
-       }
  
         return num_trbs;
  }
@@ -3226,6 +3256,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
         start_trb = &ep_ring->enqueue->generic;
         start_cycle = ep_ring->cycle_state;
  
+       urb_priv = urb->hcpriv;
         /* Queue the first TRB, even if it's zero-length */
         for (i = 0; i < num_tds; i++) {
                 unsigned int total_packet_count;
@@ -3237,9 +3268,11 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
                 addr = start_addr + urb->iso_frame_desc[i].offset;
                 td_len = urb->iso_frame_desc[i].length;
                 td_remain_len = td_len;
-               /* FIXME: Ignoring zero-length packets, can those happen? */
                 total_packet_count = roundup(td_len,
                                 le16_to_cpu(urb->ep->desc.wMaxPacketSize));
+               /* A zero-length transfer still involves at least one packet. */
+               if (total_packet_count == 0)
+                       total_packet_count++;
                 burst_count = xhci_get_burst_count(xhci, urb->dev, urb,
                                 total_packet_count);
                 residue = xhci_get_last_burst_packet_count(xhci,
@@ -3249,12 +3282,13 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
  
                 ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index,
                                 urb->stream_id, trbs_per_td, urb, i, mem_flags);
-               if (ret < 0)
-                       return ret;
+               if (ret < 0) {
+                       if (i == 0)
+                               return ret;
+                       goto cleanup;
+               }
  
-               urb_priv = urb->hcpriv;
                 td = urb_priv->td[i];
-
                 for (j = 0; j < trbs_per_td; j++) {
                         u32 remainder = 0;
                         field = TRB_TBC(burst_count) | TRB_TLBPC(residue);
@@ -3344,6 +3378,27 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
         giveback_first_trb(xhci, slot_id, ep_index, urb->stream_id,
                         start_cycle, start_trb);
         return 0;
+cleanup:
+       /* Clean up a partially enqueued isoc transfer. */
+
+       for (i--; i >= 0; i--)
+               list_del_init(&urb_priv->td[i]->td_list);
+
+       /* Use the first TD as a temporary variable to turn the TDs we've queued
+        * into No-ops with a software-owned cycle bit. That way the hardware
+        * won't accidentally start executing bogus TDs when we partially
+        * overwrite them.  td->first_trb and td->start_seg are already set.
+        */
+       urb_priv->td[0]->last_trb = ep_ring->enqueue;
+       /* Every TRB except the first & last will have its cycle bit flipped. */
+       td_to_noop(xhci, ep_ring, urb_priv->td[0], true);
+
+       /* Reset the ring enqueue back to the first TRB and its cycle bit. */
+       ep_ring->enqueue = urb_priv->td[0]->first_trb;
+       ep_ring->enq_seg = urb_priv->td[0]->start_seg;
+       ep_ring->cycle_state = start_cycle;
+       usb_hcd_unlink_urb_from_ep(bus_to_hcd(urb->dev->bus), urb);
+       return ret;
  }
  
  /*
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c

index 763f484bc092d1a60660411580738a519410f01a..3a0f695138f4195d4caf988c2fb0266ca7678e5f 100644 (file)
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -345,7 +345,8 @@ static void xhci_event_ring_work(unsigned long arg)
         spin_lock_irqsave(&xhci->lock, flags);
         temp = xhci_readl(xhci, &xhci->op_regs->status);
         xhci_dbg(xhci, "op reg status = 0x%x\n", temp);
-       if (temp == 0xffffffff || (xhci->xhc_state & XHCI_STATE_DYING)) {
+       if (temp == 0xffffffff || (xhci->xhc_state & XHCI_STATE_DYING) ||
+                       (xhci->xhc_state & XHCI_STATE_HALTED)) {
                 xhci_dbg(xhci, "HW died, polling stopped.\n");
                 spin_unlock_irqrestore(&xhci->lock, flags);
                 return;
@@ -939,8 +940,11 @@ static int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev,
                 return 0;
         }
  
+       xhci = hcd_to_xhci(hcd);
+       if (xhci->xhc_state & XHCI_STATE_HALTED)
+               return -ENODEV;
+
         if (check_virt_dev) {
-               xhci = hcd_to_xhci(hcd);
                 if (!udev->slot_id || !xhci->devs
                         || !xhci->devs[udev->slot_id]) {
                         printk(KERN_DEBUG "xHCI %s called with unaddressed "
@@ -1081,8 +1085,11 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
                 if (urb->dev->speed == USB_SPEED_FULL) {
                         ret = xhci_check_maxpacket(xhci, slot_id,
                                         ep_index, urb);
-                       if (ret < 0)
+                       if (ret < 0) {
+                               xhci_urb_free_priv(xhci, urb_priv);
+                               urb->hcpriv = NULL;
                                 return ret;
+                       }
                 }
  
                 /* We have a spinlock and interrupts disabled, so we must pass
@@ -1093,6 +1100,8 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
                         goto dying;
                 ret = xhci_queue_ctrl_tx(xhci, GFP_ATOMIC, urb,
                                 slot_id, ep_index);
+               if (ret)
+                       goto free_priv;
                 spin_unlock_irqrestore(&xhci->lock, flags);
         } else if (usb_endpoint_xfer_bulk(&urb->ep->desc)) {
                 spin_lock_irqsave(&xhci->lock, flags);
@@ -1113,6 +1122,8 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
                         ret = xhci_queue_bulk_tx(xhci, GFP_ATOMIC, urb,
                                         slot_id, ep_index);
                 }
+               if (ret)
+                       goto free_priv;
                 spin_unlock_irqrestore(&xhci->lock, flags);
         } else if (usb_endpoint_xfer_int(&urb->ep->desc)) {
                 spin_lock_irqsave(&xhci->lock, flags);
@@ -1120,6 +1131,8 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
                         goto dying;
                 ret = xhci_queue_intr_tx(xhci, GFP_ATOMIC, urb,
                                 slot_id, ep_index);
+               if (ret)
+                       goto free_priv;
                 spin_unlock_irqrestore(&xhci->lock, flags);
         } else {
                 spin_lock_irqsave(&xhci->lock, flags);
@@ -1127,18 +1140,22 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
                         goto dying;
                 ret = xhci_queue_isoc_tx_prepare(xhci, GFP_ATOMIC, urb,
                                 slot_id, ep_index);
+               if (ret)
+                       goto free_priv;
                 spin_unlock_irqrestore(&xhci->lock, flags);
         }
  exit:
         return ret;
  dying:
-       xhci_urb_free_priv(xhci, urb_priv);
-       urb->hcpriv = NULL;
         xhci_dbg(xhci, "Ep 0x%x: URB %p submitted for "
                         "non-responsive xHCI host.\n",
                         urb->ep->desc.bEndpointAddress, urb);
+       ret = -ESHUTDOWN;
+free_priv:
+       xhci_urb_free_priv(xhci, urb_priv);
+       urb->hcpriv = NULL;
         spin_unlock_irqrestore(&xhci->lock, flags);
-       return -ESHUTDOWN;
+       return ret;
  }
  
  /* Get the right ring for the given URB.
@@ -1235,6 +1252,13 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
         if (temp == 0xffffffff || (xhci->xhc_state & XHCI_STATE_HALTED)) {
                 xhci_dbg(xhci, "HW died, freeing TD.\n");
                 urb_priv = urb->hcpriv;
+               for (i = urb_priv->td_cnt; i < urb_priv->length; i++) {
+                       td = urb_priv->td[i];
+                       if (!list_empty(&td->td_list))
+                               list_del_init(&td->td_list);
+                       if (!list_empty(&td->cancelled_td_list))
+                               list_del_init(&td->cancelled_td_list);
+               }
  
                 usb_hcd_unlink_urb_from_ep(hcd, urb);
                 spin_unlock_irqrestore(&xhci->lock, flags);
@@ -1242,7 +1266,8 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
                 xhci_urb_free_priv(xhci, urb_priv);
                 return ret;
         }
-       if (xhci->xhc_state & XHCI_STATE_DYING) {
+       if ((xhci->xhc_state & XHCI_STATE_DYING) ||
+                       (xhci->xhc_state & XHCI_STATE_HALTED)) {
                 xhci_dbg(xhci, "Ep 0x%x: URB %p to be canceled on "
                                 "non-responsive xHCI host.\n",
                                 urb->ep->desc.bEndpointAddress, urb);
@@ -2665,7 +2690,10 @@ void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
         int i, ret;
  
         ret = xhci_check_args(hcd, udev, NULL, 0, true, __func__);
-       if (ret <= 0)
+       /* If the host is halted due to driver unload, we still need to free the
+        * device.
+        */
+       if (ret <= 0 && ret != -ENODEV)
                 return;
  
         virt_dev = xhci->devs[udev->slot_id];
@@ -2679,7 +2707,8 @@ void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
         spin_lock_irqsave(&xhci->lock, flags);
         /* Don't disable the slot if the host controller is dead. */
         state = xhci_readl(xhci, &xhci->op_regs->status);
-       if (state == 0xffffffff || (xhci->xhc_state & XHCI_STATE_DYING)) {
+       if (state == 0xffffffff || (xhci->xhc_state & XHCI_STATE_DYING) ||
+                       (xhci->xhc_state & XHCI_STATE_HALTED)) {
                 xhci_free_virt_device(xhci, udev->slot_id);
                 spin_unlock_irqrestore(&xhci->lock, flags);
                 return;
diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig

index 6192b45959f434d2e32b10a9e404f0f7c02fbd47..fc34b8b1191055bcb568a171a518d389b4266f25 100644 (file)
--- a/drivers/usb/musb/Kconfig
+++ b/drivers/usb/musb/Kconfig
@@ -3,9 +3,6 @@
  # for silicon based on Mentor Graphics INVENTRA designs
  #
  
-comment "Enable Host or Gadget support to see Inventra options"
-       depends on !USB && USB_GADGET=n
-
  # (M)HDRC = (Multipoint) Highspeed Dual-Role Controller
  config USB_MUSB_HDRC
         depends on USB && USB_GADGET
diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c

index ae8c396177434a7cb6ea10e67893cee00a8ff5a3..5e7cfba5b079b02d781710c106cfbbe38f37a358 100644 (file)
--- a/drivers/usb/musb/blackfin.c
+++ b/drivers/usb/musb/blackfin.c
@@ -17,6 +17,7 @@
  #include <linux/io.h>
  #include <linux/platform_device.h>
  #include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
  
  #include <asm/cacheflush.h>
  
diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c

index 149f3f310a0a1f9903793ca5f56daaf1038a4024..318fb4e8a8850cb21789a4d9f9d89d23bd2fbd12 100644 (file)
--- a/drivers/usb/musb/cppi_dma.c
+++ b/drivers/usb/musb/cppi_dma.c
@@ -226,8 +226,10 @@ static int cppi_controller_stop(struct dma_controller *c)
         struct cppi             *controller;
         void __iomem            *tibase;
         int                     i;
+       struct musb             *musb;
  
         controller = container_of(c, struct cppi, controller);
+       musb = controller->musb;
  
         tibase = controller->tibase;
         /* DISABLE INDIVIDUAL CHANNEL Interrupts */
@@ -289,9 +291,11 @@ cppi_channel_allocate(struct dma_controller *c,
         u8                      index;
         struct cppi_channel     *cppi_ch;
         void __iomem            *tibase;
+       struct musb             *musb;
  
         controller = container_of(c, struct cppi, controller);
         tibase = controller->tibase;
+       musb = controller->musb;
  
         /* ep0 doesn't use DMA; remember cppi indices are 0..N-1 */
         index = ep->epnum - 1;
@@ -339,7 +343,8 @@ static void cppi_channel_release(struct dma_channel *channel)
         c = container_of(channel, struct cppi_channel, channel);
         tibase = c->controller->tibase;
         if (!c->hw_ep)
-               dev_dbg(musb->controller, "releasing idle DMA channel %p\n", c);
+               dev_dbg(c->controller->musb->controller,
+                       "releasing idle DMA channel %p\n", c);
         else if (!c->transmit)
                 core_rxirq_enable(tibase, c->index + 1);
  
@@ -357,10 +362,11 @@ cppi_dump_rx(int level, struct cppi_channel *c, const char *tag)
  
         musb_ep_select(base, c->index + 1);
  
-       DBG(level, "RX DMA%d%s: %d left, csr %04x, "
-                       "%08x H%08x S%08x C%08x, "
-                       "B%08x L%08x %08x .. %08x"
-                       "\n",
+       dev_dbg(c->controller->musb->controller,
+               "RX DMA%d%s: %d left, csr %04x, "
+               "%08x H%08x S%08x C%08x, "
+               "B%08x L%08x %08x .. %08x"
+               "\n",
                 c->index, tag,
                 musb_readl(c->controller->tibase,
                         DAVINCI_RXCPPI_BUFCNT0_REG + 4 * c->index),
@@ -387,10 +393,11 @@ cppi_dump_tx(int level, struct cppi_channel *c, const char *tag)
  
         musb_ep_select(base, c->index + 1);
  
-       DBG(level, "TX DMA%d%s: csr %04x, "
-                       "H%08x S%08x C%08x %08x, "
-                       "F%08x L%08x .. %08x"
-                       "\n",
+       dev_dbg(c->controller->musb->controller,
+               "TX DMA%d%s: csr %04x, "
+               "H%08x S%08x C%08x %08x, "
+               "F%08x L%08x .. %08x"
+               "\n",
                 c->index, tag,
                 musb_readw(c->hw_ep->regs, MUSB_TXCSR),
  
@@ -1022,6 +1029,7 @@ static bool cppi_rx_scan(struct cppi *cppi, unsigned ch)
         int                             i;
         dma_addr_t                      safe2ack;
         void __iomem                    *regs = rx->hw_ep->regs;
+       struct musb                     *musb = cppi->musb;
  
         cppi_dump_rx(6, rx, "/K");
  
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h

index 668eeef601ae94f093fa925706afcbcc2b1f5e37..b3c065ab9dbc2f9aace5b36939390761fa0c5218 100644 (file)
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -172,7 +172,8 @@ enum musb_g_ep0_state {
  #endif
  
  /* TUSB mapping: "flat" plus ep0 special cases */
-#if    defined(CONFIG_USB_MUSB_TUSB6010)
+#if defined(CONFIG_USB_MUSB_TUSB6010) || \
+       defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
  #define musb_ep_select(_mbase, _epnum) \
         musb_writeb((_mbase), MUSB_INDEX, (_epnum))
  #define        MUSB_EP_OFFSET                  MUSB_TUSB_OFFSET
@@ -241,7 +242,8 @@ struct musb_hw_ep {
         void __iomem            *fifo;
         void __iomem            *regs;
  
-#ifdef CONFIG_USB_MUSB_TUSB6010
+#if defined(CONFIG_USB_MUSB_TUSB6010) || \
+       defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
         void __iomem            *conf;
  #endif
  
@@ -258,7 +260,8 @@ struct musb_hw_ep {
         struct dma_channel      *tx_channel;
         struct dma_channel      *rx_channel;
  
-#ifdef CONFIG_USB_MUSB_TUSB6010
+#if defined(CONFIG_USB_MUSB_TUSB6010) || \
+       defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
         /* TUSB has "asynchronous" and "synchronous" dma modes */
         dma_addr_t              fifo_async;
         dma_addr_t              fifo_sync;
@@ -356,7 +359,8 @@ struct musb {
         void __iomem            *ctrl_base;
         void __iomem            *mregs;
  
-#ifdef CONFIG_USB_MUSB_TUSB6010
+#if defined(CONFIG_USB_MUSB_TUSB6010) || \
+       defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
         dma_addr_t              async;
         dma_addr_t              sync;
         void __iomem            *sync_va;
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c

index b67a062f556bb65d56fead8404546dfcfb75f541..e81820370d6f9cab155d1fac9ef83d4c8653c723 100644 (file)
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -1698,6 +1698,8 @@ static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on)
  
         is_on = !!is_on;
  
+       pm_runtime_get_sync(musb->controller);
+
         /* NOTE: this assumes we are sensing vbus; we'd rather
          * not pullup unless the B-session is active.
          */
@@ -1707,6 +1709,9 @@ static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on)
                 musb_pullup(musb, is_on);
         }
         spin_unlock_irqrestore(&musb->lock, flags);
+
+       pm_runtime_put(musb->controller);
+
         return 0;
  }
  
@@ -1851,6 +1856,7 @@ int __init musb_gadget_setup(struct musb *musb)
  
         return 0;
  err:
+       musb->g.dev.parent = NULL;
         device_unregister(&musb->g.dev);
         return status;
  }
@@ -1858,7 +1864,8 @@ err:
  void musb_gadget_cleanup(struct musb *musb)
  {
         usb_del_gadget_udc(&musb->g);
-       device_unregister(&musb->g.dev);
+       if (musb->g.dev.parent)
+               device_unregister(&musb->g.dev);
  }
  
  /*
diff --git a/drivers/usb/musb/musb_regs.h b/drivers/usb/musb/musb_regs.h

index 82410703dcd3c1d87e688007c52cd737675d03d6..03f2655af290758cb708a67f832abd6257df2894 100644 (file)
--- a/drivers/usb/musb/musb_regs.h
+++ b/drivers/usb/musb/musb_regs.h
@@ -234,7 +234,8 @@
  #define MUSB_TESTMODE          0x0F    /* 8 bit */
  
  /* Get offset for a given FIFO from musb->mregs */
-#ifdef CONFIG_USB_MUSB_TUSB6010
+#if defined(CONFIG_USB_MUSB_TUSB6010) ||       \
+       defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
  #define MUSB_FIFO_OFFSET(epnum)        (0x200 + ((epnum) * 0x20))
  #else
  #define MUSB_FIFO_OFFSET(epnum)        (0x20 + ((epnum) * 4))
@@ -295,7 +296,8 @@
  #define MUSB_FLAT_OFFSET(_epnum, _offset)      \
         (0x100 + (0x10*(_epnum)) + (_offset))
  
-#ifdef CONFIG_USB_MUSB_TUSB6010
+#if defined(CONFIG_USB_MUSB_TUSB6010) ||       \
+       defined(CONFIG_USB_MUSB_TUSB6010_MODULE)
  /* TUSB6010 EP0 configuration register is special */
  #define MUSB_TUSB_OFFSET(_epnum, _offset)      \
         (0x10 + _offset)
diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c

index 9eec41fbf3a433e0ac250035a997df4440bef1ef..ec1480191f78752a1ff16b09566f3f4d2d6512b1 100644 (file)
--- a/drivers/usb/musb/tusb6010.c
+++ b/drivers/usb/musb/tusb6010.c
@@ -18,6 +18,7 @@
  #include <linux/kernel.h>
  #include <linux/errno.h>
  #include <linux/init.h>
+#include <linux/prefetch.h>
  #include <linux/usb.h>
  #include <linux/irq.h>
  #include <linux/platform_device.h>
diff --git a/drivers/usb/musb/tusb6010_omap.c b/drivers/usb/musb/tusb6010_omap.c

index c784e6c03aac7fbc4cfec0927ee689496ab61749..b67b4bc596c18953b5c9abf997fb3f25605be1fe 100644 (file)
--- a/drivers/usb/musb/tusb6010_omap.c
+++ b/drivers/usb/musb/tusb6010_omap.c
@@ -20,6 +20,7 @@
  #include <plat/mux.h>
  
  #include "musb_core.h"
+#include "tusb6010.h"
  
  #define to_chdat(c)            ((struct tusb_omap_dma_ch *)(c)->private_data)
  
@@ -89,7 +90,7 @@ static inline int tusb_omap_use_shared_dmareq(struct tusb_omap_dma_ch *chdat)
         u32             reg = musb_readl(chdat->tbase, TUSB_DMA_EP_MAP);
  
         if (reg != 0) {
-               dev_dbg(musb->controller, "ep%i dmareq0 is busy for ep%i\n",
+               dev_dbg(chdat->musb->controller, "ep%i dmareq0 is busy for ep%i\n",
                         chdat->epnum, reg & 0xf);
                 return -EAGAIN;
         }
diff --git a/drivers/usb/musb/ux500_dma.c b/drivers/usb/musb/ux500_dma.c

index cecace4118327c8a148ab976d1c8dfb234e7bf57..ef4333f4bbe021178501d14ae4edbcd5562c9dbe 100644 (file)
--- a/drivers/usb/musb/ux500_dma.c
+++ b/drivers/usb/musb/ux500_dma.c
@@ -65,7 +65,8 @@ static void ux500_tx_work(struct work_struct *data)
         struct musb *musb = hw_ep->musb;
         unsigned long flags;
  
-       DBG(4, "DMA tx transfer done on hw_ep=%d\n", hw_ep->epnum);
+       dev_dbg(musb->controller, "DMA tx transfer done on hw_ep=%d\n",
+               hw_ep->epnum);
  
         spin_lock_irqsave(&musb->lock, flags);
         ux500_channel->channel.actual_len = ux500_channel->cur_len;
@@ -84,7 +85,8 @@ static void ux500_rx_work(struct work_struct *data)
         struct musb *musb = hw_ep->musb;
         unsigned long flags;
  
-       DBG(4, "DMA rx transfer done on hw_ep=%d\n", hw_ep->epnum);
+       dev_dbg(musb->controller, "DMA rx transfer done on hw_ep=%d\n",
+               hw_ep->epnum);
  
         spin_lock_irqsave(&musb->lock, flags);
         ux500_channel->channel.actual_len = ux500_channel->cur_len;
@@ -116,9 +118,11 @@ static bool ux500_configure_channel(struct dma_channel *channel,
         enum dma_slave_buswidth addr_width;
         dma_addr_t usb_fifo_addr = (MUSB_FIFO_OFFSET(hw_ep->epnum) +
                                         ux500_channel->controller->phy_base);
+       struct musb *musb = ux500_channel->controller->private_data;
  
-       DBG(4, "packet_sz=%d, mode=%d, dma_addr=0x%x, len=%d is_tx=%d\n",
-                       packet_sz, mode, dma_addr, len, ux500_channel->is_tx);
+       dev_dbg(musb->controller,
+               "packet_sz=%d, mode=%d, dma_addr=0x%x, len=%d is_tx=%d\n",
+               packet_sz, mode, dma_addr, len, ux500_channel->is_tx);
  
         ux500_channel->cur_len = len;
  
@@ -133,15 +137,13 @@ static bool ux500_configure_channel(struct dma_channel *channel,
                                         DMA_SLAVE_BUSWIDTH_4_BYTES;
  
         slave_conf.direction = direction;
-       if (direction == DMA_FROM_DEVICE) {
-               slave_conf.src_addr = usb_fifo_addr;
-               slave_conf.src_addr_width = addr_width;
-               slave_conf.src_maxburst = 16;
-       } else {
-               slave_conf.dst_addr = usb_fifo_addr;
-               slave_conf.dst_addr_width = addr_width;
-               slave_conf.dst_maxburst = 16;
-       }
+       slave_conf.src_addr = usb_fifo_addr;
+       slave_conf.src_addr_width = addr_width;
+       slave_conf.src_maxburst = 16;
+       slave_conf.dst_addr = usb_fifo_addr;
+       slave_conf.dst_addr_width = addr_width;
+       slave_conf.dst_maxburst = 16;
+
         dma_chan->device->device_control(dma_chan, DMA_SLAVE_CONFIG,
                                              (unsigned long) &slave_conf);
  
@@ -166,6 +168,7 @@ static struct dma_channel *ux500_dma_channel_allocate(struct dma_controller *c,
         struct ux500_dma_controller *controller = container_of(c,
                         struct ux500_dma_controller, controller);
         struct ux500_dma_channel *ux500_channel = NULL;
+       struct musb *musb = controller->private_data;
         u8 ch_num = hw_ep->epnum - 1;
         u32 max_ch;
  
@@ -192,7 +195,7 @@ static struct dma_channel *ux500_dma_channel_allocate(struct dma_controller *c,
         ux500_channel->hw_ep = hw_ep;
         ux500_channel->is_allocated = 1;
  
-       DBG(7, "hw_ep=%d, is_tx=0x%x, channel=%d\n",
+       dev_dbg(musb->controller, "hw_ep=%d, is_tx=0x%x, channel=%d\n",
                 hw_ep->epnum, is_tx, ch_num);
  
         return &(ux500_channel->channel);
@@ -201,8 +204,9 @@ static struct dma_channel *ux500_dma_channel_allocate(struct dma_controller *c,
  static void ux500_dma_channel_release(struct dma_channel *channel)
  {
         struct ux500_dma_channel *ux500_channel = channel->private_data;
+       struct musb *musb = ux500_channel->controller->private_data;
  
-       DBG(7, "channel=%d\n", ux500_channel->ch_num);
+       dev_dbg(musb->controller, "channel=%d\n", ux500_channel->ch_num);
  
         if (ux500_channel->is_allocated) {
                 ux500_channel->is_allocated = 0;
@@ -252,8 +256,8 @@ static int ux500_dma_channel_abort(struct dma_channel *channel)
         void __iomem *epio = musb->endpoints[ux500_channel->hw_ep->epnum].regs;
         u16 csr;
  
-       DBG(4, "channel=%d, is_tx=%d\n", ux500_channel->ch_num,
-                                               ux500_channel->is_tx);
+       dev_dbg(musb->controller, "channel=%d, is_tx=%d\n",
+               ux500_channel->ch_num, ux500_channel->is_tx);
  
         if (channel->status == MUSB_DMA_STATUS_BUSY) {
                 if (ux500_channel->is_tx) {
diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c

index ba79dbf5adbcc6cc5ab9080700d9b2c32b2ba775..cb2d451d511ec07bbd099c05b43c3ffa3ad0a27e 100644 (file)
--- a/drivers/usb/renesas_usbhs/mod_gadget.c
+++ b/drivers/usb/renesas_usbhs/mod_gadget.c
@@ -14,6 +14,7 @@
   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   *
   */
+#include <linux/dma-mapping.h>
  #include <linux/io.h>
  #include <linux/module.h>
  #include <linux/platform_device.h>
@@ -76,7 +77,7 @@ struct usbhsg_recip_handle {
                 struct usbhsg_gpriv, mod)
  
  #define __usbhsg_for_each_uep(start, pos, g, i)        \
-       for (i = start, pos = (g)->uep;         \
+       for (i = start, pos = (g)->uep + i;     \
              i < (g)->uep_size;                 \
              i++, pos = (g)->uep + i)
  
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c

index 2e06b90aa1f8f50139c8873011f1c34956077b63..5fc13e717911708fa3f8b1d443b190dad2ecadaf 100644 (file)
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -101,6 +101,7 @@ static int   ftdi_jtag_probe(struct usb_serial *serial);
  static int   ftdi_mtxorb_hack_setup(struct usb_serial *serial);
  static int   ftdi_NDI_device_setup(struct usb_serial *serial);
  static int   ftdi_stmclite_probe(struct usb_serial *serial);
+static int   ftdi_8u2232c_probe(struct usb_serial *serial);
  static void  ftdi_USB_UIRT_setup(struct ftdi_private *priv);
  static void  ftdi_HE_TIRA1_setup(struct ftdi_private *priv);
  
@@ -128,6 +129,10 @@ static struct ftdi_sio_quirk ftdi_stmclite_quirk = {
         .probe  = ftdi_stmclite_probe,
  };
  
+static struct ftdi_sio_quirk ftdi_8u2232c_quirk = {
+       .probe  = ftdi_8u2232c_probe,
+};
+
  /*
   * The 8U232AM has the same API as the sio except for:
   * - it can support MUCH higher baudrates; up to:
@@ -151,6 +156,7 @@ static struct ftdi_sio_quirk ftdi_stmclite_quirk = {
   * /sys/bus/usb/ftdi_sio/new_id, then send patch/report!
   */
  static struct usb_device_id id_table_combined [] = {
+       { USB_DEVICE(FTDI_VID, FTDI_ZEITCONTROL_TAGTRACE_MIFARE_PID) },
         { USB_DEVICE(FTDI_VID, FTDI_CTI_MINI_PID) },
         { USB_DEVICE(FTDI_VID, FTDI_CTI_NANO_PID) },
         { USB_DEVICE(FTDI_VID, FTDI_AMC232_PID) },
@@ -177,7 +183,8 @@ static struct usb_device_id id_table_combined [] = {
         { USB_DEVICE(FTDI_VID, FTDI_8U232AM_PID) },
         { USB_DEVICE(FTDI_VID, FTDI_8U232AM_ALT_PID) },
         { USB_DEVICE(FTDI_VID, FTDI_232RL_PID) },
-       { USB_DEVICE(FTDI_VID, FTDI_8U2232C_PID) },
+       { USB_DEVICE(FTDI_VID, FTDI_8U2232C_PID) ,
+               .driver_info = (kernel_ulong_t)&ftdi_8u2232c_quirk },
         { USB_DEVICE(FTDI_VID, FTDI_4232H_PID) },
         { USB_DEVICE(FTDI_VID, FTDI_232H_PID) },
         { USB_DEVICE(FTDI_VID, FTDI_MICRO_CHAMELEON_PID) },
@@ -1171,7 +1178,7 @@ static __u32 get_ftdi_divisor(struct tty_struct *tty,
         case FT2232H: /* FT2232H chip */
         case FT4232H: /* FT4232H chip */
         case FT232H:  /* FT232H chip */
-               if ((baud <= 12000000) & (baud >= 1200)) {
+               if ((baud <= 12000000) && (baud >= 1200)) {
                         div_value = ftdi_2232h_baud_to_divisor(baud);
                 } else if (baud < 1200) {
                         div_value = ftdi_232bm_baud_to_divisor(baud);
@@ -1205,7 +1212,10 @@ static int change_speed(struct tty_struct *tty, struct usb_serial_port *port)
         urb_index_value = get_ftdi_divisor(tty, port);
         urb_value = (__u16)urb_index_value;
         urb_index = (__u16)(urb_index_value >> 16);
-       if (priv->interface) {  /* FT2232C */
+       if ((priv->chip_type == FT2232C) || (priv->chip_type == FT2232H) ||
+               (priv->chip_type == FT4232H) || (priv->chip_type == FT232H)) {
+               /* Probably the BM type needs the MSB of the encoded fractional
+                * divider also moved like for the chips above. Any infos? */
                 urb_index = (__u16)((urb_index << 8) | priv->interface);
         }
  
@@ -1733,6 +1743,18 @@ static int ftdi_jtag_probe(struct usb_serial *serial)
         return 0;
  }
  
+static int ftdi_8u2232c_probe(struct usb_serial *serial)
+{
+       struct usb_device *udev = serial->dev;
+
+       dbg("%s", __func__);
+
+       if (strcmp(udev->manufacturer, "CALAO Systems") == 0)
+               return ftdi_jtag_probe(serial);
+
+       return 0;
+}
+
  /*
   * First and second port on STMCLiteadaptors is reserved for JTAG interface
   * and the forth port for pio
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h

index 19156d1049fe22134e745158a6dc64da1a9d0a1b..bf5227ad3ef7b7c55dfe00cb2ea5d5e5911f44ff 100644 (file)
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -1159,4 +1159,8 @@
  /* USB-Nano-485*/
  #define FTDI_CTI_NANO_PID      0xF60B
  
-
+/*
+ * ZeitControl cardsystems GmbH rfid-readers http://zeitconrol.de
+ */
+/* TagTracer MIFARE*/
+#define FTDI_ZEITCONTROL_TAGTRACE_MIFARE_PID   0xF7C0
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c

index 60b25d8ea0e2c14376ee4fad71d21a680f0502f7..fe22e90bc879551a912ebcbcbf974a83c4ae0cb9 100644 (file)
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -148,6 +148,12 @@ static void option_instat_callback(struct urb *urb);
  #define HUAWEI_PRODUCT_K4505                   0x1464
  #define HUAWEI_PRODUCT_K3765                   0x1465
  #define HUAWEI_PRODUCT_E14AC                   0x14AC
+#define HUAWEI_PRODUCT_K3806                   0x14AE
+#define HUAWEI_PRODUCT_K4605                   0x14C6
+#define HUAWEI_PRODUCT_K3770                   0x14C9
+#define HUAWEI_PRODUCT_K3771                   0x14CA
+#define HUAWEI_PRODUCT_K4510                   0x14CB
+#define HUAWEI_PRODUCT_K4511                   0x14CC
  #define HUAWEI_PRODUCT_ETS1220                 0x1803
  #define HUAWEI_PRODUCT_E353                    0x1506
  
@@ -412,6 +418,56 @@ static void option_instat_callback(struct urb *urb);
  #define SAMSUNG_VENDOR_ID                       0x04e8
  #define SAMSUNG_PRODUCT_GT_B3730                0x6889
  
+/* YUGA products  www.yuga-info.com*/
+#define YUGA_VENDOR_ID                         0x257A
+#define YUGA_PRODUCT_CEM600                    0x1601
+#define YUGA_PRODUCT_CEM610                    0x1602
+#define YUGA_PRODUCT_CEM500                    0x1603
+#define YUGA_PRODUCT_CEM510                    0x1604
+#define YUGA_PRODUCT_CEM800                    0x1605
+#define YUGA_PRODUCT_CEM900                    0x1606
+
+#define YUGA_PRODUCT_CEU818                    0x1607
+#define YUGA_PRODUCT_CEU816                    0x1608
+#define YUGA_PRODUCT_CEU828                    0x1609
+#define YUGA_PRODUCT_CEU826                    0x160A
+#define YUGA_PRODUCT_CEU518                    0x160B
+#define YUGA_PRODUCT_CEU516                    0x160C
+#define YUGA_PRODUCT_CEU528                    0x160D
+#define YUGA_PRODUCT_CEU526                    0x160F
+
+#define YUGA_PRODUCT_CWM600                    0x2601
+#define YUGA_PRODUCT_CWM610                    0x2602
+#define YUGA_PRODUCT_CWM500                    0x2603
+#define YUGA_PRODUCT_CWM510                    0x2604
+#define YUGA_PRODUCT_CWM800                    0x2605
+#define YUGA_PRODUCT_CWM900                    0x2606
+
+#define YUGA_PRODUCT_CWU718                    0x2607
+#define YUGA_PRODUCT_CWU716                    0x2608
+#define YUGA_PRODUCT_CWU728                    0x2609
+#define YUGA_PRODUCT_CWU726                    0x260A
+#define YUGA_PRODUCT_CWU518                    0x260B
+#define YUGA_PRODUCT_CWU516                    0x260C
+#define YUGA_PRODUCT_CWU528                    0x260D
+#define YUGA_PRODUCT_CWU526                    0x260F
+
+#define YUGA_PRODUCT_CLM600                    0x2601
+#define YUGA_PRODUCT_CLM610                    0x2602
+#define YUGA_PRODUCT_CLM500                    0x2603
+#define YUGA_PRODUCT_CLM510                    0x2604
+#define YUGA_PRODUCT_CLM800                    0x2605
+#define YUGA_PRODUCT_CLM900                    0x2606
+
+#define YUGA_PRODUCT_CLU718                    0x2607
+#define YUGA_PRODUCT_CLU716                    0x2608
+#define YUGA_PRODUCT_CLU728                    0x2609
+#define YUGA_PRODUCT_CLU726                    0x260A
+#define YUGA_PRODUCT_CLU518                    0x260B
+#define YUGA_PRODUCT_CLU516                    0x260C
+#define YUGA_PRODUCT_CLU528                    0x260D
+#define YUGA_PRODUCT_CLU526                    0x260F
+
  /* some devices interfaces need special handling due to a number of reasons */
  enum option_blacklist_reason {
                 OPTION_BLACKLIST_NONE = 0,
@@ -547,6 +603,16 @@ static const struct usb_device_id option_ids[] = {
         { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3765, 0xff, 0xff, 0xff) },
         { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_ETS1220, 0xff, 0xff, 0xff) },
         { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E14AC, 0xff, 0xff, 0xff) },
+       { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3806, 0xff, 0xff, 0xff) },
+       { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4605, 0xff, 0xff, 0xff) },
+       { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x31) },
+       { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x32) },
+       { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3771, 0xff, 0x02, 0x31) },
+       { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3771, 0xff, 0x02, 0x32) },
+       { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4510, 0xff, 0x01, 0x31) },
+       { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4510, 0xff, 0x01, 0x32) },
+       { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4511, 0xff, 0x01, 0x31) },
+       { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4511, 0xff, 0x01, 0x32) },
         { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E353, 0xff, 0x01, 0x01) },
         { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V640) },
         { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V620) },
@@ -993,6 +1059,48 @@ static const struct usb_device_id option_ids[] = {
         { USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */
         { USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */
         { USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM600) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM610) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM500) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM510) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM800) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM900) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU818) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU816) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU828) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU826) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU518) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU516) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU528) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU526) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM600) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM610) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM500) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM510) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM800) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM900) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU718) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU716) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU728) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU726) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU518) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU516) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU528) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU526) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM600) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM610) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM500) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM510) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM800) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM900) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU718) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU716) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU728) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU726) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU518) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU516) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU528) },
+       { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU526) },
         { } /* Terminating entry */
  };
  MODULE_DEVICE_TABLE(usb, option_ids);
@@ -1122,11 +1230,13 @@ static int option_probe(struct usb_serial *serial,
                 serial->interface->cur_altsetting->desc.bInterfaceClass != 0xff)
                 return -ENODEV;
  
-       /* Don't bind network interfaces on Huawei K3765 & K4505 */
+       /* Don't bind network interfaces on Huawei K3765, K4505 & K4605 */
         if (serial->dev->descriptor.idVendor == HUAWEI_VENDOR_ID &&
                 (serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K3765 ||
-                       serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4505) &&
-               serial->interface->cur_altsetting->desc.bInterfaceNumber == 1)
+                       serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4505 ||
+                       serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4605) &&
+               (serial->interface->cur_altsetting->desc.bInterfaceNumber == 1 ||
+                       serial->interface->cur_altsetting->desc.bInterfaceNumber == 2))
                 return -ENODEV;
  
         /* Don't bind network interface on Samsung GT-B3730, it is handled by a separate module */
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c

index 54a9dab1f33b4e4d335201d04572499ab701c424..aeccc7f0a93c5db645fa0442b433ba2291265af2 100644 (file)
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -45,6 +45,7 @@ static const struct usb_device_id id_table[] = {
         {USB_DEVICE(0x05c6, 0x9203)},   /* Generic Gobi Modem device */
         {USB_DEVICE(0x05c6, 0x9222)},   /* Generic Gobi Modem device */
         {USB_DEVICE(0x05c6, 0x9008)},   /* Generic Gobi QDL device */
+       {USB_DEVICE(0x05c6, 0x9009)},   /* Generic Gobi Modem device */
         {USB_DEVICE(0x05c6, 0x9201)},   /* Generic Gobi QDL device */
         {USB_DEVICE(0x05c6, 0x9221)},   /* Generic Gobi QDL device */
         {USB_DEVICE(0x05c6, 0x9231)},   /* Generic Gobi QDL device */
@@ -78,6 +79,7 @@ static const struct usb_device_id id_table[] = {
         {USB_DEVICE(0x1199, 0x9008)},   /* Sierra Wireless Gobi 2000 Modem device (VT773) */
         {USB_DEVICE(0x1199, 0x9009)},   /* Sierra Wireless Gobi 2000 Modem device (VT773) */
         {USB_DEVICE(0x1199, 0x900a)},   /* Sierra Wireless Gobi 2000 Modem device (VT773) */
+       {USB_DEVICE(0x1199, 0x9011)},   /* Sierra Wireless Gobi 2000 Modem device (MC8305) */
         {USB_DEVICE(0x16d8, 0x8001)},   /* CMDTech Gobi 2000 QDL device (VU922) */
         {USB_DEVICE(0x16d8, 0x8002)},   /* CMDTech Gobi 2000 Modem device (VU922) */
         {USB_DEVICE(0x05c6, 0x9204)},   /* Gobi 2000 QDL device */
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h

index ccff3483eebc87d0c82a562cbffa3f5ab36ab878..3041a974faf39278ef8fad4033e089f8e64b7b9c 100644 (file)
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -1988,6 +1988,16 @@ UNUSUAL_DEV(  0x4146, 0xba01, 0x0100, 0x0100,
                 "Micro Mini 1GB",
                 USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NOT_LOCKABLE ),
  
+/*
+ * Nick Bowler <nbowler@elliptictech.com>
+ * SCSI stack spams (otherwise harmless) error messages.
+ */
+UNUSUAL_DEV(  0xc251, 0x4003, 0x0100, 0x0100,
+               "Keil Software, Inc.",
+               "V2M MotherBoard",
+               USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+               US_FL_NOT_LOCKABLE),
+
  /* Reported by Andrew Simmons <andrew.simmons@gmail.com> */
  UNUSUAL_DEV(  0xed06, 0x4500, 0x0001, 0x0001,
                 "DataStor",
diff --git a/drivers/video/backlight/adp8870_bl.c b/drivers/video/backlight/adp8870_bl.c

index 05a8832bb3eb360d2d835e5281323d978d078500..d06886a2bfb564a4e1651083835037d79dbca8f1 100644 (file)
--- a/drivers/video/backlight/adp8870_bl.c
+++ b/drivers/video/backlight/adp8870_bl.c
@@ -1009,4 +1009,4 @@ module_exit(adp8870_exit);
  MODULE_LICENSE("GPL v2");
  MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>");
  MODULE_DESCRIPTION("ADP8870 Backlight driver");
-MODULE_ALIAS("platform:adp8870-backlight");
+MODULE_ALIAS("i2c:adp8870-backlight");
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c

index 80d292fb92d8a3c10825130867dbdc55b586f939..7363c1b169e8f5719e5ba9bc1dee3542de468169 100644 (file)
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -19,7 +19,7 @@
  #include <asm/backlight.h>
  #endif
  
-static const char const *backlight_types[] = {
+static const char *const backlight_types[] = {
         [BACKLIGHT_RAW] = "raw",
         [BACKLIGHT_PLATFORM] = "platform",
         [BACKLIGHT_FIRMWARE] = "firmware",
diff --git a/drivers/video/backlight/ep93xx_bl.c b/drivers/video/backlight/ep93xx_bl.c

index 9f1e389d51d2f8fac9337bf6be305705b2fd6b07..b0582917f0c8f71e879ed1be66e84897fb00ca38 100644 (file)
--- a/drivers/video/backlight/ep93xx_bl.c
+++ b/drivers/video/backlight/ep93xx_bl.c
@@ -11,7 +11,7 @@
   * BRIGHT, on the Cirrus EP9307, EP9312, and EP9315 processors.
   */
  
-
+#include <linux/module.h>
  #include <linux/platform_device.h>
  #include <linux/io.h>
  #include <linux/fb.h>
diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c

index b8f38ec6eb1898ab0b60d606ec1f3998e11fe339..8b5b2a4124c7980129be8146f881772f5c33fa97 100644 (file)
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -28,6 +28,8 @@ struct pwm_bl_data {
         unsigned int            lth_brightness;
         int                     (*notify)(struct device *,
                                           int brightness);
+       void                    (*notify_after)(struct device *,
+                                       int brightness);
         int                     (*check_fb)(struct device *, struct fb_info *);
  };
  
@@ -55,6 +57,10 @@ static int pwm_backlight_update_status(struct backlight_device *bl)
                 pwm_config(pb->pwm, brightness, pb->period);
                 pwm_enable(pb->pwm);
         }
+
+       if (pb->notify_after)
+               pb->notify_after(pb->dev, brightness);
+
         return 0;
  }
  
@@ -105,6 +111,7 @@ static int pwm_backlight_probe(struct platform_device *pdev)
  
         pb->period = data->pwm_period_ns;
         pb->notify = data->notify;
+       pb->notify_after = data->notify_after;
         pb->check_fb = data->check_fb;
         pb->lth_brightness = data->lth_brightness *
                 (data->pwm_period_ns / data->max_brightness);
@@ -172,6 +179,8 @@ static int pwm_backlight_suspend(struct platform_device *pdev,
                 pb->notify(pb->dev, 0);
         pwm_config(pb->pwm, 0, pb->period);
         pwm_disable(pb->pwm);
+       if (pb->notify_after)
+               pb->notify_after(pb->dev, 0);
         return 0;
  }
  
diff --git a/drivers/w1/masters/ds2490.c b/drivers/w1/masters/ds2490.c

index 02bf7bf7160bcbb79e2d94b36e4fa32022ff3be0..b5abaae38e97702c6a54c3c6a95b4d2149f7fbeb 100644 (file)
--- a/drivers/w1/masters/ds2490.c
+++ b/drivers/w1/masters/ds2490.c
@@ -1,7 +1,7 @@
  /*
   *     dscore.c
   *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
   *
   *
   * This program is free software; you can redistribute it and/or modify
@@ -1024,5 +1024,5 @@ module_init(ds_init);
  module_exit(ds_fini);
  
  MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
  MODULE_DESCRIPTION("DS2490 USB <-> W1 bus master driver (DS9490*)");
diff --git a/drivers/w1/masters/matrox_w1.c b/drivers/w1/masters/matrox_w1.c

index 334d1ccf9c922618da2b5addab4bd46015263037..f667c26b219571e4409b167985605d79c56d9e8b 100644 (file)
--- a/drivers/w1/masters/matrox_w1.c
+++ b/drivers/w1/masters/matrox_w1.c
@@ -1,7 +1,7 @@
  /*
   *     matrox_w1.c
   *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
   *
   *
   * This program is free software; you can redistribute it and/or modify
@@ -39,7 +39,7 @@
  #include "../w1_log.h"
  
  MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
  MODULE_DESCRIPTION("Driver for transport(Dallas 1-wire prtocol) over VGA DDC(matrox gpio).");
  
  static struct pci_device_id matrox_w1_tbl[] = {
diff --git a/drivers/w1/slaves/w1_ds2408.c b/drivers/w1/slaves/w1_ds2408.c

index c37781899d90369b9bae86e58af2910ab4243666..7c8cdb8aed26048895465b72c9df1bdc8fccd887 100644 (file)
--- a/drivers/w1/slaves/w1_ds2408.c
+++ b/drivers/w1/slaves/w1_ds2408.c
@@ -373,7 +373,7 @@ static int w1_f29_add_slave(struct w1_slave *sl)
  static void w1_f29_remove_slave(struct w1_slave *sl)
  {
         int i;
-       for (i = NB_SYSFS_BIN_FILES; i <= 0; --i)
+       for (i = NB_SYSFS_BIN_FILES - 1; i >= 0; --i)
                 sysfs_remove_bin_file(&sl->dev.kobj,
                         &(w1_f29_sysfs_bin_files[i]));
  }
diff --git a/drivers/w1/slaves/w1_smem.c b/drivers/w1/slaves/w1_smem.c

index cc8c02e92593f813c8c94961f213ebb288427c9b..84655625c8705108df61d703ff5356e764c1d536 100644 (file)
--- a/drivers/w1/slaves/w1_smem.c
+++ b/drivers/w1/slaves/w1_smem.c
@@ -1,7 +1,7 @@
  /*
   *     w1_smem.c
   *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
   *
   *
   * This program is free software; you can redistribute it and/or modify
@@ -32,7 +32,7 @@
  #include "../w1_family.h"
  
  MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
  MODULE_DESCRIPTION("Driver for 1-wire Dallas network protocol, 64bit memory family.");
  
  static struct w1_family w1_smem_family_01 = {
diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c

index 402928b135d19c062a38a9008b0587a144b091d2..a1ef9b5b38cfdd2cd0f2e5322a78fb445d5f1df4 100644 (file)
--- a/drivers/w1/slaves/w1_therm.c
+++ b/drivers/w1/slaves/w1_therm.c
@@ -1,7 +1,7 @@
  /*
   *     w1_therm.c
   *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
   *
   *
   * This program is free software; you can redistribute it and/or modify
@@ -34,7 +34,7 @@
  #include "../w1_family.h"
  
  MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
  MODULE_DESCRIPTION("Driver for 1-wire Dallas network protocol, temperature family.");
  
  /* Allow the strong pullup to be disabled, but default to enabled.
diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c

index 6c136c19e982ad98246511cb019d5680b8889dfb..c374978238515cd6a0e23b464076be91a9fcb744 100644 (file)
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -1,7 +1,7 @@
  /*
   *     w1.c
   *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
   *
   *
   * This program is free software; you can redistribute it and/or modify
@@ -42,7 +42,7 @@
  #include "w1_netlink.h"
  
  MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
  MODULE_DESCRIPTION("Driver for 1-wire Dallas network protocol.");
  
  static int w1_timeout = 10;
diff --git a/drivers/w1/w1.h b/drivers/w1/w1.h

index 1ce23fc6186c36def2f0ed2608fb0b010ab32501..4d012ca3f32c42a27c2ccd54b9b506907abd597e 100644 (file)
--- a/drivers/w1/w1.h
+++ b/drivers/w1/w1.h
@@ -1,7 +1,7 @@
  /*
   *     w1.h
   *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
   *
   *
   * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/w1/w1_family.c b/drivers/w1/w1_family.c

index 4a099041f28a8a80a73b0d4d63da6be496efbdc8..63359797c8b199abcf62d09122f37981bca7cc41 100644 (file)
--- a/drivers/w1/w1_family.c
+++ b/drivers/w1/w1_family.c
@@ -1,7 +1,7 @@
  /*
   *     w1_family.c
   *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
   *
   *
   * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/w1/w1_family.h b/drivers/w1/w1_family.h

index 98a1ac0f4693a634898c22d00a9cf3eb89219b57..490cda2281bc924d8f7de8c105bd4cad1abdef10 100644 (file)
--- a/drivers/w1/w1_family.h
+++ b/drivers/w1/w1_family.h
@@ -1,7 +1,7 @@
  /*
   *     w1_family.h
   *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
   *
   *
   * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c

index b50be3f1073d0fc92af534cc61dc757408e57a5b..d220bce2cee4d242532cb4d6b84114b0e45ea662 100644 (file)
--- a/drivers/w1/w1_int.c
+++ b/drivers/w1/w1_int.c
@@ -1,7 +1,7 @@
  /*
   *     w1_int.c
   *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
   *
   *
   * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/w1/w1_int.h b/drivers/w1/w1_int.h

index 4274082d22629192e6379059496d631ecf4458bb..2ad7d4414bed8b9052eb44aac75d43b04a9a9c81 100644 (file)
--- a/drivers/w1/w1_int.h
+++ b/drivers/w1/w1_int.h
@@ -1,7 +1,7 @@
  /*
   *     w1_int.h
   *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
   *
   *
   * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/w1/w1_io.c b/drivers/w1/w1_io.c

index 8e8b64cfafb69a417bded8e6f3f745ca3a7e44a1..765b37b62a4f608ceb062630ebae1afc6c5448fa 100644 (file)
--- a/drivers/w1/w1_io.c
+++ b/drivers/w1/w1_io.c
@@ -1,7 +1,7 @@
  /*
   *     w1_io.c
   *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
   *
   *
   * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/w1/w1_log.h b/drivers/w1/w1_log.h

index e6ab7cf08f8885d07cbfa222420d5c621c6039c3..9c7bd62e6bdc05fa5fffcc135b65d6e3c9dc3187 100644 (file)
--- a/drivers/w1/w1_log.h
+++ b/drivers/w1/w1_log.h
@@ -1,7 +1,7 @@
  /*
   *     w1_log.h
   *
- * Copyright (c) 2004 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004 Evgeniy Polyakov <zbr@ioremap.net>
   *
   *
   * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c

index 55aabd927c60557e82c01a3eb11d92cd9bcd18c1..40788c925d1c9b4d847e6572f5a8510ac402ac71 100644 (file)
--- a/drivers/w1/w1_netlink.c
+++ b/drivers/w1/w1_netlink.c
@@ -1,7 +1,7 @@
  /*
   * w1_netlink.c
   *
- * Copyright (c) 2003 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2003 Evgeniy Polyakov <zbr@ioremap.net>
   *
   *
   * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/w1/w1_netlink.h b/drivers/w1/w1_netlink.h

index 27e950f935b18040437ae3e64019dde69fc4e273..b0922dc29658a3fe1d19e8b17942e2521009beb0 100644 (file)
--- a/drivers/w1/w1_netlink.h
+++ b/drivers/w1/w1_netlink.h
@@ -1,7 +1,7 @@
  /*
   * w1_netlink.h
   *
- * Copyright (c) 2003 Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2003 Evgeniy Polyakov <zbr@ioremap.net>
   *
   *
   * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/xen/events.c b/drivers/xen/events.c

index da70f5c32eb9875d9d24eade0861d5004d918d14..7523719bf8a4aab4dadfeb063151958bff9a0d45 100644 (file)
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -54,7 +54,7 @@
   * This lock protects updates to the following mapping and reference-count
   * arrays. The lock does not need to be acquired to read the mapping tables.
   */
-static DEFINE_SPINLOCK(irq_mapping_update_lock);
+static DEFINE_MUTEX(irq_mapping_update_lock);
  
  static LIST_HEAD(xen_irq_list_head);
  
@@ -631,7 +631,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
         int irq = -1;
         struct physdev_irq irq_op;
  
-       spin_lock(&irq_mapping_update_lock);
+       mutex_lock(&irq_mapping_update_lock);
  
         irq = find_irq_by_gsi(gsi);
         if (irq != -1) {
@@ -684,7 +684,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
                                 handle_edge_irq, name);
  
  out:
-       spin_unlock(&irq_mapping_update_lock);
+       mutex_unlock(&irq_mapping_update_lock);
  
         return irq;
  }
@@ -710,7 +710,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
  {
         int irq, ret;
  
-       spin_lock(&irq_mapping_update_lock);
+       mutex_lock(&irq_mapping_update_lock);
  
         irq = xen_allocate_irq_dynamic();
         if (irq == -1)
@@ -724,10 +724,10 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
         if (ret < 0)
                 goto error_irq;
  out:
-       spin_unlock(&irq_mapping_update_lock);
+       mutex_unlock(&irq_mapping_update_lock);
         return irq;
  error_irq:
-       spin_unlock(&irq_mapping_update_lock);
+       mutex_unlock(&irq_mapping_update_lock);
         xen_free_irq(irq);
         return -1;
  }
@@ -740,7 +740,7 @@ int xen_destroy_irq(int irq)
         struct irq_info *info = info_for_irq(irq);
         int rc = -ENOENT;
  
-       spin_lock(&irq_mapping_update_lock);
+       mutex_lock(&irq_mapping_update_lock);
  
         desc = irq_to_desc(irq);
         if (!desc)
@@ -766,7 +766,7 @@ int xen_destroy_irq(int irq)
         xen_free_irq(irq);
  
  out:
-       spin_unlock(&irq_mapping_update_lock);
+       mutex_unlock(&irq_mapping_update_lock);
         return rc;
  }
  
@@ -776,7 +776,7 @@ int xen_irq_from_pirq(unsigned pirq)
  
         struct irq_info *info;
  
-       spin_lock(&irq_mapping_update_lock);
+       mutex_lock(&irq_mapping_update_lock);
  
         list_for_each_entry(info, &xen_irq_list_head, list) {
                 if (info == NULL || info->type != IRQT_PIRQ)
@@ -787,7 +787,7 @@ int xen_irq_from_pirq(unsigned pirq)
         }
         irq = -1;
  out:
-       spin_unlock(&irq_mapping_update_lock);
+       mutex_unlock(&irq_mapping_update_lock);
  
         return irq;
  }
@@ -802,7 +802,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
  {
         int irq;
  
-       spin_lock(&irq_mapping_update_lock);
+       mutex_lock(&irq_mapping_update_lock);
  
         irq = evtchn_to_irq[evtchn];
  
@@ -818,7 +818,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
         }
  
  out:
-       spin_unlock(&irq_mapping_update_lock);
+       mutex_unlock(&irq_mapping_update_lock);
  
         return irq;
  }
@@ -829,7 +829,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
         struct evtchn_bind_ipi bind_ipi;
         int evtchn, irq;
  
-       spin_lock(&irq_mapping_update_lock);
+       mutex_lock(&irq_mapping_update_lock);
  
         irq = per_cpu(ipi_to_irq, cpu)[ipi];
  
@@ -853,7 +853,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
         }
  
   out:
-       spin_unlock(&irq_mapping_update_lock);
+       mutex_unlock(&irq_mapping_update_lock);
         return irq;
  }
  
@@ -878,7 +878,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
         struct evtchn_bind_virq bind_virq;
         int evtchn, irq;
  
-       spin_lock(&irq_mapping_update_lock);
+       mutex_lock(&irq_mapping_update_lock);
  
         irq = per_cpu(virq_to_irq, cpu)[virq];
  
@@ -903,7 +903,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
         }
  
  out:
-       spin_unlock(&irq_mapping_update_lock);
+       mutex_unlock(&irq_mapping_update_lock);
  
         return irq;
  }
@@ -913,7 +913,7 @@ static void unbind_from_irq(unsigned int irq)
         struct evtchn_close close;
         int evtchn = evtchn_from_irq(irq);
  
-       spin_lock(&irq_mapping_update_lock);
+       mutex_lock(&irq_mapping_update_lock);
  
         if (VALID_EVTCHN(evtchn)) {
                 close.port = evtchn;
@@ -943,7 +943,7 @@ static void unbind_from_irq(unsigned int irq)
  
         xen_free_irq(irq);
  
-       spin_unlock(&irq_mapping_update_lock);
+       mutex_unlock(&irq_mapping_update_lock);
  }
  
  int bind_evtchn_to_irqhandler(unsigned int evtchn,
@@ -1279,7 +1279,7 @@ void rebind_evtchn_irq(int evtchn, int irq)
            will also be masked. */
         disable_irq(irq);
  
-       spin_lock(&irq_mapping_update_lock);
+       mutex_lock(&irq_mapping_update_lock);
  
         /* After resume the irq<->evtchn mappings are all cleared out */
         BUG_ON(evtchn_to_irq[evtchn] != -1);
@@ -1289,7 +1289,7 @@ void rebind_evtchn_irq(int evtchn, int irq)
  
         xen_irq_info_evtchn_init(irq, evtchn);
  
-       spin_unlock(&irq_mapping_update_lock);
+       mutex_unlock(&irq_mapping_update_lock);
  
         /* new event channels are always bound to cpu 0 */
         irq_set_affinity(irq, cpumask_of(0));
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c

index 1b4afd81f872306bb4f5ffd9b860fb5f3c0adc6f..6ea852e251629518cc8d808a087bbc9d4c87fb31 100644 (file)
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -70,6 +70,7 @@
  #include <linux/kernel.h>
  #include <linux/mm.h>
  #include <linux/mman.h>
+#include <linux/module.h>
  #include <linux/workqueue.h>
  #include <xen/balloon.h>
  #include <xen/tmem.h>
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h

index 46ce357ca1abd46d5bff175aa5518b9e1f7f778f..410ffd6ceb5fb10961d9230e056cd2fe87bef148 100644 (file)
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -54,9 +54,9 @@ extern struct kmem_cache *v9fs_inode_cache;
  
  struct inode *v9fs_alloc_inode(struct super_block *sb);
  void v9fs_destroy_inode(struct inode *inode);
-struct inode *v9fs_get_inode(struct super_block *sb, int mode);
+struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t);
  int v9fs_init_inode(struct v9fs_session_info *v9ses,
-                   struct inode *inode, int mode);
+                   struct inode *inode, int mode, dev_t);
  void v9fs_evict_inode(struct inode *inode);
  ino_t v9fs_qid2ino(struct p9_qid *qid);
  void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
@@ -83,4 +83,6 @@ static inline void v9fs_invalidate_inode_attr(struct inode *inode)
         v9inode->cache_validity |= V9FS_INO_INVALID_ATTR;
         return;
  }
+
+int v9fs_open_to_dotl_flags(int flags);
  #endif
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c

index 3c173fcc2c5a0902be016dd20e6a48411f5d3a55..62857a810a79d00332a150c6a02a433e638d72b8 100644 (file)
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -65,7 +65,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
         v9inode = V9FS_I(inode);
         v9ses = v9fs_inode2v9ses(inode);
         if (v9fs_proto_dotl(v9ses))
-               omode = file->f_flags;
+               omode = v9fs_open_to_dotl_flags(file->f_flags);
         else
                 omode = v9fs_uflags2omode(file->f_flags,
                                         v9fs_proto_dotu(v9ses));
@@ -169,7 +169,18 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
  
         /* convert posix lock to p9 tlock args */
         memset(&flock, 0, sizeof(flock));
-       flock.type = fl->fl_type;
+       /* map the lock type */
+       switch (fl->fl_type) {
+       case F_RDLCK:
+               flock.type = P9_LOCK_TYPE_RDLCK;
+               break;
+       case F_WRLCK:
+               flock.type = P9_LOCK_TYPE_WRLCK;
+               break;
+       case F_UNLCK:
+               flock.type = P9_LOCK_TYPE_UNLCK;
+               break;
+       }
         flock.start = fl->fl_start;
         if (fl->fl_end == OFFSET_MAX)
                 flock.length = 0;
@@ -245,7 +256,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
  
         /* convert posix lock to p9 tgetlock args */
         memset(&glock, 0, sizeof(glock));
-       glock.type = fl->fl_type;
+       glock.type  = P9_LOCK_TYPE_UNLCK;
         glock.start = fl->fl_start;
         if (fl->fl_end == OFFSET_MAX)
                 glock.length = 0;
@@ -257,17 +268,26 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
         res = p9_client_getlock_dotl(fid, &glock);
         if (res < 0)
                 return res;
-       if (glock.type != F_UNLCK) {
-               fl->fl_type = glock.type;
+       /* map 9p lock type to os lock type */
+       switch (glock.type) {
+       case P9_LOCK_TYPE_RDLCK:
+               fl->fl_type = F_RDLCK;
+               break;
+       case P9_LOCK_TYPE_WRLCK:
+               fl->fl_type = F_WRLCK;
+               break;
+       case P9_LOCK_TYPE_UNLCK:
+               fl->fl_type = F_UNLCK;
+               break;
+       }
+       if (glock.type != P9_LOCK_TYPE_UNLCK) {
                 fl->fl_start = glock.start;
                 if (glock.length == 0)
                         fl->fl_end = OFFSET_MAX;
                 else
                         fl->fl_end = glock.start + glock.length - 1;
                 fl->fl_pid = glock.proc_id;
-       } else
-               fl->fl_type = F_UNLCK;
-
+       }
         return res;
  }
  
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c

index 8bb5507e822f4151574f564150f48acd4531f002..e3c03db3c788149b83cd629468d782783f99e15b 100644 (file)
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -95,15 +95,18 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
  /**
   * p9mode2unixmode- convert plan9 mode bits to unix mode bits
   * @v9ses: v9fs session information
- * @mode: mode to convert
+ * @stat: p9_wstat from which mode need to be derived
+ * @rdev: major number, minor number in case of device files.
   *
   */
-
-static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
+static int p9mode2unixmode(struct v9fs_session_info *v9ses,
+                          struct p9_wstat *stat, dev_t *rdev)
  {
         int res;
+       int mode = stat->mode;
  
-       res = mode & 0777;
+       res = mode & S_IALLUGO;
+       *rdev = 0;
  
         if ((mode & P9_DMDIR) == P9_DMDIR)
                 res |= S_IFDIR;
@@ -116,9 +119,26 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
                  && (v9ses->nodev == 0))
                 res |= S_IFIFO;
         else if ((mode & P9_DMDEVICE) && (v9fs_proto_dotu(v9ses))
-                && (v9ses->nodev == 0))
-               res |= S_IFBLK;
-       else
+                && (v9ses->nodev == 0)) {
+               char type = 0, ext[32];
+               int major = -1, minor = -1;
+
+               strncpy(ext, stat->extension, sizeof(ext));
+               sscanf(ext, "%c %u %u", &type, &major, &minor);
+               switch (type) {
+               case 'c':
+                       res |= S_IFCHR;
+                       break;
+               case 'b':
+                       res |= S_IFBLK;
+                       break;
+               default:
+                       P9_DPRINTK(P9_DEBUG_ERROR,
+                               "Unknown special type %c %s\n", type,
+                               stat->extension);
+               };
+               *rdev = MKDEV(major, minor);
+       } else
                 res |= S_IFREG;
  
         if (v9fs_proto_dotu(v9ses)) {
@@ -131,7 +151,6 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
                 if ((mode & P9_DMSETVTX) == P9_DMSETVTX)
                         res |= S_ISVTX;
         }
-
         return res;
  }
  
@@ -242,13 +261,13 @@ void v9fs_destroy_inode(struct inode *inode)
  }
  
  int v9fs_init_inode(struct v9fs_session_info *v9ses,
-                   struct inode *inode, int mode)
+                   struct inode *inode, int mode, dev_t rdev)
  {
         int err = 0;
  
         inode_init_owner(inode, NULL, mode);
         inode->i_blocks = 0;
-       inode->i_rdev = 0;
+       inode->i_rdev = rdev;
         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
         inode->i_mapping->a_ops = &v9fs_addr_operations;
  
@@ -335,7 +354,7 @@ error:
   *
   */
  
-struct inode *v9fs_get_inode(struct super_block *sb, int mode)
+struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t rdev)
  {
         int err;
         struct inode *inode;
@@ -348,7 +367,7 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
                 P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
                 return ERR_PTR(-ENOMEM);
         }
-       err = v9fs_init_inode(v9ses, inode, mode);
+       err = v9fs_init_inode(v9ses, inode, mode, rdev);
         if (err) {
                 iput(inode);
                 return ERR_PTR(err);
@@ -435,11 +454,12 @@ void v9fs_evict_inode(struct inode *inode)
  static int v9fs_test_inode(struct inode *inode, void *data)
  {
         int umode;
+       dev_t rdev;
         struct v9fs_inode *v9inode = V9FS_I(inode);
         struct p9_wstat *st = (struct p9_wstat *)data;
         struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
  
-       umode = p9mode2unixmode(v9ses, st->mode);
+       umode = p9mode2unixmode(v9ses, st, &rdev);
         /* don't match inode of different type */
         if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
                 return 0;
@@ -473,6 +493,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
                                    struct p9_wstat *st,
                                    int new)
  {
+       dev_t rdev;
         int retval, umode;
         unsigned long i_ino;
         struct inode *inode;
@@ -496,8 +517,8 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
          * later.
          */
         inode->i_ino = i_ino;
-       umode = p9mode2unixmode(v9ses, st->mode);
-       retval = v9fs_init_inode(v9ses, inode, umode);
+       umode = p9mode2unixmode(v9ses, st, &rdev);
+       retval = v9fs_init_inode(v9ses, inode, umode, rdev);
         if (retval)
                 goto error;
  
@@ -531,6 +552,19 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
         return inode;
  }
  
+/**
+ * v9fs_at_to_dotl_flags- convert Linux specific AT flags to
+ * plan 9 AT flag.
+ * @flags: flags to convert
+ */
+static int v9fs_at_to_dotl_flags(int flags)
+{
+       int rflags = 0;
+       if (flags & AT_REMOVEDIR)
+               rflags |= P9_DOTL_AT_REMOVEDIR;
+       return rflags;
+}
+
  /**
   * v9fs_remove - helper function to remove files and directories
   * @dir: directory inode that is being deleted
@@ -558,7 +592,8 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
                 return retval;
         }
         if (v9fs_proto_dotl(v9ses))
-               retval = p9_client_unlinkat(dfid, dentry->d_name.name, flags);
+               retval = p9_client_unlinkat(dfid, dentry->d_name.name,
+                                           v9fs_at_to_dotl_flags(flags));
         if (retval == -EOPNOTSUPP) {
                 /* Try the one based on path */
                 v9fid = v9fs_fid_clone(dentry);
@@ -645,13 +680,11 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
                 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
                 goto error;
         }
-       d_instantiate(dentry, inode);
         err = v9fs_fid_add(dentry, fid);
         if (err < 0)
                 goto error;
-
+       d_instantiate(dentry, inode);
         return ofid;
-
  error:
         if (ofid)
                 p9_client_clunk(ofid);
@@ -792,6 +825,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
  struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
                                       struct nameidata *nameidata)
  {
+       struct dentry *res;
         struct super_block *sb;
         struct v9fs_session_info *v9ses;
         struct p9_fid *dfid, *fid;
@@ -823,22 +857,35 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
  
                 return ERR_PTR(result);
         }
-
-       inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+       /*
+        * Make sure we don't use a wrong inode due to parallel
+        * unlink. For cached mode create calls request for new
+        * inode. But with cache disabled, lookup should do this.
+        */
+       if (v9ses->cache)
+               inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+       else
+               inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
         if (IS_ERR(inode)) {
                 result = PTR_ERR(inode);
                 inode = NULL;
                 goto error;
         }
-
         result = v9fs_fid_add(dentry, fid);
         if (result < 0)
                 goto error_iput;
-
  inst_out:
-       d_add(dentry, inode);
-       return NULL;
-
+       /*
+        * If we had a rename on the server and a parallel lookup
+        * for the new name, then make sure we instantiate with
+        * the new name. ie look up for a/b, while on server somebody
+        * moved b under k and client parallely did a lookup for
+        * k/b.
+        */
+       res = d_materialise_unique(dentry, inode);
+       if (!IS_ERR(res))
+               return res;
+       result = PTR_ERR(res);
  error_iput:
         iput(inode);
  error:
@@ -1002,7 +1049,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
                 return PTR_ERR(st);
  
         v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
-               generic_fillattr(dentry->d_inode, stat);
+       generic_fillattr(dentry->d_inode, stat);
  
         p9stat_free(st);
         kfree(st);
@@ -1086,6 +1133,7 @@ void
  v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
         struct super_block *sb)
  {
+       mode_t mode;
         char ext[32];
         char tag_name[14];
         unsigned int i_nlink;
@@ -1121,31 +1169,9 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
                                 inode->i_nlink = i_nlink;
                 }
         }
-       inode->i_mode = p9mode2unixmode(v9ses, stat->mode);
-       if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
-               char type = 0;
-               int major = -1;
-               int minor = -1;
-
-               strncpy(ext, stat->extension, sizeof(ext));
-               sscanf(ext, "%c %u %u", &type, &major, &minor);
-               switch (type) {
-               case 'c':
-                       inode->i_mode &= ~S_IFBLK;
-                       inode->i_mode |= S_IFCHR;
-                       break;
-               case 'b':
-                       break;
-               default:
-                       P9_DPRINTK(P9_DEBUG_ERROR,
-                               "Unknown special type %c %s\n", type,
-                               stat->extension);
-               };
-               inode->i_rdev = MKDEV(major, minor);
-               init_special_inode(inode, inode->i_mode, inode->i_rdev);
-       } else
-               inode->i_rdev = 0;
-
+       mode = stat->mode & S_IALLUGO;
+       mode |= inode->i_mode & ~S_IALLUGO;
+       inode->i_mode = mode;
         i_size_write(inode, stat->length);
  
         /* not real number of blocks, but 512 byte ones ... */
@@ -1411,6 +1437,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
  
  int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
  {
+       int umode;
+       dev_t rdev;
         loff_t i_size;
         struct p9_wstat *st;
         struct v9fs_session_info *v9ses;
@@ -1419,6 +1447,12 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
         st = p9_client_stat(fid);
         if (IS_ERR(st))
                 return PTR_ERR(st);
+       /*
+        * Don't update inode if the file type is different
+        */
+       umode = p9mode2unixmode(v9ses, st, &rdev);
+       if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
+               goto out;
  
         spin_lock(&inode->i_lock);
         /*
@@ -1430,6 +1464,7 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
         if (v9ses->cache)
                 inode->i_size = i_size;
         spin_unlock(&inode->i_lock);
+out:
         p9stat_free(st);
         kfree(st);
         return 0;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c

index b6c8ed205192e5ab34f1b8ae15323dcc01fdec34..aded79fcd5cfdadc359929f3b39fad1f92ff2001 100644 (file)
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -153,7 +153,8 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
          * later.
          */
         inode->i_ino = i_ino;
-       retval = v9fs_init_inode(v9ses, inode, st->st_mode);
+       retval = v9fs_init_inode(v9ses, inode,
+                                st->st_mode, new_decode_dev(st->st_rdev));
         if (retval)
                 goto error;
  
@@ -190,6 +191,58 @@ v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
         return inode;
  }
  
+struct dotl_openflag_map {
+       int open_flag;
+       int dotl_flag;
+};
+
+static int v9fs_mapped_dotl_flags(int flags)
+{
+       int i;
+       int rflags = 0;
+       struct dotl_openflag_map dotl_oflag_map[] = {
+               { O_CREAT,      P9_DOTL_CREATE },
+               { O_EXCL,       P9_DOTL_EXCL },
+               { O_NOCTTY,     P9_DOTL_NOCTTY },
+               { O_TRUNC,      P9_DOTL_TRUNC },
+               { O_APPEND,     P9_DOTL_APPEND },
+               { O_NONBLOCK,   P9_DOTL_NONBLOCK },
+               { O_DSYNC,      P9_DOTL_DSYNC },
+               { FASYNC,       P9_DOTL_FASYNC },
+               { O_DIRECT,     P9_DOTL_DIRECT },
+               { O_LARGEFILE,  P9_DOTL_LARGEFILE },
+               { O_DIRECTORY,  P9_DOTL_DIRECTORY },
+               { O_NOFOLLOW,   P9_DOTL_NOFOLLOW },
+               { O_NOATIME,    P9_DOTL_NOATIME },
+               { O_CLOEXEC,    P9_DOTL_CLOEXEC },
+               { O_SYNC,       P9_DOTL_SYNC},
+       };
+       for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
+               if (flags & dotl_oflag_map[i].open_flag)
+                       rflags |= dotl_oflag_map[i].dotl_flag;
+       }
+       return rflags;
+}
+
+/**
+ * v9fs_open_to_dotl_flags- convert Linux specific open flags to
+ * plan 9 open flag.
+ * @flags: flags to convert
+ */
+int v9fs_open_to_dotl_flags(int flags)
+{
+       int rflags = 0;
+
+       /*
+        * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
+        * and P9_DOTL_NOACCESS
+        */
+       rflags |= flags & O_ACCMODE;
+       rflags |= v9fs_mapped_dotl_flags(flags);
+
+       return rflags;
+}
+
  /**
   * v9fs_vfs_create_dotl - VFS hook to create files for 9P2000.L protocol.
   * @dir: directory inode that is being created
@@ -258,7 +311,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
                            "Failed to get acl values in creat %d\n", err);
                 goto error;
         }
-       err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
+       err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
+                                   mode, gid, &qid);
         if (err < 0) {
                 P9_DPRINTK(P9_DEBUG_VFS,
                                 "p9_client_open_dotl failed in creat %d\n",
@@ -281,10 +335,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
                 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
                 goto error;
         }
-       d_instantiate(dentry, inode);
         err = v9fs_fid_add(dentry, fid);
         if (err < 0)
                 goto error;
+       d_instantiate(dentry, inode);
  
         /* Now set the ACL based on the default value */
         v9fs_set_create_acl(dentry, &dacl, &pacl);
@@ -403,10 +457,10 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
                                 err);
                         goto error;
                 }
-               d_instantiate(dentry, inode);
                 err = v9fs_fid_add(dentry, fid);
                 if (err < 0)
                         goto error;
+               d_instantiate(dentry, inode);
                 fid = NULL;
         } else {
                 /*
@@ -414,7 +468,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
                  * inode with stat. We need to get an inode
                  * so that we can set the acl with dentry
                  */
-               inode = v9fs_get_inode(dir->i_sb, mode);
+               inode = v9fs_get_inode(dir->i_sb, mode, 0);
                 if (IS_ERR(inode)) {
                         err = PTR_ERR(inode);
                         goto error;
@@ -540,6 +594,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
  void
  v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
  {
+       mode_t mode;
         struct v9fs_inode *v9inode = V9FS_I(inode);
  
         if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
@@ -552,11 +607,10 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
                 inode->i_uid = stat->st_uid;
                 inode->i_gid = stat->st_gid;
                 inode->i_nlink = stat->st_nlink;
-               inode->i_mode = stat->st_mode;
-               inode->i_rdev = new_decode_dev(stat->st_rdev);
  
-               if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode)))
-                       init_special_inode(inode, inode->i_mode, inode->i_rdev);
+               mode = stat->st_mode & S_IALLUGO;
+               mode |= inode->i_mode & ~S_IALLUGO;
+               inode->i_mode = mode;
  
                 i_size_write(inode, stat->st_size);
                 inode->i_blocks = stat->st_blocks;
@@ -657,14 +711,14 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
                                         err);
                         goto error;
                 }
-               d_instantiate(dentry, inode);
                 err = v9fs_fid_add(dentry, fid);
                 if (err < 0)
                         goto error;
+               d_instantiate(dentry, inode);
                 fid = NULL;
         } else {
                 /* Not in cached mode. No need to populate inode with stat */
-               inode = v9fs_get_inode(dir->i_sb, S_IFLNK);
+               inode = v9fs_get_inode(dir->i_sb, S_IFLNK, 0);
                 if (IS_ERR(inode)) {
                         err = PTR_ERR(inode);
                         goto error;
@@ -810,17 +864,17 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
                                 err);
                         goto error;
                 }
-               d_instantiate(dentry, inode);
                 err = v9fs_fid_add(dentry, fid);
                 if (err < 0)
                         goto error;
+               d_instantiate(dentry, inode);
                 fid = NULL;
         } else {
                 /*
                  * Not in cached mode. No need to populate inode with stat.
                  * socket syscall returns a fd, so we need instantiate
                  */
-               inode = v9fs_get_inode(dir->i_sb, mode);
+               inode = v9fs_get_inode(dir->i_sb, mode, rdev);
                 if (IS_ERR(inode)) {
                         err = PTR_ERR(inode);
                         goto error;
@@ -886,6 +940,11 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
         st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
         if (IS_ERR(st))
                 return PTR_ERR(st);
+       /*
+        * Don't update inode if the file type is different
+        */
+       if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
+               goto out;
  
         spin_lock(&inode->i_lock);
         /*
@@ -897,6 +956,7 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
         if (v9ses->cache)
                 inode->i_size = i_size;
         spin_unlock(&inode->i_lock);
+out:
         kfree(st);
         return 0;
  }
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c

index feef6cdc1fd22d23ba45a8f98f2f21ec0a0dc95a..c70251d47ed196c65594365091c1125f4e86d987 100644 (file)
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -149,7 +149,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
         else
                 sb->s_d_op = &v9fs_dentry_operations;
  
-       inode = v9fs_get_inode(sb, S_IFDIR | mode);
+       inode = v9fs_get_inode(sb, S_IFDIR | mode, 0);
         if (IS_ERR(inode)) {
                 retval = PTR_ERR(inode);
                 goto release_sb;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h

index 475f9c597cb7c0dd86bce6a0c42602377d33fc14..326dc08d3e3f53aa075e68948e5e4598d548c263 100644 (file)
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -39,27 +39,17 @@
  
  /* #define DEBUG */
  
-#ifdef DEBUG
-#define DPRINTK(fmt, args...)                          \
-do {                                                   \
-       printk(KERN_DEBUG "pid %d: %s: " fmt "\n",      \
-               current->pid, __func__, ##args);        \
-} while (0)
-#else
-#define DPRINTK(fmt, args...) do {} while (0)
-#endif
-
-#define AUTOFS_WARN(fmt, args...)                      \
-do {                                                   \
+#define DPRINTK(fmt, ...)                              \
+       pr_debug("pid %d: %s: " fmt "\n",               \
+               current->pid, __func__, ##__VA_ARGS__)
+
+#define AUTOFS_WARN(fmt, ...)                          \
         printk(KERN_WARNING "pid %d: %s: " fmt "\n",    \
-               current->pid, __func__, ##args);        \
-} while (0)
+               current->pid, __func__, ##__VA_ARGS__)
  
-#define AUTOFS_ERROR(fmt, args...)                     \
-do {                                                   \
+#define AUTOFS_ERROR(fmt, ...)                         \
         printk(KERN_ERR "pid %d: %s: " fmt "\n",        \
-               current->pid, __func__, ##args);        \
-} while (0)
+               current->pid, __func__, ##__VA_ARGS__)
  
  /* Unified info structure.  This is pointed to by both the dentry and
     inode structures.  Each file in the filesystem has an instance of this
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c

index 25435987d6ae1cdcbbcfea4e9bf4d744c55bab44..e1fbdeef85db2e4b73e78eaf6266d2784e2385b0 100644 (file)
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -104,7 +104,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
         size_t pktsz;
  
         DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
-               wq->wait_queue_token, wq->name.len, wq->name.name, type);
+               (unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, type);
  
         memset(&pkt,0,sizeof pkt); /* For security reasons */
  
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c

index 54b8c28bebc8f8b4b63dcc1f010bbf035a09cbd6..720d885e8dcaa87b54f61f0fa8b027109528758b 100644 (file)
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -474,17 +474,22 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
                 befs_data_stream *data = &befs_ino->i_data.ds;
                 befs_off_t len = data->size;
  
-               befs_debug(sb, "Follow long symlink");
-
-               link = kmalloc(len, GFP_NOFS);
-               if (!link) {
-                       link = ERR_PTR(-ENOMEM);
-               } else if (befs_read_lsymlink(sb, data, link, len) != len) {
-                       kfree(link);
-                       befs_error(sb, "Failed to read entire long symlink");
+               if (len == 0) {
+                       befs_error(sb, "Long symlink with illegal length");
                         link = ERR_PTR(-EIO);
                 } else {
-                       link[len - 1] = '\0';
+                       befs_debug(sb, "Follow long symlink");
+
+                       link = kmalloc(len, GFP_NOFS);
+                       if (!link) {
+                               link = ERR_PTR(-ENOMEM);
+                       } else if (befs_read_lsymlink(sb, data, link, len) != len) {
+                               kfree(link);
+                               befs_error(sb, "Failed to read entire long symlink");
+                               link = ERR_PTR(-EIO);
+                       } else {
+                               link[len - 1] = '\0';
+                       }
                 }
         } else {
                 link = befs_ino->i_data.symlink;
diff --git a/fs/block_dev.c b/fs/block_dev.c

index ff77262e887cb66819f31d781e679eccb2d1975a..95f786ec7f088f1c85b7e82dfd01fe9bb7246fe3 100644 (file)
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1429,6 +1429,11 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
                 WARN_ON_ONCE(bdev->bd_holders);
                 sync_blockdev(bdev);
                 kill_bdev(bdev);
+               /* ->release can cause the old bdi to disappear,
+                * so must switch it out first
+                */
+               bdev_inode_switch_bdi(bdev->bd_inode,
+                                       &default_backing_dev_info);
         }
         if (bdev->bd_contains == bdev) {
                 if (disk->fops->release)
@@ -1442,8 +1447,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
                 disk_put_part(bdev->bd_part);
                 bdev->bd_part = NULL;
                 bdev->bd_disk = NULL;
-               bdev_inode_switch_bdi(bdev->bd_inode,
-                                       &default_backing_dev_info);
                 if (bdev != bdev->bd_contains)
                         victim = bdev->bd_contains;
                 bdev->bd_contains = NULL;
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h

index 502b9e98867949736b03d5bbf55b14b7d67205e7..d9f99a16edd6d85a9eb5fbe892f195f0ca00eeb3 100644 (file)
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -176,7 +176,11 @@ static inline u64 btrfs_ino(struct inode *inode)
  {
         u64 ino = BTRFS_I(inode)->location.objectid;
  
-       if (ino <= BTRFS_FIRST_FREE_OBJECTID)
+       /*
+        * !ino: btree_inode
+        * type == BTRFS_ROOT_ITEM_KEY: subvol dir
+        */
+       if (!ino || BTRFS_I(inode)->location.type == BTRFS_ROOT_ITEM_KEY)
                 ino = inode->i_ino;
         return ino;
  }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h

index 0469263e327eebd66f36b6ba9a4e8a1ba67850d5..03912c5c6f498221b965baef0fafed10904677e6 100644 (file)
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1415,17 +1415,15 @@ void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
  #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits)            \
  static inline u##bits btrfs_##name(struct extent_buffer *eb)           \
  {                                                                      \
-       type *p = kmap_atomic(eb->first_page, KM_USER0);                \
+       type *p = page_address(eb->first_page);                         \
         u##bits res = le##bits##_to_cpu(p->member);                     \
-       kunmap_atomic(p, KM_USER0);                                     \
         return res;                                                     \
  }                                                                      \
  static inline void btrfs_set_##name(struct extent_buffer *eb,          \
                                     u##bits val)                        \
  {                                                                      \
-       type *p = kmap_atomic(eb->first_page, KM_USER0);                \
+       type *p = page_address(eb->first_page);                         \
         p->member = cpu_to_le##bits(val);                               \
-       kunmap_atomic(p, KM_USER0);                                     \
  }
  
  #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits)             \
@@ -2367,8 +2365,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
  int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
  int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
  int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
-int btrfs_drop_snapshot(struct btrfs_root *root,
-                       struct btrfs_block_rsv *block_rsv, int update_ref);
+void btrfs_drop_snapshot(struct btrfs_root *root,
+                        struct btrfs_block_rsv *block_rsv, int update_ref);
  int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         struct extent_buffer *node,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c

index 66bac226944e05ae82edb56a9fd43116f31a901a..f5be06a2462f4b01b5bba3834c1527a3a0c5bf18 100644 (file)
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1782,6 +1782,9 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
  
  
                 for (i = 0; i < multi->num_stripes; i++, stripe++) {
+                       if (!stripe->dev->can_discard)
+                               continue;
+
                         ret = btrfs_issue_discard(stripe->dev->bdev,
                                                   stripe->physical,
                                                   stripe->length);
@@ -1789,11 +1792,16 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
                                 discarded_bytes += stripe->length;
                         else if (ret != -EOPNOTSUPP)
                                 break;
+
+                       /*
+                        * Just in case we get back EOPNOTSUPP for some reason,
+                        * just ignore the return value so we don't screw up
+                        * people calling discard_extent.
+                        */
+                       ret = 0;
                 }
                 kfree(multi);
         }
-       if (discarded_bytes && ret == -EOPNOTSUPP)
-               ret = 0;
  
         if (actual_bytes)
                 *actual_bytes = discarded_bytes;
@@ -6269,8 +6277,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
   * also make sure backrefs for the shared block and all lower level
   * blocks are properly updated.
   */
-int btrfs_drop_snapshot(struct btrfs_root *root,
-                       struct btrfs_block_rsv *block_rsv, int update_ref)
+void btrfs_drop_snapshot(struct btrfs_root *root,
+                        struct btrfs_block_rsv *block_rsv, int update_ref)
  {
         struct btrfs_path *path;
         struct btrfs_trans_handle *trans;
@@ -6283,13 +6291,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
         int level;
  
         path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
+       if (!path) {
+               err = -ENOMEM;
+               goto out;
+       }
  
         wc = kzalloc(sizeof(*wc), GFP_NOFS);
         if (!wc) {
                 btrfs_free_path(path);
-               return -ENOMEM;
+               err = -ENOMEM;
+               goto out;
         }
  
         trans = btrfs_start_transaction(tree_root, 0);
@@ -6318,7 +6329,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                 path->lowest_level = 0;
                 if (ret < 0) {
                         err = ret;
-                       goto out;
+                       goto out_free;
                 }
                 WARN_ON(ret > 0);
  
@@ -6425,11 +6436,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                 free_extent_buffer(root->commit_root);
                 kfree(root);
         }
-out:
+out_free:
         btrfs_end_transaction_throttle(trans, tree_root);
         kfree(wc);
         btrfs_free_path(path);
-       return err;
+out:
+       if (err)
+               btrfs_std_error(root->fs_info, err);
+       return;
  }
  
  /*
@@ -6720,6 +6734,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
         struct btrfs_space_info *space_info;
         struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
         struct btrfs_device *device;
+       u64 min_free;
+       u64 dev_min = 1;
+       u64 dev_nr = 0;
+       int index;
         int full = 0;
         int ret = 0;
  
@@ -6729,8 +6747,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
         if (!block_group)
                 return -1;
  
+       min_free = btrfs_block_group_used(&block_group->item);
+
         /* no bytes used, we're good */
-       if (!btrfs_block_group_used(&block_group->item))
+       if (!min_free)
                 goto out;
  
         space_info = block_group->space_info;
@@ -6746,10 +6766,9 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
          * all of the extents from this block group.  If we can, we're good
          */
         if ((space_info->total_bytes != block_group->key.offset) &&
-          (space_info->bytes_used + space_info->bytes_reserved +
-           space_info->bytes_pinned + space_info->bytes_readonly +
-           btrfs_block_group_used(&block_group->item) <
-           space_info->total_bytes)) {
+           (space_info->bytes_used + space_info->bytes_reserved +
+            space_info->bytes_pinned + space_info->bytes_readonly +
+            min_free < space_info->total_bytes)) {
                 spin_unlock(&space_info->lock);
                 goto out;
         }
@@ -6766,9 +6785,31 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
         if (full)
                 goto out;
  
+       /*
+        * index:
+        *      0: raid10
+        *      1: raid1
+        *      2: dup
+        *      3: raid0
+        *      4: single
+        */
+       index = get_block_group_index(block_group);
+       if (index == 0) {
+               dev_min = 4;
+               /* Divide by 2 */
+               min_free >>= 1;
+       } else if (index == 1) {
+               dev_min = 2;
+       } else if (index == 2) {
+               /* Multiply by 2 */
+               min_free <<= 1;
+       } else if (index == 3) {
+               dev_min = fs_devices->rw_devices;
+               do_div(min_free, dev_min);
+       }
+
         mutex_lock(&root->fs_info->chunk_mutex);
         list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
-               u64 min_free = btrfs_block_group_used(&block_group->item);
                 u64 dev_offset;
  
                 /*
@@ -6779,7 +6820,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
                         ret = find_free_dev_extent(NULL, device, min_free,
                                                    &dev_offset, NULL);
                         if (!ret)
+                               dev_nr++;
+
+                       if (dev_nr >= dev_min)
                                 break;
+
                         ret = -1;
                 }
         }
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c

index b910694f61ed25bbf0f309ff673381afcf0994d8..a1cb7821becda9229d1e68712cd7a93a1e8d6fcc 100644 (file)
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -183,8 +183,10 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
          * read from the commit root and sidestep a nasty deadlock
          * between reading the free space cache and updating the csum tree.
          */
-       if (btrfs_is_free_space_inode(root, inode))
+       if (btrfs_is_free_space_inode(root, inode)) {
                 path->search_commit_root = 1;
+               path->skip_locking = 1;
+       }
  
         disk_bytenr = (u64)bio->bi_sector << 9;
         if (dio)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c

index 658d66959abe9eca7608f650d73bd9952a053ac4..a381cd22f5184e83401f872bcd74528ecdb80e69 100644 (file)
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -150,6 +150,8 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
         spin_lock(&root->fs_info->defrag_inodes_lock);
         if (!BTRFS_I(inode)->in_defrag)
                 __btrfs_add_inode_defrag(inode, defrag);
+       else
+               kfree(defrag);
         spin_unlock(&root->fs_info->defrag_inodes_lock);
         return 0;
  }
@@ -1073,12 +1075,6 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
         start_pos = pos & ~((u64)root->sectorsize - 1);
         last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
  
-       if (start_pos > inode->i_size) {
-               err = btrfs_cont_expand(inode, i_size_read(inode), start_pos);
-               if (err)
-                       return err;
-       }
-
  again:
         for (i = 0; i < num_pages; i++) {
                 pages[i] = find_or_create_page(inode->i_mapping, index + i,
@@ -1336,6 +1332,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
         struct inode *inode = fdentry(file)->d_inode;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         loff_t *ppos = &iocb->ki_pos;
+       u64 start_pos;
         ssize_t num_written = 0;
         ssize_t err = 0;
         size_t count, ocount;
@@ -1384,6 +1381,15 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
         file_update_time(file);
         BTRFS_I(inode)->sequence++;
  
+       start_pos = round_down(pos, root->sectorsize);
+       if (start_pos > i_size_read(inode)) {
+               err = btrfs_cont_expand(inode, i_size_read(inode), start_pos);
+               if (err) {
+                       mutex_unlock(&inode->i_mutex);
+                       goto out;
+               }
+       }
+
         if (unlikely(file->f_flags & O_DIRECT)) {
                 num_written = __btrfs_direct_write(iocb, iov, nr_segs,
                                                    pos, ppos, count, ocount);
@@ -1638,11 +1644,15 @@ static long btrfs_fallocate(struct file *file, int mode,
  
         cur_offset = alloc_start;
         while (1) {
+               u64 actual_end;
+
                 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
                                       alloc_end - cur_offset, 0);
                 BUG_ON(IS_ERR_OR_NULL(em));
                 last_byte = min(extent_map_end(em), alloc_end);
+               actual_end = min_t(u64, extent_map_end(em), offset + len);
                 last_byte = (last_byte + mask) & ~mask;
+
                 if (em->block_start == EXTENT_MAP_HOLE ||
                     (cur_offset >= inode->i_size &&
                      !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
@@ -1655,6 +1665,16 @@ static long btrfs_fallocate(struct file *file, int mode,
                                 free_extent_map(em);
                                 break;
                         }
+               } else if (actual_end > inode->i_size &&
+                          !(mode & FALLOC_FL_KEEP_SIZE)) {
+                       /*
+                        * We didn't need to allocate any more space, but we
+                        * still extended the size of the file so we need to
+                        * update i_size.
+                        */
+                       inode->i_ctime = CURRENT_TIME;
+                       i_size_write(inode, actual_end);
+                       btrfs_ordered_update_i_size(inode, actual_end, NULL);
                 }
                 free_extent_map(em);
  
@@ -1797,6 +1817,11 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
                 goto out;
         case SEEK_DATA:
         case SEEK_HOLE:
+               if (offset >= i_size_read(inode)) {
+                       mutex_unlock(&inode->i_mutex);
+                       return -ENXIO;
+               }
+
                 ret = find_desired_extent(inode, &offset, origin);
                 if (ret) {
                         mutex_unlock(&inode->i_mutex);
@@ -1804,10 +1829,14 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
                 }
         }
  
-       if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
-               return -EINVAL;
-       if (offset > inode->i_sb->s_maxbytes)
-               return -EINVAL;
+       if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) {
+               offset = -EINVAL;
+               goto out;
+       }
+       if (offset > inode->i_sb->s_maxbytes) {
+               offset = -EINVAL;
+               goto out;
+       }
  
         /* Special lock needed here? */
         if (offset != file->f_pos) {
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c

index 6377713f639c978db84f64dec2dc9541e8c8287e..41ac927401d06bc14a2214e245d1cdfbb2d7b603 100644 (file)
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -190,9 +190,11 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
                                     struct btrfs_path *path,
                                     struct inode *inode)
  {
+       struct btrfs_block_rsv *rsv;
         loff_t oldsize;
         int ret = 0;
  
+       rsv = trans->block_rsv;
         trans->block_rsv = root->orphan_block_rsv;
         ret = btrfs_block_rsv_check(trans, root,
                                     root->orphan_block_rsv,
@@ -210,6 +212,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
          */
         ret = btrfs_truncate_inode_items(trans, root, inode,
                                          0, BTRFS_EXTENT_DATA_KEY);
+
+       trans->block_rsv = rsv;
         if (ret) {
                 WARN_ON(1);
                 return ret;
@@ -1168,9 +1172,9 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
                 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
  }
  
-static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
-                             struct btrfs_free_space *info, u64 offset,
-                             u64 bytes)
+static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
+                                      struct btrfs_free_space *info,
+                                      u64 offset, u64 bytes)
  {
         unsigned long start, count;
  
@@ -1181,6 +1185,13 @@ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
         bitmap_clear(info->bitmap, start, count);
  
         info->bytes -= bytes;
+}
+
+static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
+                             struct btrfs_free_space *info, u64 offset,
+                             u64 bytes)
+{
+       __bitmap_clear_bits(ctl, info, offset, bytes);
         ctl->free_space -= bytes;
  }
  
@@ -1984,7 +1995,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
                 return 0;
  
         ret = search_start;
-       bitmap_clear_bits(ctl, entry, ret, bytes);
+       __bitmap_clear_bits(ctl, entry, ret, bytes);
  
         return ret;
  }
@@ -2039,7 +2050,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
                                 continue;
                         }
                 } else {
-
                         ret = entry->offset;
  
                         entry->offset += bytes;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 15fceefbca0a02b9d1c9ddf3a4da224228c2ec40..b2d004ad66a0159042c38f96691c92bb0c31537e 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1786,7 +1786,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                           &ordered_extent->list);
  
         ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-       if (!ret) {
+       if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
                 ret = btrfs_update_inode(trans, root, inode);
                 BUG_ON(ret);
         }
@@ -3510,15 +3510,19 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                         err = btrfs_drop_extents(trans, inode, cur_offset,
                                                  cur_offset + hole_size,
                                                  &hint_byte, 1);
-                       if (err)
+                       if (err) {
+                               btrfs_end_transaction(trans, root);
                                 break;
+                       }
  
                         err = btrfs_insert_file_extent(trans, root,
                                         btrfs_ino(inode), cur_offset, 0,
                                         0, hole_size, 0, hole_size,
                                         0, 0, 0);
-                       if (err)
+                       if (err) {
+                               btrfs_end_transaction(trans, root);
                                 break;
+                       }
  
                         btrfs_drop_extent_cache(inode, hole_start,
                                         last_byte - 1, 0);
@@ -3952,7 +3956,6 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
                          struct btrfs_root *root, int *new)
  {
         struct inode *inode;
-       int bad_inode = 0;
  
         inode = btrfs_iget_locked(s, location->objectid, root);
         if (!inode)
@@ -3968,15 +3971,12 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
                         if (new)
                                 *new = 1;
                 } else {
-                       bad_inode = 1;
+                       unlock_new_inode(inode);
+                       iput(inode);
+                       inode = ERR_PTR(-ESTALE);
                 }
         }
  
-       if (bad_inode) {
-               iput(inode);
-               inode = ERR_PTR(-ESTALE);
-       }
-
         return inode;
  }
  
@@ -4018,7 +4018,8 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
                 memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
                 kfree(dentry->d_fsdata);
                 dentry->d_fsdata = NULL;
-               d_clear_need_lookup(dentry);
+               /* This thing is hashed, drop it for now */
+               d_drop(dentry);
         } else {
                 ret = btrfs_inode_by_name(dir, dentry, &location);
         }
@@ -4085,7 +4086,15 @@ static void btrfs_dentry_release(struct dentry *dentry)
  static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
                                    struct nameidata *nd)
  {
-       return d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
+       struct dentry *ret;
+
+       ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
+       if (unlikely(d_need_lookup(dentry))) {
+               spin_lock(&dentry->d_lock);
+               dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
+               spin_unlock(&dentry->d_lock);
+       }
+       return ret;
  }
  
  unsigned char btrfs_filetype_table[] = {
@@ -4125,7 +4134,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
  
         /* special case for "." */
         if (filp->f_pos == 0) {
-               over = filldir(dirent, ".", 1, 1, btrfs_ino(inode), DT_DIR);
+               over = filldir(dirent, ".", 1,
+                              filp->f_pos, btrfs_ino(inode), DT_DIR);
                 if (over)
                         return 0;
                 filp->f_pos = 1;
@@ -4134,7 +4144,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
         if (filp->f_pos == 1) {
                 u64 pino = parent_ino(filp->f_path.dentry);
                 over = filldir(dirent, "..", 2,
-                              2, pino, DT_DIR);
+                              filp->f_pos, pino, DT_DIR);
                 if (over)
                         return 0;
                 filp->f_pos = 2;
@@ -5823,7 +5833,7 @@ again:
  
         add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
         ret = btrfs_ordered_update_i_size(inode, 0, ordered);
-       if (!ret)
+       if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags))
                 btrfs_update_inode(trans, root, inode);
         ret = 0;
  out_unlock:
@@ -7354,11 +7364,15 @@ static int btrfs_set_page_dirty(struct page *page)
  static int btrfs_permission(struct inode *inode, int mask)
  {
         struct btrfs_root *root = BTRFS_I(inode)->root;
+       umode_t mode = inode->i_mode;
  
-       if (btrfs_root_readonly(root) && (mask & MAY_WRITE))
-               return -EROFS;
-       if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
-               return -EACCES;
+       if (mask & MAY_WRITE &&
+           (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
+               if (btrfs_root_readonly(root))
+                       return -EROFS;
+               if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
+                       return -EACCES;
+       }
         return generic_permission(inode, mask);
  }
  
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c

index 7cf013349941f7cb8509ee075d94f9099778c905..d11fd28efa6a2f6aa4b08ad6143a11628c88ad74 100644 (file)
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2177,6 +2177,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
         if (!(src_file->f_mode & FMODE_READ))
                 goto out_fput;
  
+       /* don't make the dst file partly checksummed */
+       if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+           (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
+               goto out_fput;
+
         ret = -EISDIR;
         if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
                 goto out_fput;
@@ -2220,6 +2225,16 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
             !IS_ALIGNED(destoff, bs))
                 goto out_unlock;
  
+       if (destoff > inode->i_size) {
+               ret = btrfs_cont_expand(inode, inode->i_size, destoff);
+               if (ret)
+                       goto out_unlock;
+       }
+
+       /* truncate page cache pages from target inode range */
+       truncate_inode_pages_range(&inode->i_data, destoff,
+                                  PAGE_CACHE_ALIGN(destoff + len) - 1);
+
         /* do any pending delalloc/csum calc on src, one way or
            another, and lock file content */
         while (1) {
@@ -2321,14 +2336,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
  
                         if (type == BTRFS_FILE_EXTENT_REG ||
                             type == BTRFS_FILE_EXTENT_PREALLOC) {
+                               /*
+                                *    a  | --- range to clone ---|  b
+                                * | ------------- extent ------------- |
+                                */
+
+                               /* substract range b */
+                               if (key.offset + datal > off + len)
+                                       datal = off + len - key.offset;
+
+                               /* substract range a */
                                 if (off > key.offset) {
                                         datao += off - key.offset;
                                         datal -= off - key.offset;
                                 }
  
-                               if (key.offset + datal > off + len)
-                                       datal = off + len - key.offset;
-
                                 ret = btrfs_drop_extents(trans, inode,
                                                          new_key.offset,
                                                          new_key.offset + datal,
@@ -2425,7 +2447,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                         if (endoff > inode->i_size)
                                 btrfs_i_size_write(inode, endoff);
  
-                       BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
                         ret = btrfs_update_inode(trans, root, inode);
                         BUG_ON(ret);
                         btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c

index 7dc36fab4afc5b9df1c4655e455c8df8bd3ee39b..e24b7964a15502b2ff0f6325f4a2b98c226b6688 100644 (file)
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -884,6 +884,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
         struct btrfs_root *tree_root = fs_info->tree_root;
         struct btrfs_root *root = pending->root;
         struct btrfs_root *parent_root;
+       struct btrfs_block_rsv *rsv;
         struct inode *parent_inode;
         struct dentry *parent;
         struct dentry *dentry;
@@ -895,6 +896,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
         u64 objectid;
         u64 root_flags;
  
+       rsv = trans->block_rsv;
+
         new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
         if (!new_root_item) {
                 pending->error = -ENOMEM;
@@ -1002,6 +1005,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
         btrfs_orphan_post_snapshot(trans, pending);
  fail:
         kfree(new_root_item);
+       trans->block_rsv = rsv;
         btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1);
         return 0;
  }
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c

index babee65f8edaf779e26bc134ad7d02e8541ff8f2..786639fca067992f75f546e4fc68bc3f2ac2e4fb 100644 (file)
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -799,14 +799,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
                                   struct extent_buffer *eb, int slot,
                                   struct btrfs_key *key)
  {
-       struct inode *dir;
-       int ret;
         struct btrfs_inode_ref *ref;
+       struct btrfs_dir_item *di;
+       struct inode *dir;
         struct inode *inode;
-       char *name;
-       int namelen;
         unsigned long ref_ptr;
         unsigned long ref_end;
+       char *name;
+       int namelen;
+       int ret;
         int search_done = 0;
  
         /*
@@ -909,6 +910,25 @@ again:
         }
         btrfs_release_path(path);
  
+       /* look for a conflicting sequence number */
+       di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
+                                        btrfs_inode_ref_index(eb, ref),
+                                        name, namelen, 0);
+       if (di && !IS_ERR(di)) {
+               ret = drop_one_dir_item(trans, root, path, dir, di);
+               BUG_ON(ret);
+       }
+       btrfs_release_path(path);
+
+       /* look for a conflicing name */
+       di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
+                                  name, namelen, 0);
+       if (di && !IS_ERR(di)) {
+               ret = drop_one_dir_item(trans, root, path, dir, di);
+               BUG_ON(ret);
+       }
+       btrfs_release_path(path);
+
  insert:
         /* insert our name */
         ret = btrfs_add_link(trans, dir, inode, name, namelen, 0,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c

index 53875ae73ad4f634548e8d1d315056822c9cbd4b..f2a4cc79da61da62740d1de33552037f27b072a2 100644 (file)
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -142,6 +142,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
         unsigned long limit;
         unsigned long last_waited = 0;
         int force_reg = 0;
+       int sync_pending = 0;
         struct blk_plug plug;
  
         /*
@@ -229,6 +230,22 @@ loop_lock:
  
                 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
  
+               /*
+                * if we're doing the sync list, record that our
+                * plug has some sync requests on it
+                *
+                * If we're doing the regular list and there are
+                * sync requests sitting around, unplug before
+                * we add more
+                */
+               if (pending_bios == &device->pending_sync_bios) {
+                       sync_pending = 1;
+               } else if (sync_pending) {
+                       blk_finish_plug(&plug);
+                       blk_start_plug(&plug);
+                       sync_pending = 0;
+               }
+
                 submit_bio(cur->bi_rw, cur);
                 num_run++;
                 batch_run++;
@@ -500,6 +517,9 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
                         fs_devices->rw_devices--;
                 }
  
+               if (device->can_discard)
+                       fs_devices->num_can_discard--;
+
                 new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
                 BUG_ON(!new_device);
                 memcpy(new_device, device, sizeof(*new_device));
@@ -508,6 +528,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
                 new_device->bdev = NULL;
                 new_device->writeable = 0;
                 new_device->in_fs_metadata = 0;
+               new_device->can_discard = 0;
                 list_replace_rcu(&device->dev_list, &new_device->dev_list);
  
                 call_rcu(&device->rcu, free_device);
@@ -547,6 +568,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
  static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                                 fmode_t flags, void *holder)
  {
+       struct request_queue *q;
         struct block_device *bdev;
         struct list_head *head = &fs_devices->devices;
         struct btrfs_device *device;
@@ -603,6 +625,12 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                         seeding = 0;
                 }
  
+               q = bdev_get_queue(bdev);
+               if (blk_queue_discard(q)) {
+                       device->can_discard = 1;
+                       fs_devices->num_can_discard++;
+               }
+
                 device->bdev = bdev;
                 device->in_fs_metadata = 0;
                 device->mode = flags;
@@ -835,6 +863,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
  
         max_hole_start = search_start;
         max_hole_size = 0;
+       hole_size = 0;
  
         if (search_start >= search_end) {
                 ret = -ENOSPC;
@@ -917,7 +946,14 @@ next:
                 cond_resched();
         }
  
-       hole_size = search_end- search_start;
+       /*
+        * At this point, search_start should be the end of
+        * allocated dev extents, and when shrinking the device,
+        * search_end may be smaller than search_start.
+        */
+       if (search_end > search_start)
+               hole_size = search_end - search_start;
+
         if (hole_size > max_hole_size) {
                 max_hole_start = search_start;
                 max_hole_size = hole_size;
@@ -1543,6 +1579,7 @@ error:
  
  int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
  {
+       struct request_queue *q;
         struct btrfs_trans_handle *trans;
         struct btrfs_device *device;
         struct block_device *bdev;
@@ -1612,6 +1649,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
  
         lock_chunks(root);
  
+       q = bdev_get_queue(bdev);
+       if (blk_queue_discard(q))
+               device->can_discard = 1;
         device->writeable = 1;
         device->work.func = pending_bios_fn;
         generate_random_uuid(device->uuid);
@@ -1647,6 +1687,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
         root->fs_info->fs_devices->num_devices++;
         root->fs_info->fs_devices->open_devices++;
         root->fs_info->fs_devices->rw_devices++;
+       if (device->can_discard)
+               root->fs_info->fs_devices->num_can_discard++;
         root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
  
         if (!blk_queue_nonrot(bdev_get_queue(bdev)))
@@ -2413,9 +2455,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                         total_avail = device->total_bytes - device->bytes_used;
                 else
                         total_avail = 0;
-               /* avail is off by max(alloc_start, 1MB), but that is the same
-                * for all devices, so it doesn't hurt the sorting later on
-                */
+
+               /* If there is no space on this device, skip it. */
+               if (total_avail == 0)
+                       continue;
  
                 ret = find_free_dev_extent(trans, device,
                                            max_stripe_size * dev_stripes,
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h

index 7c12d61ae7aed7936d07886d8abfb4a8028dea4b..6d866db4e1774ab08e73b8f6c5ad6d8aaa08861b 100644 (file)
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -48,6 +48,7 @@ struct btrfs_device {
         int writeable;
         int in_fs_metadata;
         int missing;
+       int can_discard;
  
         spinlock_t io_lock;
  
@@ -104,6 +105,7 @@ struct btrfs_fs_devices {
         u64 rw_devices;
         u64 missing_devices;
         u64 total_rw_bytes;
+       u64 num_can_discard;
         struct block_device *latest_bdev;
  
         /* all of the devices in the FS, protected by a mutex
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c

index d733b9cfea343207e71bdb6d8d8b51323717c800..69565e5fc6a0f1031394f75e1fe73677ea168a83 100644 (file)
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -116,6 +116,12 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
                 if (ret)
                         goto out;
                 btrfs_release_path(path);
+
+               /*
+                * remove the attribute
+                */
+               if (!value)
+                       goto out;
         }
  
  again:
@@ -158,6 +164,9 @@ out:
         return ret;
  }
  
+/*
+ * @value: "" makes the attribute to empty, NULL removes it
+ */
  int __btrfs_setxattr(struct btrfs_trans_handle *trans,
                      struct inode *inode, const char *name,
                      const void *value, size_t size, int flags)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c

index fee028b5332e0d802b26fdc94dab21057b41028a..86c59e16ba74e459ad0c3f29743ace6663f29206 100644 (file)
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1595,7 +1595,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
                 r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath);
                 dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
                      *ppath);
-       } else if (rpath) {
+       } else if (rpath || rino) {
                 *ino = rino;
                 *ppath = rpath;
                 *pathlen = strlen(rpath);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c

index d47c5ec7fb1ff0500ebbeb7f403debfa865c87a0..88bacaf385d960fcd1ba98e99049e7a47658abe7 100644 (file)
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -813,8 +813,8 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
         fsc = create_fs_client(fsopt, opt);
         if (IS_ERR(fsc)) {
                 res = ERR_CAST(fsc);
-               kfree(fsopt);
-               kfree(opt);
+               destroy_mount_options(fsopt);
+               ceph_destroy_options(opt);
                 goto out_final;
         }
  
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c

index 2fe3cf13b2e92b968221703f0d462a94c834ca29..6d40656e1e2994ad4fedbdcb9645b2367dbce3b7 100644 (file)
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -176,7 +176,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
  
  #ifdef CONFIG_CIFS_STATS2
                         seq_printf(m, " In Send: %d In MaxReq Wait: %d",
-                               atomic_read(&server->inSend),
+                               atomic_read(&server->in_send),
                                 atomic_read(&server->num_waiters));
  #endif
  
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c

index 21de1d6d5849e21977ce9c23bb480439693f2d1e..d0f59faefb7893f2189cca07e660fb424bd3d629 100644 (file)
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -991,24 +991,6 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
         return pntsd;
  }
  
-static int set_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid,
-               struct cifs_ntsd *pnntsd, u32 acllen)
-{
-       int xid, rc;
-       struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
-
-       if (IS_ERR(tlink))
-               return PTR_ERR(tlink);
-
-       xid = GetXid();
-       rc = CIFSSMBSetCIFSACL(xid, tlink_tcon(tlink), fid, pnntsd, acllen);
-       FreeXid(xid);
-       cifs_put_tlink(tlink);
-
-       cFYI(DBG2, "SetCIFSACL rc = %d", rc);
-       return rc;
-}
-
  static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path,
                 struct cifs_ntsd *pnntsd, u32 acllen)
  {
@@ -1047,18 +1029,10 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
                                 struct inode *inode, const char *path)
  {
         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
-       struct cifsFileInfo *open_file;
-       int rc;
  
         cFYI(DBG2, "set ACL for %s from mode 0x%x", path, inode->i_mode);
  
-       open_file = find_readable_file(CIFS_I(inode), true);
-       if (!open_file)
-               return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen);
-
-       rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen);
-       cifsFileInfo_put(open_file);
-       return rc;
+       return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen);
  }
  
  /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h

index cb71dc1f94d1e38dc042bf79e68614032553486a..95da8027983d2a6b47b7eecafb18da75e8335249 100644 (file)
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -125,5 +125,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
  extern const struct export_operations cifs_export_ops;
  #endif /* CIFS_NFSD_EXPORT */
  
-#define CIFS_VERSION   "1.74"
+#define CIFS_VERSION   "1.75"
  #endif                         /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h

index 38ce6d44b1451ef420bc7e233b1afac107b7b932..95dad9d14cf10211156e1896c0e97d270d76ab2f 100644 (file)
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -291,7 +291,7 @@ struct TCP_Server_Info {
         struct fscache_cookie   *fscache; /* client index cache cookie */
  #endif
  #ifdef CONFIG_CIFS_STATS2
-       atomic_t inSend; /* requests trying to send */
+       atomic_t in_send; /* requests trying to send */
         atomic_t num_waiters;   /* blocked waiting to get in sendrecv */
  #endif
  };
@@ -672,12 +672,54 @@ struct mid_q_entry {
         bool multiEnd:1;        /* both received */
  };
  
-struct oplock_q_entry {
-       struct list_head qhead;
-       struct inode *pinode;
-       struct cifs_tcon *tcon;
-       __u16 netfid;
-};
+/*     Make code in transport.c a little cleaner by moving
+       update of optional stats into function below */
+#ifdef CONFIG_CIFS_STATS2
+
+static inline void cifs_in_send_inc(struct TCP_Server_Info *server)
+{
+       atomic_inc(&server->in_send);
+}
+
+static inline void cifs_in_send_dec(struct TCP_Server_Info *server)
+{
+       atomic_dec(&server->in_send);
+}
+
+static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server)
+{
+       atomic_inc(&server->num_waiters);
+}
+
+static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server)
+{
+       atomic_dec(&server->num_waiters);
+}
+
+static inline void cifs_save_when_sent(struct mid_q_entry *mid)
+{
+       mid->when_sent = jiffies;
+}
+#else
+static inline void cifs_in_send_inc(struct TCP_Server_Info *server)
+{
+}
+static inline void cifs_in_send_dec(struct TCP_Server_Info *server)
+{
+}
+
+static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server)
+{
+}
+
+static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server)
+{
+}
+
+static inline void cifs_save_when_sent(struct mid_q_entry *mid)
+{
+}
+#endif
  
  /* for pending dnotify requests */
  struct dir_notify_req {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c

index 80c2e3add3a2714748336560b249b77e348c4278..633c246b67752efe0f7460cb01e8ac62309314e8 100644 (file)
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2878,7 +2878,8 @@ cleanup_volume_info_contents(struct smb_vol *volume_info)
         kfree(volume_info->username);
         kzfree(volume_info->password);
         kfree(volume_info->UNC);
-       kfree(volume_info->UNCip);
+       if (volume_info->UNCip != volume_info->UNC + 2)
+               kfree(volume_info->UNCip);
         kfree(volume_info->domainname);
         kfree(volume_info->iocharset);
         kfree(volume_info->prepath);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c

index ae576fbb5142e508010d53d1506b72fd954575e7..72d448bf96ce57d8f6ed770277da46a3e3d7ace4 100644 (file)
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -105,8 +105,8 @@ cifs_bp_rename_retry:
         }
         rcu_read_unlock();
         if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) {
-               cERROR(1, "did not end path lookup where expected namelen is %d",
-                       namelen);
+               cFYI(1, "did not end path lookup where expected. namelen=%d "
+                       "dfsplen=%d", namelen, dfsplen);
                 /* presumably this is only possible if racing with a rename
                 of one of the parent directories  (we can not lock the dentries
                 above us to prevent this, but retrying should be harmless) */
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c

index c1b9c4b107398bd31504f594f294c5bb6dc7ef60..10ca6b2c26b7ff939e286b08533420b7cca1a539 100644 (file)
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -266,15 +266,11 @@ static int wait_for_free_request(struct TCP_Server_Info *server,
         while (1) {
                 if (atomic_read(&server->inFlight) >= cifs_max_pending) {
                         spin_unlock(&GlobalMid_Lock);
-#ifdef CONFIG_CIFS_STATS2
-                       atomic_inc(&server->num_waiters);
-#endif
+                       cifs_num_waiters_inc(server);
                         wait_event(server->request_q,
                                    atomic_read(&server->inFlight)
                                      < cifs_max_pending);
-#ifdef CONFIG_CIFS_STATS2
-                       atomic_dec(&server->num_waiters);
-#endif
+                       cifs_num_waiters_dec(server);
                         spin_lock(&GlobalMid_Lock);
                 } else {
                         if (server->tcpStatus == CifsExiting) {
@@ -381,15 +377,13 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
         mid->callback = callback;
         mid->callback_data = cbdata;
         mid->midState = MID_REQUEST_SUBMITTED;
-#ifdef CONFIG_CIFS_STATS2
-       atomic_inc(&server->inSend);
-#endif
+
+       cifs_in_send_inc(server);
         rc = smb_sendv(server, iov, nvec);
-#ifdef CONFIG_CIFS_STATS2
-       atomic_dec(&server->inSend);
-       mid->when_sent = jiffies;
-#endif
+       cifs_in_send_dec(server);
+       cifs_save_when_sent(mid);
         mutex_unlock(&server->srv_mutex);
+
         if (rc)
                 goto out_err;
  
@@ -575,14 +569,10 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
         }
  
         midQ->midState = MID_REQUEST_SUBMITTED;
-#ifdef CONFIG_CIFS_STATS2
-       atomic_inc(&ses->server->inSend);
-#endif
+       cifs_in_send_inc(ses->server);
         rc = smb_sendv(ses->server, iov, n_vec);
-#ifdef CONFIG_CIFS_STATS2
-       atomic_dec(&ses->server->inSend);
-       midQ->when_sent = jiffies;
-#endif
+       cifs_in_send_dec(ses->server);
+       cifs_save_when_sent(midQ);
  
         mutex_unlock(&ses->server->srv_mutex);
  
@@ -703,14 +693,11 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
         }
  
         midQ->midState = MID_REQUEST_SUBMITTED;
-#ifdef CONFIG_CIFS_STATS2
-       atomic_inc(&ses->server->inSend);
-#endif
+
+       cifs_in_send_inc(ses->server);
         rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
-#ifdef CONFIG_CIFS_STATS2
-       atomic_dec(&ses->server->inSend);
-       midQ->when_sent = jiffies;
-#endif
+       cifs_in_send_dec(ses->server);
+       cifs_save_when_sent(midQ);
         mutex_unlock(&ses->server->srv_mutex);
  
         if (rc < 0)
@@ -843,14 +830,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
         }
  
         midQ->midState = MID_REQUEST_SUBMITTED;
-#ifdef CONFIG_CIFS_STATS2
-       atomic_inc(&ses->server->inSend);
-#endif
+       cifs_in_send_inc(ses->server);
         rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
-#ifdef CONFIG_CIFS_STATS2
-       atomic_dec(&ses->server->inSend);
-       midQ->when_sent = jiffies;
-#endif
+       cifs_in_send_dec(ses->server);
+       cifs_save_when_sent(midQ);
         mutex_unlock(&ses->server->srv_mutex);
  
         if (rc < 0) {
diff --git a/fs/compat.c b/fs/compat.c

index 0b48d018e38adbc1a72f78a4ab851626f2fffca8..58b1da4598933ca9990494121234be6dcdc35bc0 100644 (file)
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1675,11 +1675,6 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
  }
  #endif /* HAVE_SET_RESTORE_SIGMASK */
  
-long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2)
-{
-       return sys_ni_syscall();
-}
-
  #ifdef CONFIG_EPOLL
  
  #ifdef HAVE_SET_RESTORE_SIGMASK
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c

index 8be086e9abe4d3e5598e85a562744e6e3708a678..51352de88ef10ea41583661f39625f93b475aa34 100644 (file)
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1003,6 +1003,7 @@ COMPATIBLE_IOCTL(PPPIOCCONNECT)
  COMPATIBLE_IOCTL(PPPIOCDISCONN)
  COMPATIBLE_IOCTL(PPPIOCATTCHAN)
  COMPATIBLE_IOCTL(PPPIOCGCHAN)
+COMPATIBLE_IOCTL(PPPIOCGL2TPSTATS)
  /* PPPOX */
  COMPATIBLE_IOCTL(PPPOEIOCSFWD)
  COMPATIBLE_IOCTL(PPPOEIOCDFWD)
diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig

index 1cd6d9d3e29a53dd102524b3b065b39642672dbc..cc16562654ded3c6bd4903171a585107405eaedd 100644 (file)
--- a/fs/ecryptfs/Kconfig
+++ b/fs/ecryptfs/Kconfig
@@ -1,6 +1,6 @@
  config ECRYPT_FS
         tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
-       depends on EXPERIMENTAL && KEYS && CRYPTO
+       depends on EXPERIMENTAL && KEYS && CRYPTO && (ENCRYPTED_KEYS || ENCRYPTED_KEYS=n)
         select CRYPTO_ECB
         select CRYPTO_CBC
         select CRYPTO_MD5
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c

index 08a2b52bf565c2aab2eab60546f961b76ca699c4..ac1ad48c2376df4b3061e55a973b27b42388f510 100644 (file)
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1973,7 +1973,7 @@ pki_encrypt_session_key(struct key *auth_tok_key,
  {
         struct ecryptfs_msg_ctx *msg_ctx = NULL;
         char *payload = NULL;
-       size_t payload_len;
+       size_t payload_len = 0;
         struct ecryptfs_message *msg;
         int rc;
  
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c

index 9f1bb747d77dd17bbcc907b3e249f3865cf4d31e..b4a6befb12168d6b0aa0bb7411794e062ded8734 100644 (file)
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -175,6 +175,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
         ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
         ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
         ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only,
+       ecryptfs_opt_check_dev_ruid,
         ecryptfs_opt_err };
  
  static const match_table_t tokens = {
@@ -191,6 +192,7 @@ static const match_table_t tokens = {
         {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
         {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"},
         {ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"},
+       {ecryptfs_opt_check_dev_ruid, "ecryptfs_check_dev_ruid"},
         {ecryptfs_opt_err, NULL}
  };
  
@@ -236,6 +238,7 @@ static void ecryptfs_init_mount_crypt_stat(
   * ecryptfs_parse_options
   * @sb: The ecryptfs super block
   * @options: The options passed to the kernel
+ * @check_ruid: set to 1 if device uid should be checked against the ruid
   *
   * Parse mount options:
   * debug=N        - ecryptfs_verbosity level for debug output
@@ -251,7 +254,8 @@ static void ecryptfs_init_mount_crypt_stat(
   *
   * Returns zero on success; non-zero on error
   */
-static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
+static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
+                                 uid_t *check_ruid)
  {
         char *p;
         int rc = 0;
@@ -276,6 +280,8 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
         char *cipher_key_bytes_src;
         char *fn_cipher_key_bytes_src;
  
+       *check_ruid = 0;
+
         if (!options) {
                 rc = -EINVAL;
                 goto out;
@@ -380,6 +386,9 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
                         mount_crypt_stat->flags |=
                                 ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY;
                         break;
+               case ecryptfs_opt_check_dev_ruid:
+                       *check_ruid = 1;
+                       break;
                 case ecryptfs_opt_err:
                 default:
                         printk(KERN_WARNING
@@ -475,6 +484,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
         const char *err = "Getting sb failed";
         struct inode *inode;
         struct path path;
+       uid_t check_ruid;
         int rc;
  
         sbi = kmem_cache_zalloc(ecryptfs_sb_info_cache, GFP_KERNEL);
@@ -483,7 +493,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
                 goto out;
         }
  
-       rc = ecryptfs_parse_options(sbi, raw_data);
+       rc = ecryptfs_parse_options(sbi, raw_data, &check_ruid);
         if (rc) {
                 err = "Error parsing options";
                 goto out;
@@ -521,6 +531,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
                         "known incompatibilities\n");
                 goto out_free;
         }
+
+       if (check_ruid && path.dentry->d_inode->i_uid != current_uid()) {
+               rc = -EPERM;
+               printk(KERN_ERR "Mount of device (uid: %d) not owned by "
+                      "requested user (uid: %d)\n",
+                      path.dentry->d_inode->i_uid, current_uid());
+               goto out_free;
+       }
+
         ecryptfs_set_superblock_lower(s, path.dentry->d_sb);
         s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
         s->s_blocksize = path.dentry->d_sb->s_blocksize;
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c

index 85d43096311625b37dd12beeeab7149a00899d0e..3745f7c2b9c214756b778ab40a00af325c0e9e99 100644 (file)
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -39,15 +39,16 @@
  int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
                          loff_t offset, size_t size)
  {
-       struct ecryptfs_inode_info *inode_info;
+       struct file *lower_file;
         mm_segment_t fs_save;
         ssize_t rc;
  
-       inode_info = ecryptfs_inode_to_private(ecryptfs_inode);
-       BUG_ON(!inode_info->lower_file);
+       lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file;
+       if (!lower_file)
+               return -EIO;
         fs_save = get_fs();
         set_fs(get_ds());
-       rc = vfs_write(inode_info->lower_file, data, size, &offset);
+       rc = vfs_write(lower_file, data, size, &offset);
         set_fs(fs_save);
         mark_inode_dirty_sync(ecryptfs_inode);
         return rc;
@@ -225,15 +226,16 @@ out:
  int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
                         struct inode *ecryptfs_inode)
  {
-       struct ecryptfs_inode_info *inode_info =
-               ecryptfs_inode_to_private(ecryptfs_inode);
+       struct file *lower_file;
         mm_segment_t fs_save;
         ssize_t rc;
  
-       BUG_ON(!inode_info->lower_file);
+       lower_file = ecryptfs_inode_to_private(ecryptfs_inode)->lower_file;
+       if (!lower_file)
+               return -EIO;
         fs_save = get_fs();
         set_fs(get_ds());
-       rc = vfs_read(inode_info->lower_file, data, size, &offset);
+       rc = vfs_read(lower_file, data, size, &offset);
         set_fs(fs_save);
         return rc;
  }
diff --git a/fs/exec.c b/fs/exec.c

index da80612a35f42e9bc1bb2d09fe79090e44b3808b..25dcbe5fc35664d6f389ce48f051d6528d607014 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1459,6 +1459,23 @@ static int do_execve_common(const char *filename,
         struct files_struct *displaced;
         bool clear_in_exec;
         int retval;
+       const struct cred *cred = current_cred();
+
+       /*
+        * We move the actual failure in case of RLIMIT_NPROC excess from
+        * set*uid() to execve() because too many poorly written programs
+        * don't check setuid() return code.  Here we additionally recheck
+        * whether NPROC limit is still exceeded.
+        */
+       if ((current->flags & PF_NPROC_EXCEEDED) &&
+           atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) {
+               retval = -EAGAIN;
+               goto out_ret;
+       }
+
+       /* We're below the limit (still or again), so we don't want to make
+        * further execve() calls fail. */
+       current->flags &= ~PF_NPROC_EXCEEDED;
  
         retval = unshare_files(&displaced);
         if (retval)
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c

index 6e18a0b7750db81d5900ad813614e8455bf4b82d..5571708b6a58f7e33e4bddf7130a133d6f952a9d 100644 (file)
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2209,9 +2209,11 @@ static int ext3_symlink (struct inode * dir,
                 /*
                  * For non-fast symlinks, we just allocate inode and put it on
                  * orphan list in the first transaction => we need bitmap,
-                * group descriptor, sb, inode block, quota blocks.
+                * group descriptor, sb, inode block, quota blocks, and
+                * possibly selinux xattr blocks.
                  */
-               credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+               credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
+                         EXT3_XATTR_TRANS_BLOCKS;
         } else {
                 /*
                  * Fast symlink. We have to add entry to directory
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h

index e717dfd2f2b4b2e1871e4c66452202b89191ba61..b7d7bd0f066ef4d02d9cba1cf2f72704cb44a4f5 100644 (file)
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -175,6 +175,7 @@ struct mpage_da_data {
   */
  #define        EXT4_IO_END_UNWRITTEN   0x0001
  #define EXT4_IO_END_ERROR      0x0002
+#define EXT4_IO_END_QUEUED     0x0004
  
  struct ext4_io_page {
         struct page     *p_page;
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h

index bb85757689b6af513dd556559a407049a1fc9782..5802fa1dab18f4a8aac358bd79f3ae92109daf02 100644 (file)
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -289,10 +289,10 @@ static inline int ext4_should_order_data(struct inode *inode)
  
  static inline int ext4_should_writeback_data(struct inode *inode)
  {
-       if (!S_ISREG(inode->i_mode))
-               return 0;
         if (EXT4_JOURNAL(inode) == NULL)
                 return 1;
+       if (!S_ISREG(inode->i_mode))
+               return 0;
         if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
                 return 0;
         if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c

index b8602cde5b5af7d11f2fe3e62cc88661b8fc52b9..0962642119c0475953fc70da38a5ed9bbea417a2 100644 (file)
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -800,12 +800,17 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
         }
  
  retry:
-       if (rw == READ && ext4_should_dioread_nolock(inode))
+       if (rw == READ && ext4_should_dioread_nolock(inode)) {
+               if (unlikely(!list_empty(&ei->i_completed_io_list))) {
+                       mutex_lock(&inode->i_mutex);
+                       ext4_flush_completed_IO(inode);
+                       mutex_unlock(&inode->i_mutex);
+               }
                 ret = __blockdev_direct_IO(rw, iocb, inode,
                                  inode->i_sb->s_bdev, iov,
                                  offset, nr_segs,
                                  ext4_get_block, NULL, NULL, 0);
-       else {
+       } else {
                 ret = blockdev_direct_IO(rw, iocb, inode, iov,
                                  offset, nr_segs, ext4_get_block);
  
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index d47264cafee00e2f95e90460d67c42c2bc1d7656..18d2558b7624b8cf8b16689011ca2049e8e15083 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -120,6 +120,9 @@ void ext4_evict_inode(struct inode *inode)
         int err;
  
         trace_ext4_evict_inode(inode);
+
+       ext4_ioend_wait(inode);
+
         if (inode->i_nlink) {
                 /*
                  * When journalling data dirty buffers are tracked only in the
@@ -983,6 +986,8 @@ static int ext4_journalled_write_end(struct file *file,
         from = pos & (PAGE_CACHE_SIZE - 1);
         to = from + len;
  
+       BUG_ON(!ext4_handle_valid(handle));
+
         if (copied < len) {
                 if (!PageUptodate(page))
                         copied = 0;
@@ -1283,7 +1288,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
                         else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
                                 err = ext4_bio_write_page(&io_submit, page,
                                                           len, mpd->wbc);
-                       else
+                       else if (buffer_uninit(page_bufs)) {
+                               ext4_set_bh_endio(page_bufs, inode);
+                               err = block_write_full_page_endio(page,
+                                       noalloc_get_block_write,
+                                       mpd->wbc, ext4_end_io_buffer_write);
+                       } else
                                 err = block_write_full_page(page,
                                         noalloc_get_block_write, mpd->wbc);
  
@@ -1699,6 +1709,8 @@ static int __ext4_journalled_writepage(struct page *page,
                 goto out;
         }
  
+       BUG_ON(!ext4_handle_valid(handle));
+
         ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
                                 do_journal_get_write_access);
  
@@ -2668,8 +2680,15 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
                 goto out;
         }
  
-       io_end->flag = EXT4_IO_END_UNWRITTEN;
+       /*
+        * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now,
+        * but being more careful is always safe for the future change.
+        */
         inode = io_end->inode;
+       if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+               io_end->flag |= EXT4_IO_END_UNWRITTEN;
+               atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+       }
  
         /* Add the io_end to per-inode completed io list*/
         spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c

index 565a154e22d4bee058d8853cb02539e02509bc2f..f8068c7bae9fd05c7d94880dd909c1f17df7dad5 100644 (file)
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2253,9 +2253,11 @@ static int ext4_symlink(struct inode *dir,
                 /*
                  * For non-fast symlinks, we just allocate inode and put it on
                  * orphan list in the first transaction => we need bitmap,
-                * group descriptor, sb, inode block, quota blocks.
+                * group descriptor, sb, inode block, quota blocks, and
+                * possibly selinux xattr blocks.
                  */
-               credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+               credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
+                         EXT4_XATTR_TRANS_BLOCKS;
         } else {
                 /*
                  * Fast symlink. We have to add entry to directory
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c

index 430c401d089514b6297d5ad88ea2d3c00b9dbb7b..92f38ee13f8a9efacadcfac2fe8d674d5500aebc 100644 (file)
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -142,7 +142,23 @@ static void ext4_end_io_work(struct work_struct *work)
         unsigned long           flags;
         int                     ret;
  
-       mutex_lock(&inode->i_mutex);
+       if (!mutex_trylock(&inode->i_mutex)) {
+               /*
+                * Requeue the work instead of waiting so that the work
+                * items queued after this can be processed.
+                */
+               queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work);
+               /*
+                * To prevent the ext4-dio-unwritten thread from keeping
+                * requeueing end_io requests and occupying cpu for too long,
+                * yield the cpu if it sees an end_io request that has already
+                * been requeued.
+                */
+               if (io->flag & EXT4_IO_END_QUEUED)
+                       yield();
+               io->flag |= EXT4_IO_END_QUEUED;
+               return;
+       }
         ret = ext4_end_io_nolock(io);
         if (ret < 0) {
                 mutex_unlock(&inode->i_mutex);
@@ -334,8 +350,10 @@ submit_and_retry:
         if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
             (io_end->pages[io_end->num_io_pages-1] != io_page))
                 goto submit_and_retry;
-       if (buffer_uninit(bh))
-               io->io_end->flag |= EXT4_IO_END_UNWRITTEN;
+       if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+               io_end->flag |= EXT4_IO_END_UNWRITTEN;
+               atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+       }
         io->io_end->size += bh->b_size;
         io->io_next_block++;
         ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c

index 4687fea0c00f24498ff58c80e7d39d9c086bd62a..44d0c8db2239f958d08d4de4462cdc6ca936d1e1 100644 (file)
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -919,7 +919,6 @@ static void ext4_i_callback(struct rcu_head *head)
  
  static void ext4_destroy_inode(struct inode *inode)
  {
-       ext4_ioend_wait(inode);
         if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
                 ext4_msg(inode->i_sb, KERN_ERR,
                          "Inode %lu (%p): orphan list check failed!",
diff --git a/fs/fat/dir.c b/fs/fat/dir.c

index 4ad64732cbced5460d238d637f54f596e4ab0a07..5efbd5d7701a1d63721f646958d92f36830807f0 100644 (file)
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -1231,7 +1231,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
         struct super_block *sb = dir->i_sb;
         struct msdos_sb_info *sbi = MSDOS_SB(sb);
         struct buffer_head *bh, *prev, *bhs[3]; /* 32*slots (672bytes) */
-       struct msdos_dir_entry *de;
+       struct msdos_dir_entry *uninitialized_var(de);
         int err, free_slots, i, nr_bhs;
         loff_t pos, i_pos;
  
diff --git a/fs/fat/inode.c b/fs/fat/inode.c

index 5942fec22c65ddacec9efd25467ae039784884c6..1726d7303047e1966bc4991b5264113a2dcb7a0d 100644 (file)
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1188,9 +1188,9 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
  out:
         /* UTF-8 doesn't provide FAT semantics */
         if (!strcmp(opts->iocharset, "utf8")) {
-               fat_msg(sb, KERN_ERR, "utf8 is not a recommended IO charset"
+               fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset"
                        " for FAT filesystems, filesystem will be "
-                      "case sensitive!\n");
+                      "case sensitive!");
         }
  
         /* If user doesn't specify allow_utime, it's initialized from dmask. */
@@ -1367,6 +1367,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
         sbi->free_clusters = -1;        /* Don't know yet */
         sbi->free_clus_valid = 0;
         sbi->prev_free = FAT_START_ENT;
+       sb->s_maxbytes = 0xffffffff;
  
         if (!sbi->fat_length && b->fat32_length) {
                 struct fat_boot_fsinfo *fsinfo;
@@ -1377,8 +1378,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
                 sbi->fat_length = le32_to_cpu(b->fat32_length);
                 sbi->root_cluster = le32_to_cpu(b->root_cluster);
  
-               sb->s_maxbytes = 0xffffffff;
-
                 /* MC - if info_sector is 0, don't multiply by 0 */
                 sbi->fsinfo_sector = le16_to_cpu(b->info_sector);
                 if (sbi->fsinfo_sector == 0)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c

index 640fc229df10323b6c9fc94b0bda452157bf1ef0..5cb8614508c339fb5e0c18031f3ebf249a4ec6c0 100644 (file)
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -258,10 +258,14 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
         forget->forget_one.nlookup = nlookup;
  
         spin_lock(&fc->lock);
-       fc->forget_list_tail->next = forget;
-       fc->forget_list_tail = forget;
-       wake_up(&fc->waitq);
-       kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+       if (fc->connected) {
+               fc->forget_list_tail->next = forget;
+               fc->forget_list_tail = forget;
+               wake_up(&fc->waitq);
+               kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+       } else {
+               kfree(forget);
+       }
         spin_unlock(&fc->lock);
  }
  
@@ -1358,6 +1362,10 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
         if (outarg.namelen > FUSE_NAME_MAX)
                 goto err;
  
+       err = -EINVAL;
+       if (size != sizeof(outarg) + outarg.namelen + 1)
+               goto err;
+
         name.name = buf;
         name.len = outarg.namelen;
         err = fuse_copy_one(cs, buf, outarg.namelen + 1);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c

index d480d9af46c964f8b86ced68feb7e787959a4fdb..594f07a81c2899ba33a173be33cfc818afa0d39b 100644 (file)
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -14,6 +14,7 @@
  #include <linux/sched.h>
  #include <linux/module.h>
  #include <linux/compat.h>
+#include <linux/swap.h>
  
  static const struct file_operations fuse_direct_io_file_operations;
  
@@ -245,6 +246,12 @@ void fuse_release_common(struct file *file, int opcode)
         req = ff->reserved_req;
         fuse_prepare_release(ff, file->f_flags, opcode);
  
+       if (ff->flock) {
+               struct fuse_release_in *inarg = &req->misc.release.in;
+               inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
+               inarg->lock_owner = fuse_lock_owner_id(ff->fc,
+                                                      (fl_owner_t) file);
+       }
         /* Hold vfsmount and dentry until release is finished */
         path_get(&file->f_path);
         req->misc.release.path = file->f_path;
@@ -755,18 +762,6 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
         return req->misc.write.out.size;
  }
  
-static int fuse_write_begin(struct file *file, struct address_space *mapping,
-                       loff_t pos, unsigned len, unsigned flags,
-                       struct page **pagep, void **fsdata)
-{
-       pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-
-       *pagep = grab_cache_page_write_begin(mapping, index, flags);
-       if (!*pagep)
-               return -ENOMEM;
-       return 0;
-}
-
  void fuse_write_update_size(struct inode *inode, loff_t pos)
  {
         struct fuse_conn *fc = get_fuse_conn(inode);
@@ -779,62 +774,6 @@ void fuse_write_update_size(struct inode *inode, loff_t pos)
         spin_unlock(&fc->lock);
  }
  
-static int fuse_buffered_write(struct file *file, struct inode *inode,
-                              loff_t pos, unsigned count, struct page *page)
-{
-       int err;
-       size_t nres;
-       struct fuse_conn *fc = get_fuse_conn(inode);
-       unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
-       struct fuse_req *req;
-
-       if (is_bad_inode(inode))
-               return -EIO;
-
-       /*
-        * Make sure writepages on the same page are not mixed up with
-        * plain writes.
-        */
-       fuse_wait_on_page_writeback(inode, page->index);
-
-       req = fuse_get_req(fc);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
-
-       req->in.argpages = 1;
-       req->num_pages = 1;
-       req->pages[0] = page;
-       req->page_offset = offset;
-       nres = fuse_send_write(req, file, pos, count, NULL);
-       err = req->out.h.error;
-       fuse_put_request(fc, req);
-       if (!err && !nres)
-               err = -EIO;
-       if (!err) {
-               pos += nres;
-               fuse_write_update_size(inode, pos);
-               if (count == PAGE_CACHE_SIZE)
-                       SetPageUptodate(page);
-       }
-       fuse_invalidate_attr(inode);
-       return err ? err : nres;
-}
-
-static int fuse_write_end(struct file *file, struct address_space *mapping,
-                       loff_t pos, unsigned len, unsigned copied,
-                       struct page *page, void *fsdata)
-{
-       struct inode *inode = mapping->host;
-       int res = 0;
-
-       if (copied)
-               res = fuse_buffered_write(file, inode, pos, copied, page);
-
-       unlock_page(page);
-       page_cache_release(page);
-       return res;
-}
-
  static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
                                     struct inode *inode, loff_t pos,
                                     size_t count)
@@ -908,6 +847,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
                 pagefault_enable();
                 flush_dcache_page(page);
  
+               mark_page_accessed(page);
+
                 if (!tmp) {
                         unlock_page(page);
                         page_cache_release(page);
@@ -1559,11 +1500,14 @@ static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl)
         struct fuse_conn *fc = get_fuse_conn(inode);
         int err;
  
-       if (fc->no_lock) {
+       if (fc->no_flock) {
                 err = flock_lock_file_wait(file, fl);
         } else {
+               struct fuse_file *ff = file->private_data;
+
                 /* emulate flock with POSIX locks */
                 fl->fl_owner = (fl_owner_t) file;
+               ff->flock = true;
                 err = fuse_setlk(file, fl, 1);
         }
  
@@ -2201,8 +2145,6 @@ static const struct address_space_operations fuse_file_aops  = {
         .readpage       = fuse_readpage,
         .writepage      = fuse_writepage,
         .launder_page   = fuse_launder_page,
-       .write_begin    = fuse_write_begin,
-       .write_end      = fuse_write_end,
         .readpages      = fuse_readpages,
         .set_page_dirty = __set_page_dirty_nobuffers,
         .bmap           = fuse_bmap,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h

index c6aa2d4b851733a250205734215c785be6944507..cf6db0a932192c30d6dc65a327cca84e7258a5dc 100644 (file)
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -135,6 +135,9 @@ struct fuse_file {
  
         /** Wait queue head for poll */
         wait_queue_head_t poll_wait;
+
+       /** Has flock been performed on this file? */
+       bool flock:1;
  };
  
  /** One input argument of a request */
@@ -448,7 +451,7 @@ struct fuse_conn {
         /** Is removexattr not implemented by fs? */
         unsigned no_removexattr:1;
  
-       /** Are file locking primitives not implemented by fs? */
+       /** Are posix file locking primitives not implemented by fs? */
         unsigned no_lock:1;
  
         /** Is access not implemented by fs? */
@@ -472,6 +475,9 @@ struct fuse_conn {
         /** Don't apply umask to creation modes */
         unsigned dont_mask:1;
  
+       /** Are BSD file locking primitives not implemented by fs? */
+       unsigned no_flock:1;
+
         /** The number of requests waiting for completion */
         atomic_t num_waiting;
  
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c

index 38f84cd48b67d057798f8f75fe5c8f22f12b10dc..add96f6ffda563738a90b7a7a1572a9a1e2f3c5b 100644 (file)
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -71,7 +71,7 @@ struct fuse_mount_data {
         unsigned blksize;
  };
  
-struct fuse_forget_link *fuse_alloc_forget()
+struct fuse_forget_link *fuse_alloc_forget(void)
  {
         return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL);
  }
@@ -809,6 +809,13 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                                 fc->async_read = 1;
                         if (!(arg->flags & FUSE_POSIX_LOCKS))
                                 fc->no_lock = 1;
+                       if (arg->minor >= 17) {
+                               if (!(arg->flags & FUSE_FLOCK_LOCKS))
+                                       fc->no_flock = 1;
+                       } else {
+                               if (!(arg->flags & FUSE_POSIX_LOCKS))
+                                       fc->no_flock = 1;
+                       }
                         if (arg->flags & FUSE_ATOMIC_O_TRUNC)
                                 fc->atomic_o_trunc = 1;
                         if (arg->minor >= 9) {
@@ -823,6 +830,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                 } else {
                         ra_pages = fc->max_read / PAGE_CACHE_SIZE;
                         fc->no_lock = 1;
+                       fc->no_flock = 1;
                 }
  
                 fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
@@ -843,7 +851,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
         arg->minor = FUSE_KERNEL_MINOR_VERSION;
         arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
         arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
-               FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK;
+               FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
+               FUSE_FLOCK_LOCKS;
         req->in.h.opcode = FUSE_INIT;
         req->in.numargs = 1;
         req->in.args[0].size = sizeof(*arg);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c

index c106ca22e81260ece215747e90a63acded0f864b..d24a9b666a23cc58081da7eed23038b7929da7bb 100644 (file)
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -344,6 +344,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
         struct inode *root, *inode;
         struct qstr str;
         struct nls_table *nls = NULL;
+       u64 last_fs_block, last_fs_page;
         int err;
  
         err = -EINVAL;
@@ -399,9 +400,13 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
         if (!sbi->rsrc_clump_blocks)
                 sbi->rsrc_clump_blocks = 1;
  
-       err = generic_check_addressable(sbi->alloc_blksz_shift,
-                                       sbi->total_blocks);
-       if (err) {
+       err = -EFBIG;
+       last_fs_block = sbi->total_blocks - 1;
+       last_fs_page = (last_fs_block << sbi->alloc_blksz_shift) >>
+                       PAGE_CACHE_SHIFT;
+
+       if ((last_fs_block > (sector_t)(~0ULL) >> (sbi->alloc_blksz_shift - 9)) ||
+           (last_fs_page > (pgoff_t)(~0ULL))) {
                 printk(KERN_ERR "hfs: filesystem size too large.\n");
                 goto out_free_vhdr;
         }
@@ -525,8 +530,8 @@ out_close_cat_tree:
  out_close_ext_tree:
         hfs_btree_close(sbi->ext_tree);
  out_free_vhdr:
-       kfree(sbi->s_vhdr);
-       kfree(sbi->s_backup_vhdr);
+       kfree(sbi->s_vhdr_buf);
+       kfree(sbi->s_backup_vhdr_buf);
  out_unload_nls:
         unload_nls(sbi->nls);
         unload_nls(nls);
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c

index 10e515a0d452aa1d2b0dce1bfd2d1e41924b9b68..7daf4b852d1c78ca89c31d791e9eddeb9dcf1a0d 100644 (file)
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -272,9 +272,9 @@ reread:
         return 0;
  
  out_free_backup_vhdr:
-       kfree(sbi->s_backup_vhdr);
+       kfree(sbi->s_backup_vhdr_buf);
  out_free_vhdr:
-       kfree(sbi->s_vhdr);
+       kfree(sbi->s_vhdr_buf);
  out:
         return error;
  }
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c

index 87b6e0421c12b8a44b75bf0cf79c198893303552..ec889538e5a6afe4014922e9468845195ea56193 100644 (file)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -491,6 +491,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
                         inode->i_op = &page_symlink_inode_operations;
                         break;
                 }
+               lockdep_annotate_inode_mutex_key(inode);
         }
         return inode;
  }
diff --git a/fs/inode.c b/fs/inode.c

index 73920d555c8890b2bd0fd208ef2b469af142e358..ec7924696a139870c70a0971c8d2d1483a747ade 100644 (file)
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -848,16 +848,9 @@ struct inode *new_inode(struct super_block *sb)
  }
  EXPORT_SYMBOL(new_inode);
  
-/**
- * unlock_new_inode - clear the I_NEW state and wake up any waiters
- * @inode:     new inode to unlock
- *
- * Called when the inode is fully initialised to clear the new state of the
- * inode and wake up anyone waiting for the inode to finish initialisation.
- */
-void unlock_new_inode(struct inode *inode)
-{
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
+void lockdep_annotate_inode_mutex_key(struct inode *inode)
+{
         if (S_ISDIR(inode->i_mode)) {
                 struct file_system_type *type = inode->i_sb->s_type;
  
@@ -873,7 +866,20 @@ void unlock_new_inode(struct inode *inode)
                                           &type->i_mutex_dir_key);
                 }
         }
+}
+EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
  #endif
+
+/**
+ * unlock_new_inode - clear the I_NEW state and wake up any waiters
+ * @inode:     new inode to unlock
+ *
+ * Called when the inode is fully initialised to clear the new state of the
+ * inode and wake up anyone waiting for the inode to finish initialisation.
+ */
+void unlock_new_inode(struct inode *inode)
+{
+       lockdep_annotate_inode_mutex_key(inode);
         spin_lock(&inode->i_lock);
         WARN_ON(!(inode->i_state & I_NEW));
         inode->i_state &= ~I_NEW;
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c

index adcf92d3b6035f0396f4a3428aa91f0fddae1055..7971f37534a359e0dcfed248cb56e56a14c7fac7 100644 (file)
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -68,7 +68,7 @@ int jfs_umount(struct super_block *sb)
                 /*
                  * Wait for outstanding transactions to be written to log:
                  */
-               jfs_flush_journal(log, 1);
+               jfs_flush_journal(log, 2);
  
         /*
          * close fileset inode allocation map (aka fileset inode)
@@ -146,7 +146,7 @@ int jfs_umount_rw(struct super_block *sb)
          *
          * remove file system from log active file system list.
          */
-       jfs_flush_journal(log, 1);
+       jfs_flush_journal(log, 2);
  
         /*
          * Make sure all metadata makes it to disk
diff --git a/fs/namei.c b/fs/namei.c

index 2826db35dc257bb15241376e3b6611582b2e4c92..f4788365ea222676303b9fd7c2b40d4c2040fed1 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -727,25 +727,22 @@ static int follow_automount(struct path *path, unsigned flags,
         if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT))
                 return -EISDIR; /* we actually want to stop here */
  
-       /*
-        * We don't want to mount if someone's just doing a stat and they've
-        * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and
-        * appended a '/' to the name.
+       /* We don't want to mount if someone's just doing a stat -
+        * unless they're stat'ing a directory and appended a '/' to
+        * the name.
+        *
+        * We do, however, want to mount if someone wants to open or
+        * create a file of any type under the mountpoint, wants to
+        * traverse through the mountpoint or wants to open the
+        * mounted directory.  Also, autofs may mark negative dentries
+        * as being automount points.  These will need the attentions
+        * of the daemon to instantiate them before they can be used.
          */
-       if (!(flags & LOOKUP_FOLLOW)) {
-               /* We do, however, want to mount if someone wants to open or
-                * create a file of any type under the mountpoint, wants to
-                * traverse through the mountpoint or wants to open the mounted
-                * directory.
-                * Also, autofs may mark negative dentries as being automount
-                * points.  These will need the attentions of the daemon to
-                * instantiate them before they can be used.
-                */
-               if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
-                            LOOKUP_OPEN | LOOKUP_CREATE)) &&
-                   path->dentry->d_inode)
-                       return -EISDIR;
-       }
+       if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
+                    LOOKUP_OPEN | LOOKUP_CREATE)) &&
+           path->dentry->d_inode)
+               return -EISDIR;
+
         current->total_link_count++;
         if (current->total_link_count >= 40)
                 return -ELOOP;
@@ -2619,6 +2616,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
         if (!dir->i_op->rmdir)
                 return -EPERM;
  
+       dget(dentry);
         mutex_lock(&dentry->d_inode->i_mutex);
  
         error = -EBUSY;
@@ -2639,6 +2637,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
  
  out:
         mutex_unlock(&dentry->d_inode->i_mutex);
+       dput(dentry);
         if (!error)
                 d_delete(dentry);
         return error;
@@ -3028,6 +3027,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
         if (error)
                 return error;
  
+       dget(new_dentry);
         if (target)
                 mutex_lock(&target->i_mutex);
  
@@ -3048,6 +3048,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
  out:
         if (target)
                 mutex_unlock(&target->i_mutex);
+       dput(new_dentry);
         if (!error)
                 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
                         d_move(old_dentry,new_dentry);
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig

index be020771c6b46188b9c4534a99c4014c76ed5957..dbcd82126aed309026dc55f33754f8209ceaaeb5 100644 (file)
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -79,12 +79,9 @@ config NFS_V4_1
         depends on NFS_FS && NFS_V4 && EXPERIMENTAL
         select SUNRPC_BACKCHANNEL
         select PNFS_FILE_LAYOUT
-       select PNFS_BLOCK
-       select MD
-       select BLK_DEV_DM
         help
           This option enables support for minor version 1 of the NFSv4 protocol
-         (RFC 5661 and RFC 5663) in the kernel's NFS client.
+         (RFC 5661) in the kernel's NFS client.
  
           If unsure, say N.
  
@@ -93,16 +90,13 @@ config PNFS_FILE_LAYOUT
  
  config PNFS_BLOCK
         tristate
+       depends on NFS_FS && NFS_V4_1 && BLK_DEV_DM
+       default m
  
  config PNFS_OBJLAYOUT
-       tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
+       tristate
         depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
-       help
-         Say M here if you want your pNFS client to support the Objects Layout Driver.
-         Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and
-         upper level driver (SCSI_OSD_ULD).
-
-         If unsure, say N.
+       default m
  
  config ROOT_NFS
         bool "Root file system on NFS"
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c

index e56564d2ef95a37608106fb439723ff3e3c1e90d..9561c8fc8bdb6d7fed807d94f6cb5c2e3bb52276 100644 (file)
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -36,6 +36,7 @@
  #include <linux/namei.h>
  #include <linux/bio.h>         /* struct bio */
  #include <linux/buffer_head.h> /* various write calls */
+#include <linux/prefetch.h>
  
  #include "blocklayout.h"
  
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h

index b257383bb565a1331a68197a53d45545d436225e..07df5f1d85e5188b16d51f13049dd925870b5238 100644 (file)
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -38,6 +38,7 @@ enum nfs4_callback_opnum {
  struct cb_process_state {
         __be32                  drc_status;
         struct nfs_client       *clp;
+       int                     slotid;
  };
  
  struct cb_compound_hdr_arg {
@@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutrecall(
         void *dummy, struct cb_process_state *cps);
  
  extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
-extern void nfs4_cb_take_slot(struct nfs_client *clp);
  
  struct cb_devicenotifyitem {
         uint32_t                cbd_notify_type;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c

index 74780f9f852c6d3c5a01d71c3e2461ace4322963..43926add945b0a4a88625b23062e1cdd393a743d 100644 (file)
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -348,7 +348,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
         /* Normal */
         if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
                 slot->seq_nr++;
-               return htonl(NFS4_OK);
+               goto out_ok;
         }
  
         /* Replay */
@@ -367,11 +367,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
         /* Wraparound */
         if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) {
                 slot->seq_nr = 1;
-               return htonl(NFS4_OK);
+               goto out_ok;
         }
  
         /* Misordered request */
         return htonl(NFS4ERR_SEQ_MISORDERED);
+out_ok:
+       tbl->highest_used_slotid = args->csa_slotid;
+       return htonl(NFS4_OK);
  }
  
  /*
@@ -433,26 +436,37 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
                               struct cb_sequenceres *res,
                               struct cb_process_state *cps)
  {
+       struct nfs4_slot_table *tbl;
         struct nfs_client *clp;
         int i;
         __be32 status = htonl(NFS4ERR_BADSESSION);
  
-       cps->clp = NULL;
-
         clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid);
         if (clp == NULL)
                 goto out;
  
+       tbl = &clp->cl_session->bc_slot_table;
+
+       spin_lock(&tbl->slot_tbl_lock);
         /* state manager is resetting the session */
         if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) {
-               status = NFS4ERR_DELAY;
+               spin_unlock(&tbl->slot_tbl_lock);
+               status = htonl(NFS4ERR_DELAY);
+               /* Return NFS4ERR_BADSESSION if we're draining the session
+                * in order to reset it.
+                */
+               if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
+                       status = htonl(NFS4ERR_BADSESSION);
                 goto out;
         }
  
         status = validate_seqid(&clp->cl_session->bc_slot_table, args);
+       spin_unlock(&tbl->slot_tbl_lock);
         if (status)
                 goto out;
  
+       cps->slotid = args->csa_slotid;
+
         /*
          * Check for pending referring calls.  If a match is found, a
          * related callback was received before the response to the original
@@ -469,7 +483,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
         res->csr_slotid = args->csa_slotid;
         res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
         res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
-       nfs4_cb_take_slot(clp);
  
  out:
         cps->clp = clp; /* put in nfs4_callback_compound */
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c

index c6c86a77e043fe938c40df750314116b8a3fcbb0..918ad647afeaf4fe62840ea8d3146bb6ed87f406 100644 (file)
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -754,26 +754,15 @@ static void nfs4_callback_free_slot(struct nfs4_session *session)
          * Let the state manager know callback processing done.
          * A single slot, so highest used slotid is either 0 or -1
          */
-       tbl->highest_used_slotid--;
+       tbl->highest_used_slotid = -1;
         nfs4_check_drain_bc_complete(session);
         spin_unlock(&tbl->slot_tbl_lock);
  }
  
-static void nfs4_cb_free_slot(struct nfs_client *clp)
+static void nfs4_cb_free_slot(struct cb_process_state *cps)
  {
-       if (clp && clp->cl_session)
-               nfs4_callback_free_slot(clp->cl_session);
-}
-
-/* A single slot, so highest used slotid is either 0 or -1 */
-void nfs4_cb_take_slot(struct nfs_client *clp)
-{
-       struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table;
-
-       spin_lock(&tbl->slot_tbl_lock);
-       tbl->highest_used_slotid++;
-       BUG_ON(tbl->highest_used_slotid != 0);
-       spin_unlock(&tbl->slot_tbl_lock);
+       if (cps->slotid != -1)
+               nfs4_callback_free_slot(cps->clp->cl_session);
  }
  
  #else /* CONFIG_NFS_V4_1 */
@@ -784,7 +773,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
         return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
  }
  
-static void nfs4_cb_free_slot(struct nfs_client *clp)
+static void nfs4_cb_free_slot(struct cb_process_state *cps)
  {
  }
  #endif /* CONFIG_NFS_V4_1 */
@@ -866,6 +855,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
         struct cb_process_state cps = {
                 .drc_status = 0,
                 .clp = NULL,
+               .slotid = -1,
         };
         unsigned int nops = 0;
  
@@ -906,7 +896,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
  
         *hdr_res.status = status;
         *hdr_res.nops = htonl(nops);
-       nfs4_cb_free_slot(cps.clp);
+       nfs4_cb_free_slot(&cps);
         nfs_put_client(cps.clp);
         dprintk("%s: done, status = %u\n", __func__, ntohl(status));
         return rpc_success;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h

index 1ec1a85fa71c027b2523dcac7636a0a43a552165..3e93e9a1bee14073c1adf5c8bab9a5a652e0f371 100644 (file)
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -56,6 +56,9 @@ enum nfs4_session_state {
         NFS4_SESSION_DRAINING,
  };
  
+#define NFS4_RENEW_TIMEOUT             0x01
+#define NFS4_RENEW_DELEGATION_CB       0x02
+
  struct nfs4_minor_version_ops {
         u32     minor_version;
  
@@ -225,7 +228,7 @@ struct nfs4_state_recovery_ops {
  };
  
  struct nfs4_state_maintenance_ops {
-       int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *);
+       int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *, unsigned);
         struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *);
         int (*renew_lease)(struct nfs_client *, struct rpc_cred *);
  };
@@ -237,8 +240,6 @@ extern const struct inode_operations nfs4_dir_inode_operations;
  extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
  extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
  extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
-extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
  extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
  extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
  extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
@@ -349,6 +350,7 @@ extern void nfs4_close_sync(struct nfs4_state *, fmode_t);
  extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
  extern void nfs4_schedule_lease_recovery(struct nfs_client *);
  extern void nfs4_schedule_state_manager(struct nfs_client *);
+extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
  extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
  extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
  extern void nfs41_handle_recall_slot(struct nfs_client *clp);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c

index 8c77039e7a811a0fb08ab5e8f7f04e82e087642d..4700fae1ada0c543b4f2095ea941291a07fd61b1 100644 (file)
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3374,9 +3374,13 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
  
         if (task->tk_status < 0) {
                 /* Unless we're shutting down, schedule state recovery! */
-               if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
+               if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0)
+                       return;
+               if (task->tk_status != NFS4ERR_CB_PATH_DOWN) {
                         nfs4_schedule_lease_recovery(clp);
-               return;
+                       return;
+               }
+               nfs4_schedule_path_down_recovery(clp);
         }
         do_renew_lease(clp, timestamp);
  }
@@ -3386,7 +3390,7 @@ static const struct rpc_call_ops nfs4_renew_ops = {
         .rpc_release = nfs4_renew_release,
  };
  
-int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
+static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags)
  {
         struct rpc_message msg = {
                 .rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -3395,9 +3399,11 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
         };
         struct nfs4_renewdata *data;
  
+       if (renew_flags == 0)
+               return 0;
         if (!atomic_inc_not_zero(&clp->cl_count))
                 return -EIO;
-       data = kmalloc(sizeof(*data), GFP_KERNEL);
+       data = kmalloc(sizeof(*data), GFP_NOFS);
         if (data == NULL)
                 return -ENOMEM;
         data->client = clp;
@@ -3406,7 +3412,7 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
                         &nfs4_renew_ops, data);
  }
  
-int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
+static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
  {
         struct rpc_message msg = {
                 .rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -5504,11 +5510,13 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
         return rpc_run_task(&task_setup_data);
  }
  
-static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags)
  {
         struct rpc_task *task;
         int ret = 0;
  
+       if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
+               return 0;
         task = _nfs41_proc_sequence(clp, cred);
         if (IS_ERR(task))
                 ret = PTR_ERR(task);
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c

index df8e7f3ca56d9001019e9e08887f04b44421fb6e..dc484c0eae7f9706716e4a73cabe03857dd3ae15 100644 (file)
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -60,6 +60,7 @@ nfs4_renew_state(struct work_struct *work)
         struct rpc_cred *cred;
         long lease;
         unsigned long last, now;
+       unsigned renew_flags = 0;
  
         ops = clp->cl_mvops->state_renewal_ops;
         dprintk("%s: start\n", __func__);
@@ -72,18 +73,23 @@ nfs4_renew_state(struct work_struct *work)
         last = clp->cl_last_renewal;
         now = jiffies;
         /* Are we close to a lease timeout? */
-       if (time_after(now, last + lease/3)) {
+       if (time_after(now, last + lease/3))
+               renew_flags |= NFS4_RENEW_TIMEOUT;
+       if (nfs_delegations_present(clp))
+               renew_flags |= NFS4_RENEW_DELEGATION_CB;
+
+       if (renew_flags != 0) {
                 cred = ops->get_state_renewal_cred_locked(clp);
                 spin_unlock(&clp->cl_lock);
                 if (cred == NULL) {
-                       if (!nfs_delegations_present(clp)) {
+                       if (!(renew_flags & NFS4_RENEW_DELEGATION_CB)) {
                                 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
                                 goto out;
                         }
                         nfs_expire_all_delegations(clp);
                 } else {
                         /* Queue an asynchronous RENEW. */
-                       ops->sched_state_renewal(clp, cred);
+                       ops->sched_state_renewal(clp, cred, renew_flags);
                         put_rpccred(cred);
                         goto out_exp;
                 }
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c

index 72ab97ef3d617fdee9d2f538908dcac245f42093..39914be40b03694008ada2c56af6aaf5fb3a7f97 100644 (file)
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1038,6 +1038,12 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp)
         nfs4_schedule_state_manager(clp);
  }
  
+void nfs4_schedule_path_down_recovery(struct nfs_client *clp)
+{
+       nfs_handle_cb_pathdown(clp);
+       nfs4_schedule_state_manager(clp);
+}
+
  static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
  {
  
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c

index 9383ca7245bc7f3ef5a5eeeb9a3b64248f933079..d0cda12fddc325c104ff643455ebae50afbb1ac9 100644 (file)
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -479,7 +479,6 @@ static int _io_check(struct objio_state *ios, bool is_write)
         for (i = 0; i <  ios->numdevs; i++) {
                 struct osd_sense_info osi;
                 struct osd_request *or = ios->per_dev[i].or;
-               unsigned dev;
                 int ret;
  
                 if (!or)
@@ -500,9 +499,8 @@ static int _io_check(struct objio_state *ios, bool is_write)
  
                         continue; /* we recovered */
                 }
-               dev = ios->per_dev[i].dev;
-               objlayout_io_set_result(&ios->ol_state, dev,
-                                       &ios->layout->comps[dev].oc_object_id,
+               objlayout_io_set_result(&ios->ol_state, i,
+                                       &ios->layout->comps[i].oc_object_id,
                                         osd_pri_2_pnfs_err(osi.osd_err_pri),
                                         ios->per_dev[i].offset,
                                         ios->per_dev[i].length,
@@ -589,22 +587,19 @@ static void _calc_stripe_info(struct objio_state *ios, u64 file_offset,
  }
  
  static int _add_stripe_unit(struct objio_state *ios,  unsigned *cur_pg,
-               unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len,
+               unsigned pgbase, struct _objio_per_comp *per_dev, int len,
                 gfp_t gfp_flags)
  {
         unsigned pg = *cur_pg;
+       int cur_len = len;
         struct request_queue *q =
                         osd_request_queue(_io_od(ios, per_dev->dev));
  
-       per_dev->length += cur_len;
-
         if (per_dev->bio == NULL) {
-               unsigned stripes = ios->layout->num_comps /
-                                                    ios->layout->mirrors_p1;
-               unsigned pages_in_stripe = stripes *
+               unsigned pages_in_stripe = ios->layout->group_width *
                                       (ios->layout->stripe_unit / PAGE_SIZE);
                 unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) /
-                                   stripes;
+                                   ios->layout->group_width;
  
                 if (BIO_MAX_PAGES_KMALLOC < bio_size)
                         bio_size = BIO_MAX_PAGES_KMALLOC;
@@ -632,6 +627,7 @@ static int _add_stripe_unit(struct objio_state *ios,  unsigned *cur_pg,
         }
         BUG_ON(cur_len);
  
+       per_dev->length += len;
         *cur_pg = pg;
         return 0;
  }
@@ -650,7 +646,7 @@ static int _prepare_one_group(struct objio_state *ios, u64 length,
         int ret = 0;
  
         while (length) {
-               struct _objio_per_comp *per_dev = &ios->per_dev[dev];
+               struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev];
                 unsigned cur_len, page_off = 0;
  
                 if (!per_dev->length) {
@@ -670,8 +666,8 @@ static int _prepare_one_group(struct objio_state *ios, u64 length,
                                 cur_len = stripe_unit;
                         }
  
-                       if (max_comp < dev)
-                               max_comp = dev;
+                       if (max_comp < dev - first_dev)
+                               max_comp = dev - first_dev;
                 } else {
                         cur_len = stripe_unit;
                 }
@@ -806,7 +802,7 @@ static int _read_mirrors(struct objio_state *ios, unsigned cur_comp)
         struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
         unsigned dev = per_dev->dev;
         struct pnfs_osd_object_cred *cred =
-                       &ios->layout->comps[dev];
+                       &ios->layout->comps[cur_comp];
         struct osd_obj_id obj = {
                 .partition = cred->oc_object_id.oid_partition_id,
                 .id = cred->oc_object_id.oid_object_id,
@@ -904,7 +900,7 @@ static int _write_mirrors(struct objio_state *ios, unsigned cur_comp)
         for (; cur_comp < last_comp; ++cur_comp, ++dev) {
                 struct osd_request *or = NULL;
                 struct pnfs_osd_object_cred *cred =
-                                       &ios->layout->comps[dev];
+                                       &ios->layout->comps[cur_comp];
                 struct osd_obj_id obj = {
                         .partition = cred->oc_object_id.oid_partition_id,
                         .id = cred->oc_object_id.oid_object_id,
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c

index 16fc758e91233a7c931cdda1d05148ad8dc5ae44..b3918f7ac34d0eaa0f778fa0bb170fe5dedddcfe 100644 (file)
--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
@@ -170,6 +170,9 @@ int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
         p = _osd_xdr_decode_data_map(p, &layout->olo_map);
         layout->olo_comps_index = be32_to_cpup(p++);
         layout->olo_num_comps = be32_to_cpup(p++);
+       dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__,
+               layout->olo_comps_index, layout->olo_num_comps);
+
         iter->total_comps = layout->olo_num_comps;
         return 0;
  }
diff --git a/fs/nfs/super.c b/fs/nfs/super.c

index b961ceac66b43ade04dc3cc960e1e382530f8e66..9b7dd7013b155e2f0e38a783b0aebe44446b9655 100644 (file)
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2035,9 +2035,6 @@ static inline void nfs_initialise_sb(struct super_block *sb)
                 sb->s_blocksize = nfs_block_bits(server->wsize,
                                                  &sb->s_blocksize_bits);
  
-       if (server->flags & NFS_MOUNT_NOAC)
-               sb->s_flags |= MS_SYNCHRONOUS;
-
         sb->s_bdi = &server->backing_dev_info;
  
         nfs_super_set_maxbytes(sb, server->maxfilesize);
@@ -2249,6 +2246,10 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
         if (server->flags & NFS_MOUNT_UNSHARED)
                 compare_super = NULL;
  
+       /* -o noac implies -o sync */
+       if (server->flags & NFS_MOUNT_NOAC)
+               sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+
         /* Get a superblock - note that we may end up sharing one that already exists */
         s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata);
         if (IS_ERR(s)) {
@@ -2361,6 +2362,10 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
         if (server->flags & NFS_MOUNT_UNSHARED)
                 compare_super = NULL;
  
+       /* -o noac implies -o sync */
+       if (server->flags & NFS_MOUNT_NOAC)
+               sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+
         /* Get a superblock - note that we may end up sharing one that already exists */
         s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata);
         if (IS_ERR(s)) {
@@ -2628,6 +2633,10 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
         if (server->flags & NFS4_MOUNT_UNSHARED)
                 compare_super = NULL;
  
+       /* -o noac implies -o sync */
+       if (server->flags & NFS_MOUNT_NOAC)
+               sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+
         /* Get a superblock - note that we may end up sharing one that already exists */
         s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
         if (IS_ERR(s)) {
@@ -2916,6 +2925,10 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
         if (server->flags & NFS4_MOUNT_UNSHARED)
                 compare_super = NULL;
  
+       /* -o noac implies -o sync */
+       if (server->flags & NFS_MOUNT_NOAC)
+               sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+
         /* Get a superblock - note that we may end up sharing one that already exists */
         s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
         if (IS_ERR(s)) {
@@ -3003,6 +3016,10 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
         if (server->flags & NFS4_MOUNT_UNSHARED)
                 compare_super = NULL;
  
+       /* -o noac implies -o sync */
+       if (server->flags & NFS_MOUNT_NOAC)
+               sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+
         /* Get a superblock - note that we may end up sharing one that already exists */
         s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
         if (IS_ERR(s)) {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c

index b39b37f8091306548dd6516b3865002b1bb72ade..c9bd2a6b7d4ba757729fa28fa185d84edd745d70 100644 (file)
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -958,7 +958,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
                 if (!data)
                         goto out_bad;
                 data->pagevec[0] = page;
-               nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags);
+               nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
                 list_add(&data->list, res);
                 requests++;
                 nbytes -= len;
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h

index 45174b53437730b350d97658318721579448d20f..feb361e252acfe801d186849430c553e39a4268e 100644 (file)
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -335,9 +335,9 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
  #define DBGKEY(key)  ((char *)(key))
  #define DBGKEY1(key) ((char *)(key))
  
-#define ubifs_dbg_msg(fmt, ...) do {               \
-       if (0)                                     \
-               pr_debug(fmt "\n", ##__VA_ARGS__); \
+#define ubifs_dbg_msg(fmt, ...) do {                        \
+       if (0)                                              \
+               printk(KERN_DEBUG fmt "\n", ##__VA_ARGS__); \
  } while (0)
  
  #define dbg_dump_stack()
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile

index 75bb316529ddabb27855025fd59832bc4a52a4c8..427a4e82a588759dbfb49394f73eca9400d455e7 100644 (file)
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -16,44 +16,53 @@
  # Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  #
  
-ccflags-y := -I$(src) -I$(src)/linux-2.6
-ccflags-$(CONFIG_XFS_DEBUG) += -g
+ccflags-y += -I$(src)                  # needed for trace events
  
-XFS_LINUX := linux-2.6
+ccflags-$(CONFIG_XFS_DEBUG) += -g
  
  obj-$(CONFIG_XFS_FS)           += xfs.o
  
-xfs-y                          += linux-2.6/xfs_trace.o
-
-xfs-$(CONFIG_XFS_QUOTA)                += $(addprefix quota/, \
-                                  xfs_dquot.o \
-                                  xfs_dquot_item.o \
-                                  xfs_trans_dquot.o \
-                                  xfs_qm_syscalls.o \
-                                  xfs_qm_bhv.o \
-                                  xfs_qm.o)
-xfs-$(CONFIG_XFS_QUOTA)                += linux-2.6/xfs_quotaops.o
-
-ifeq ($(CONFIG_XFS_QUOTA),y)
-xfs-$(CONFIG_PROC_FS)          += quota/xfs_qm_stats.o
-endif
-
-xfs-$(CONFIG_XFS_RT)           += xfs_rtalloc.o
-xfs-$(CONFIG_XFS_POSIX_ACL)    += $(XFS_LINUX)/xfs_acl.o
-xfs-$(CONFIG_PROC_FS)          += $(XFS_LINUX)/xfs_stats.o
-xfs-$(CONFIG_SYSCTL)           += $(XFS_LINUX)/xfs_sysctl.o
-xfs-$(CONFIG_COMPAT)           += $(XFS_LINUX)/xfs_ioctl32.o
+# this one should be compiled first, as the tracing macros can easily blow up
+xfs-y                          += xfs_trace.o
  
+# highlevel code
+xfs-y                          += xfs_aops.o \
+                                  xfs_bit.o \
+                                  xfs_buf.o \
+                                  xfs_dfrag.o \
+                                  xfs_discard.o \
+                                  xfs_error.o \
+                                  xfs_export.o \
+                                  xfs_file.o \
+                                  xfs_filestream.o \
+                                  xfs_fsops.o \
+                                  xfs_fs_subr.o \
+                                  xfs_globals.o \
+                                  xfs_iget.o \
+                                  xfs_ioctl.o \
+                                  xfs_iomap.o \
+                                  xfs_iops.o \
+                                  xfs_itable.o \
+                                  xfs_message.o \
+                                  xfs_mru_cache.o \
+                                  xfs_super.o \
+                                  xfs_sync.o \
+                                  xfs_xattr.o \
+                                  xfs_rename.o \
+                                  xfs_rw.o \
+                                  xfs_utils.o \
+                                  xfs_vnodeops.o \
+                                  kmem.o \
+                                  uuid.o
  
+# code shared with libxfs
  xfs-y                          += xfs_alloc.o \
                                    xfs_alloc_btree.o \
                                    xfs_attr.o \
                                    xfs_attr_leaf.o \
-                                  xfs_bit.o \
                                    xfs_bmap.o \
                                    xfs_bmap_btree.o \
                                    xfs_btree.o \
-                                  xfs_buf_item.o \
                                    xfs_da_btree.o \
                                    xfs_dir2.o \
                                    xfs_dir2_block.o \
@@ -61,49 +70,37 @@ xfs-y                               += xfs_alloc.o \
                                    xfs_dir2_leaf.o \
                                    xfs_dir2_node.o \
                                    xfs_dir2_sf.o \
-                                  xfs_error.o \
-                                  xfs_extfree_item.o \
-                                  xfs_filestream.o \
-                                  xfs_fsops.o \
                                    xfs_ialloc.o \
                                    xfs_ialloc_btree.o \
-                                  xfs_iget.o \
                                    xfs_inode.o \
-                                  xfs_inode_item.o \
-                                  xfs_iomap.o \
-                                  xfs_itable.o \
-                                  xfs_dfrag.o \
-                                  xfs_log.o \
-                                  xfs_log_cil.o \
                                    xfs_log_recover.o \
                                    xfs_mount.o \
-                                  xfs_mru_cache.o \
-                                  xfs_rename.o \
-                                  xfs_trans.o \
+                                  xfs_trans.o
+
+# low-level transaction/log code
+xfs-y                          += xfs_log.o \
+                                  xfs_log_cil.o \
+                                  xfs_buf_item.o \
+                                  xfs_extfree_item.o \
+                                  xfs_inode_item.o \
                                    xfs_trans_ail.o \
                                    xfs_trans_buf.o \
                                    xfs_trans_extfree.o \
                                    xfs_trans_inode.o \
-                                  xfs_utils.o \
-                                  xfs_vnodeops.o \
-                                  xfs_rw.o
-
-# Objects in linux/
-xfs-y                          += $(addprefix $(XFS_LINUX)/, \
-                                  kmem.o \
-                                  xfs_aops.o \
-                                  xfs_buf.o \
-                                  xfs_discard.o \
-                                  xfs_export.o \
-                                  xfs_file.o \
-                                  xfs_fs_subr.o \
-                                  xfs_globals.o \
-                                  xfs_ioctl.o \
-                                  xfs_iops.o \
-                                  xfs_message.o \
-                                  xfs_super.o \
-                                  xfs_sync.o \
-                                  xfs_xattr.o)
  
-# Objects in support/
-xfs-y                          += support/uuid.o
+# optional features
+xfs-$(CONFIG_XFS_QUOTA)                += xfs_dquot.o \
+                                  xfs_dquot_item.o \
+                                  xfs_trans_dquot.o \
+                                  xfs_qm_syscalls.o \
+                                  xfs_qm_bhv.o \
+                                  xfs_qm.o \
+                                  xfs_quotaops.o
+ifeq ($(CONFIG_XFS_QUOTA),y)
+xfs-$(CONFIG_PROC_FS)          += xfs_qm_stats.o
+endif
+xfs-$(CONFIG_XFS_RT)           += xfs_rtalloc.o
+xfs-$(CONFIG_XFS_POSIX_ACL)    += xfs_acl.o
+xfs-$(CONFIG_PROC_FS)          += xfs_stats.o
+xfs-$(CONFIG_SYSCTL)           += xfs_sysctl.o
+xfs-$(CONFIG_COMPAT)           += xfs_ioctl32.o
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c

new file mode 100644 (file)

index 0000000..a907de5
--- /dev/null
+++ b/fs/xfs/kmem.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
+#include "time.h"
+#include "kmem.h"
+#include "xfs_message.h"
+
+/*
+ * Greedy allocation.  May fail and may return vmalloced memory.
+ *
+ * Must be freed using kmem_free_large.
+ */
+void *
+kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
+{
+       void            *ptr;
+       size_t          kmsize = maxsize;
+
+       while (!(ptr = kmem_zalloc_large(kmsize))) {
+               if ((kmsize >>= 1) <= minsize)
+                       kmsize = minsize;
+       }
+       if (ptr)
+               *size = kmsize;
+       return ptr;
+}
+
+void *
+kmem_alloc(size_t size, unsigned int __nocast flags)
+{
+       int     retries = 0;
+       gfp_t   lflags = kmem_flags_convert(flags);
+       void    *ptr;
+
+       do {
+               ptr = kmalloc(size, lflags);
+               if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
+                       return ptr;
+               if (!(++retries % 100))
+                       xfs_err(NULL,
+               "possible memory allocation deadlock in %s (mode:0x%x)",
+                                       __func__, lflags);
+               congestion_wait(BLK_RW_ASYNC, HZ/50);
+       } while (1);
+}
+
+void *
+kmem_zalloc(size_t size, unsigned int __nocast flags)
+{
+       void    *ptr;
+
+       ptr = kmem_alloc(size, flags);
+       if (ptr)
+               memset((char *)ptr, 0, (int)size);
+       return ptr;
+}
+
+void
+kmem_free(const void *ptr)
+{
+       if (!is_vmalloc_addr(ptr)) {
+               kfree(ptr);
+       } else {
+               vfree(ptr);
+       }
+}
+
+void *
+kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
+            unsigned int __nocast flags)
+{
+       void    *new;
+
+       new = kmem_alloc(newsize, flags);
+       if (ptr) {
+               if (new)
+                       memcpy(new, ptr,
+                               ((oldsize < newsize) ? oldsize : newsize));
+               kmem_free(ptr);
+       }
+       return new;
+}
+
+void *
+kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
+{
+       int     retries = 0;
+       gfp_t   lflags = kmem_flags_convert(flags);
+       void    *ptr;
+
+       do {
+               ptr = kmem_cache_alloc(zone, lflags);
+               if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
+                       return ptr;
+               if (!(++retries % 100))
+                       xfs_err(NULL,
+               "possible memory allocation deadlock in %s (mode:0x%x)",
+                                       __func__, lflags);
+               congestion_wait(BLK_RW_ASYNC, HZ/50);
+       } while (1);
+}
+
+void *
+kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
+{
+       void    *ptr;
+
+       ptr = kmem_zone_alloc(zone, flags);
+       if (ptr)
+               memset((char *)ptr, 0, kmem_cache_size(zone));
+       return ptr;
+}
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h

new file mode 100644 (file)

index 0000000..f7c8f7a
--- /dev/null
+++ b/fs/xfs/kmem.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPPORT_KMEM_H__
+#define __XFS_SUPPORT_KMEM_H__
+
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+
+/*
+ * General memory allocation interfaces
+ */
+
+#define KM_SLEEP       0x0001u
+#define KM_NOSLEEP     0x0002u
+#define KM_NOFS                0x0004u
+#define KM_MAYFAIL     0x0008u
+
+/*
+ * We use a special process flag to avoid recursive callbacks into
+ * the filesystem during transactions.  We will also issue our own
+ * warnings, so we explicitly skip any generic ones (silly of us).
+ */
+static inline gfp_t
+kmem_flags_convert(unsigned int __nocast flags)
+{
+       gfp_t   lflags;
+
+       BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));
+
+       if (flags & KM_NOSLEEP) {
+               lflags = GFP_ATOMIC | __GFP_NOWARN;
+       } else {
+               lflags = GFP_KERNEL | __GFP_NOWARN;
+               if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
+                       lflags &= ~__GFP_FS;
+       }
+       return lflags;
+}
+
+extern void *kmem_alloc(size_t, unsigned int __nocast);
+extern void *kmem_zalloc(size_t, unsigned int __nocast);
+extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
+extern void  kmem_free(const void *);
+
+static inline void *kmem_zalloc_large(size_t size)
+{
+       void *ptr;
+
+       ptr = vmalloc(size);
+       if (ptr)
+               memset(ptr, 0, size);
+       return ptr;
+}
+static inline void kmem_free_large(void *ptr)
+{
+       vfree(ptr);
+}
+
+extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
+
+/*
+ * Zone interfaces
+ */
+
+#define KM_ZONE_HWALIGN        SLAB_HWCACHE_ALIGN
+#define KM_ZONE_RECLAIM        SLAB_RECLAIM_ACCOUNT
+#define KM_ZONE_SPREAD SLAB_MEM_SPREAD
+
+#define kmem_zone      kmem_cache
+#define kmem_zone_t    struct kmem_cache
+
+static inline kmem_zone_t *
+kmem_zone_init(int size, char *zone_name)
+{
+       return kmem_cache_create(zone_name, size, 0, 0, NULL);
+}
+
+static inline kmem_zone_t *
+kmem_zone_init_flags(int size, char *zone_name, unsigned long flags,
+                    void (*construct)(void *))
+{
+       return kmem_cache_create(zone_name, size, 0, flags, construct);
+}
+
+static inline void
+kmem_zone_free(kmem_zone_t *zone, void *ptr)
+{
+       kmem_cache_free(zone, ptr);
+}
+
+static inline void
+kmem_zone_destroy(kmem_zone_t *zone)
+{
+       if (zone)
+               kmem_cache_destroy(zone);
+}
+
+extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
+extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
+
+static inline int
+kmem_shake_allow(gfp_t gfp_mask)
+{
+       return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
+}
+
+#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c

deleted file mode 100644 (file)

index a907de5..0000000
--- a/fs/xfs/linux-2.6/kmem.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/blkdev.h>
-#include <linux/backing-dev.h>
-#include "time.h"
-#include "kmem.h"
-#include "xfs_message.h"
-
-/*
- * Greedy allocation.  May fail and may return vmalloced memory.
- *
- * Must be freed using kmem_free_large.
- */
-void *
-kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
-{
-       void            *ptr;
-       size_t          kmsize = maxsize;
-
-       while (!(ptr = kmem_zalloc_large(kmsize))) {
-               if ((kmsize >>= 1) <= minsize)
-                       kmsize = minsize;
-       }
-       if (ptr)
-               *size = kmsize;
-       return ptr;
-}
-
-void *
-kmem_alloc(size_t size, unsigned int __nocast flags)
-{
-       int     retries = 0;
-       gfp_t   lflags = kmem_flags_convert(flags);
-       void    *ptr;
-
-       do {
-               ptr = kmalloc(size, lflags);
-               if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
-                       return ptr;
-               if (!(++retries % 100))
-                       xfs_err(NULL,
-               "possible memory allocation deadlock in %s (mode:0x%x)",
-                                       __func__, lflags);
-               congestion_wait(BLK_RW_ASYNC, HZ/50);
-       } while (1);
-}
-
-void *
-kmem_zalloc(size_t size, unsigned int __nocast flags)
-{
-       void    *ptr;
-
-       ptr = kmem_alloc(size, flags);
-       if (ptr)
-               memset((char *)ptr, 0, (int)size);
-       return ptr;
-}
-
-void
-kmem_free(const void *ptr)
-{
-       if (!is_vmalloc_addr(ptr)) {
-               kfree(ptr);
-       } else {
-               vfree(ptr);
-       }
-}
-
-void *
-kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
-            unsigned int __nocast flags)
-{
-       void    *new;
-
-       new = kmem_alloc(newsize, flags);
-       if (ptr) {
-               if (new)
-                       memcpy(new, ptr,
-                               ((oldsize < newsize) ? oldsize : newsize));
-               kmem_free(ptr);
-       }
-       return new;
-}
-
-void *
-kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
-{
-       int     retries = 0;
-       gfp_t   lflags = kmem_flags_convert(flags);
-       void    *ptr;
-
-       do {
-               ptr = kmem_cache_alloc(zone, lflags);
-               if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
-                       return ptr;
-               if (!(++retries % 100))
-                       xfs_err(NULL,
-               "possible memory allocation deadlock in %s (mode:0x%x)",
-                                       __func__, lflags);
-               congestion_wait(BLK_RW_ASYNC, HZ/50);
-       } while (1);
-}
-
-void *
-kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
-{
-       void    *ptr;
-
-       ptr = kmem_zone_alloc(zone, flags);
-       if (ptr)
-               memset((char *)ptr, 0, kmem_cache_size(zone));
-       return ptr;
-}
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h

deleted file mode 100644 (file)

index f7c8f7a..0000000
--- a/fs/xfs/linux-2.6/kmem.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_KMEM_H__
-#define __XFS_SUPPORT_KMEM_H__
-
-#include <linux/slab.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-
-/*
- * General memory allocation interfaces
- */
-
-#define KM_SLEEP       0x0001u
-#define KM_NOSLEEP     0x0002u
-#define KM_NOFS                0x0004u
-#define KM_MAYFAIL     0x0008u
-
-/*
- * We use a special process flag to avoid recursive callbacks into
- * the filesystem during transactions.  We will also issue our own
- * warnings, so we explicitly skip any generic ones (silly of us).
- */
-static inline gfp_t
-kmem_flags_convert(unsigned int __nocast flags)
-{
-       gfp_t   lflags;
-
-       BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));
-
-       if (flags & KM_NOSLEEP) {
-               lflags = GFP_ATOMIC | __GFP_NOWARN;
-       } else {
-               lflags = GFP_KERNEL | __GFP_NOWARN;
-               if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
-                       lflags &= ~__GFP_FS;
-       }
-       return lflags;
-}
-
-extern void *kmem_alloc(size_t, unsigned int __nocast);
-extern void *kmem_zalloc(size_t, unsigned int __nocast);
-extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
-extern void  kmem_free(const void *);
-
-static inline void *kmem_zalloc_large(size_t size)
-{
-       void *ptr;
-
-       ptr = vmalloc(size);
-       if (ptr)
-               memset(ptr, 0, size);
-       return ptr;
-}
-static inline void kmem_free_large(void *ptr)
-{
-       vfree(ptr);
-}
-
-extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
-
-/*
- * Zone interfaces
- */
-
-#define KM_ZONE_HWALIGN        SLAB_HWCACHE_ALIGN
-#define KM_ZONE_RECLAIM        SLAB_RECLAIM_ACCOUNT
-#define KM_ZONE_SPREAD SLAB_MEM_SPREAD
-
-#define kmem_zone      kmem_cache
-#define kmem_zone_t    struct kmem_cache
-
-static inline kmem_zone_t *
-kmem_zone_init(int size, char *zone_name)
-{
-       return kmem_cache_create(zone_name, size, 0, 0, NULL);
-}
-
-static inline kmem_zone_t *
-kmem_zone_init_flags(int size, char *zone_name, unsigned long flags,
-                    void (*construct)(void *))
-{
-       return kmem_cache_create(zone_name, size, 0, flags, construct);
-}
-
-static inline void
-kmem_zone_free(kmem_zone_t *zone, void *ptr)
-{
-       kmem_cache_free(zone, ptr);
-}
-
-static inline void
-kmem_zone_destroy(kmem_zone_t *zone)
-{
-       if (zone)
-               kmem_cache_destroy(zone);
-}
-
-extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
-extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
-
-static inline int
-kmem_shake_allow(gfp_t gfp_mask)
-{
-       return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
-}
-
-#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h

deleted file mode 100644 (file)

index ff6a198..0000000
--- a/fs/xfs/linux-2.6/mrlock.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_MRLOCK_H__
-#define __XFS_SUPPORT_MRLOCK_H__
-
-#include <linux/rwsem.h>
-
-typedef struct {
-       struct rw_semaphore     mr_lock;
-#ifdef DEBUG
-       int                     mr_writer;
-#endif
-} mrlock_t;
-
-#ifdef DEBUG
-#define mrinit(mrp, name)      \
-       do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
-#else
-#define mrinit(mrp, name)      \
-       do { init_rwsem(&(mrp)->mr_lock); } while (0)
-#endif
-
-#define mrlock_init(mrp, t,n,s)        mrinit(mrp, n)
-#define mrfree(mrp)            do { } while (0)
-
-static inline void mraccess_nested(mrlock_t *mrp, int subclass)
-{
-       down_read_nested(&mrp->mr_lock, subclass);
-}
-
-static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
-{
-       down_write_nested(&mrp->mr_lock, subclass);
-#ifdef DEBUG
-       mrp->mr_writer = 1;
-#endif
-}
-
-static inline int mrtryaccess(mrlock_t *mrp)
-{
-       return down_read_trylock(&mrp->mr_lock);
-}
-
-static inline int mrtryupdate(mrlock_t *mrp)
-{
-       if (!down_write_trylock(&mrp->mr_lock))
-               return 0;
-#ifdef DEBUG
-       mrp->mr_writer = 1;
-#endif
-       return 1;
-}
-
-static inline void mrunlock_excl(mrlock_t *mrp)
-{
-#ifdef DEBUG
-       mrp->mr_writer = 0;
-#endif
-       up_write(&mrp->mr_lock);
-}
-
-static inline void mrunlock_shared(mrlock_t *mrp)
-{
-       up_read(&mrp->mr_lock);
-}
-
-static inline void mrdemote(mrlock_t *mrp)
-{
-#ifdef DEBUG
-       mrp->mr_writer = 0;
-#endif
-       downgrade_write(&mrp->mr_lock);
-}
-
-#endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h

deleted file mode 100644 (file)

index 387e695..0000000
--- a/fs/xfs/linux-2.6/time.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_TIME_H__
-#define __XFS_SUPPORT_TIME_H__
-
-#include <linux/sched.h>
-#include <linux/time.h>
-
-typedef struct timespec timespec_t;
-
-static inline void delay(long ticks)
-{
-       schedule_timeout_uninterruptible(ticks);
-}
-
-static inline void nanotime(struct timespec *tvp)
-{
-       *tvp = CURRENT_TIME;
-}
-
-#endif /* __XFS_SUPPORT_TIME_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c

deleted file mode 100644 (file)

index b6c4b37..0000000
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * Copyright (c) 2008, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_acl.h"
-#include "xfs_attr.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_vnodeops.h"
-#include "xfs_trace.h"
-#include <linux/slab.h>
-#include <linux/xattr.h>
-#include <linux/posix_acl_xattr.h>
-
-
-/*
- * Locking scheme:
- *  - all ACL updates are protected by inode->i_mutex, which is taken before
- *    calling into this file.
- */
-
-STATIC struct posix_acl *
-xfs_acl_from_disk(struct xfs_acl *aclp)
-{
-       struct posix_acl_entry *acl_e;
-       struct posix_acl *acl;
-       struct xfs_acl_entry *ace;
-       int count, i;
-
-       count = be32_to_cpu(aclp->acl_cnt);
-
-       acl = posix_acl_alloc(count, GFP_KERNEL);
-       if (!acl)
-               return ERR_PTR(-ENOMEM);
-
-       for (i = 0; i < count; i++) {
-               acl_e = &acl->a_entries[i];
-               ace = &aclp->acl_entry[i];
-
-               /*
-                * The tag is 32 bits on disk and 16 bits in core.
-                *
-                * Because every access to it goes through the core
-                * format first this is not a problem.
-                */
-               acl_e->e_tag = be32_to_cpu(ace->ae_tag);
-               acl_e->e_perm = be16_to_cpu(ace->ae_perm);
-
-               switch (acl_e->e_tag) {
-               case ACL_USER:
-               case ACL_GROUP:
-                       acl_e->e_id = be32_to_cpu(ace->ae_id);
-                       break;
-               case ACL_USER_OBJ:
-               case ACL_GROUP_OBJ:
-               case ACL_MASK:
-               case ACL_OTHER:
-                       acl_e->e_id = ACL_UNDEFINED_ID;
-                       break;
-               default:
-                       goto fail;
-               }
-       }
-       return acl;
-
-fail:
-       posix_acl_release(acl);
-       return ERR_PTR(-EINVAL);
-}
-
-STATIC void
-xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
-{
-       const struct posix_acl_entry *acl_e;
-       struct xfs_acl_entry *ace;
-       int i;
-
-       aclp->acl_cnt = cpu_to_be32(acl->a_count);
-       for (i = 0; i < acl->a_count; i++) {
-               ace = &aclp->acl_entry[i];
-               acl_e = &acl->a_entries[i];
-
-               ace->ae_tag = cpu_to_be32(acl_e->e_tag);
-               ace->ae_id = cpu_to_be32(acl_e->e_id);
-               ace->ae_perm = cpu_to_be16(acl_e->e_perm);
-       }
-}
-
-struct posix_acl *
-xfs_get_acl(struct inode *inode, int type)
-{
-       struct xfs_inode *ip = XFS_I(inode);
-       struct posix_acl *acl;
-       struct xfs_acl *xfs_acl;
-       int len = sizeof(struct xfs_acl);
-       unsigned char *ea_name;
-       int error;
-
-       acl = get_cached_acl(inode, type);
-       if (acl != ACL_NOT_CACHED)
-               return acl;
-
-       trace_xfs_get_acl(ip);
-
-       switch (type) {
-       case ACL_TYPE_ACCESS:
-               ea_name = SGI_ACL_FILE;
-               break;
-       case ACL_TYPE_DEFAULT:
-               ea_name = SGI_ACL_DEFAULT;
-               break;
-       default:
-               BUG();
-       }
-
-       /*
-        * If we have a cached ACLs value just return it, not need to
-        * go out to the disk.
-        */
-
-       xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
-       if (!xfs_acl)
-               return ERR_PTR(-ENOMEM);
-
-       error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
-                                                       &len, ATTR_ROOT);
-       if (error) {
-               /*
-                * If the attribute doesn't exist make sure we have a negative
-                * cache entry, for any other error assume it is transient and
-                * leave the cache entry as ACL_NOT_CACHED.
-                */
-               if (error == -ENOATTR) {
-                       acl = NULL;
-                       goto out_update_cache;
-               }
-               goto out;
-       }
-
-       acl = xfs_acl_from_disk(xfs_acl);
-       if (IS_ERR(acl))
-               goto out;
-
- out_update_cache:
-       set_cached_acl(inode, type, acl);
- out:
-       kfree(xfs_acl);
-       return acl;
-}
-
-STATIC int
-xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
-{
-       struct xfs_inode *ip = XFS_I(inode);
-       unsigned char *ea_name;
-       int error;
-
-       if (S_ISLNK(inode->i_mode))
-               return -EOPNOTSUPP;
-
-       switch (type) {
-       case ACL_TYPE_ACCESS:
-               ea_name = SGI_ACL_FILE;
-               break;
-       case ACL_TYPE_DEFAULT:
-               if (!S_ISDIR(inode->i_mode))
-                       return acl ? -EACCES : 0;
-               ea_name = SGI_ACL_DEFAULT;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       if (acl) {
-               struct xfs_acl *xfs_acl;
-               int len;
-
-               xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
-               if (!xfs_acl)
-                       return -ENOMEM;
-
-               xfs_acl_to_disk(xfs_acl, acl);
-               len = sizeof(struct xfs_acl) -
-                       (sizeof(struct xfs_acl_entry) *
-                        (XFS_ACL_MAX_ENTRIES - acl->a_count));
-
-               error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
-                               len, ATTR_ROOT);
-
-               kfree(xfs_acl);
-       } else {
-               /*
-                * A NULL ACL argument means we want to remove the ACL.
-                */
-               error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
-
-               /*
-                * If the attribute didn't exist to start with that's fine.
-                */
-               if (error == -ENOATTR)
-                       error = 0;
-       }
-
-       if (!error)
-               set_cached_acl(inode, type, acl);
-       return error;
-}
-
-static int
-xfs_set_mode(struct inode *inode, umode_t mode)
-{
-       int error = 0;
-
-       if (mode != inode->i_mode) {
-               struct iattr iattr;
-
-               iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
-               iattr.ia_mode = mode;
-               iattr.ia_ctime = current_fs_time(inode->i_sb);
-
-               error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
-       }
-
-       return error;
-}
-
-static int
-xfs_acl_exists(struct inode *inode, unsigned char *name)
-{
-       int len = sizeof(struct xfs_acl);
-
-       return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
-                           ATTR_ROOT|ATTR_KERNOVAL) == 0);
-}
-
-int
-posix_acl_access_exists(struct inode *inode)
-{
-       return xfs_acl_exists(inode, SGI_ACL_FILE);
-}
-
-int
-posix_acl_default_exists(struct inode *inode)
-{
-       if (!S_ISDIR(inode->i_mode))
-               return 0;
-       return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
-}
-
-/*
- * No need for i_mutex because the inode is not yet exposed to the VFS.
- */
-int
-xfs_inherit_acl(struct inode *inode, struct posix_acl *acl)
-{
-       umode_t mode = inode->i_mode;
-       int error = 0, inherit = 0;
-
-       if (S_ISDIR(inode->i_mode)) {
-               error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
-               if (error)
-                       goto out;
-       }
-
-       error = posix_acl_create(&acl, GFP_KERNEL, &mode);
-       if (error < 0)
-               return error;
-
-       /*
-        * If posix_acl_create returns a positive value we need to
-        * inherit a permission that can't be represented using the Unix
-        * mode bits and we actually need to set an ACL.
-        */
-       if (error > 0)
-               inherit = 1;
-
-       error = xfs_set_mode(inode, mode);
-       if (error)
-               goto out;
-
-       if (inherit)
-               error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
-
-out:
-       posix_acl_release(acl);
-       return error;
-}
-
-int
-xfs_acl_chmod(struct inode *inode)
-{
-       struct posix_acl *acl;
-       int error;
-
-       if (S_ISLNK(inode->i_mode))
-               return -EOPNOTSUPP;
-
-       acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
-       if (IS_ERR(acl) || !acl)
-               return PTR_ERR(acl);
-
-       error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
-       if (error)
-               return error;
-
-       error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
-       posix_acl_release(acl);
-       return error;
-}
-
-static int
-xfs_xattr_acl_get(struct dentry *dentry, const char *name,
-               void *value, size_t size, int type)
-{
-       struct posix_acl *acl;
-       int error;
-
-       acl = xfs_get_acl(dentry->d_inode, type);
-       if (IS_ERR(acl))
-               return PTR_ERR(acl);
-       if (acl == NULL)
-               return -ENODATA;
-
-       error = posix_acl_to_xattr(acl, value, size);
-       posix_acl_release(acl);
-
-       return error;
-}
-
-static int
-xfs_xattr_acl_set(struct dentry *dentry, const char *name,
-               const void *value, size_t size, int flags, int type)
-{
-       struct inode *inode = dentry->d_inode;
-       struct posix_acl *acl = NULL;
-       int error = 0;
-
-       if (flags & XATTR_CREATE)
-               return -EINVAL;
-       if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
-               return value ? -EACCES : 0;
-       if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
-               return -EPERM;
-
-       if (!value)
-               goto set_acl;
-
-       acl = posix_acl_from_xattr(value, size);
-       if (!acl) {
-               /*
-                * acl_set_file(3) may request that we set default ACLs with
-                * zero length -- defend (gracefully) against that here.
-                */
-               goto out;
-       }
-       if (IS_ERR(acl)) {
-               error = PTR_ERR(acl);
-               goto out;
-       }
-
-       error = posix_acl_valid(acl);
-       if (error)
-               goto out_release;
-
-       error = -EINVAL;
-       if (acl->a_count > XFS_ACL_MAX_ENTRIES)
-               goto out_release;
-
-       if (type == ACL_TYPE_ACCESS) {
-               umode_t mode = inode->i_mode;
-               error = posix_acl_equiv_mode(acl, &mode);
-
-               if (error <= 0) {
-                       posix_acl_release(acl);
-                       acl = NULL;
-
-                       if (error < 0)
-                               return error;
-               }
-
-               error = xfs_set_mode(inode, mode);
-               if (error)
-                       goto out_release;
-       }
-
- set_acl:
-       error = xfs_set_acl(inode, type, acl);
- out_release:
-       posix_acl_release(acl);
- out:
-       return error;
-}
-
-const struct xattr_handler xfs_xattr_acl_access_handler = {
-       .prefix = POSIX_ACL_XATTR_ACCESS,
-       .flags  = ACL_TYPE_ACCESS,
-       .get    = xfs_xattr_acl_get,
-       .set    = xfs_xattr_acl_set,
-};
-
-const struct xattr_handler xfs_xattr_acl_default_handler = {
-       .prefix = POSIX_ACL_XATTR_DEFAULT,
-       .flags  = ACL_TYPE_DEFAULT,
-       .get    = xfs_xattr_acl_get,
-       .set    = xfs_xattr_acl_set,
-};
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c

deleted file mode 100644 (file)

index 63e971e..0000000
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ /dev/null
@@ -1,1499 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_trans.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
-#include "xfs_rw.h"
-#include "xfs_iomap.h"
-#include "xfs_vnodeops.h"
-#include "xfs_trace.h"
-#include "xfs_bmap.h"
-#include <linux/gfp.h>
-#include <linux/mpage.h>
-#include <linux/pagevec.h>
-#include <linux/writeback.h>
-
-
-/*
- * Prime number of hash buckets since address is used as the key.
- */
-#define NVSYNC         37
-#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC])
-static wait_queue_head_t xfs_ioend_wq[NVSYNC];
-
-void __init
-xfs_ioend_init(void)
-{
-       int i;
-
-       for (i = 0; i < NVSYNC; i++)
-               init_waitqueue_head(&xfs_ioend_wq[i]);
-}
-
-void
-xfs_ioend_wait(
-       xfs_inode_t     *ip)
-{
-       wait_queue_head_t *wq = to_ioend_wq(ip);
-
-       wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
-}
-
-STATIC void
-xfs_ioend_wake(
-       xfs_inode_t     *ip)
-{
-       if (atomic_dec_and_test(&ip->i_iocount))
-               wake_up(to_ioend_wq(ip));
-}
-
-void
-xfs_count_page_state(
-       struct page             *page,
-       int                     *delalloc,
-       int                     *unwritten)
-{
-       struct buffer_head      *bh, *head;
-
-       *delalloc = *unwritten = 0;
-
-       bh = head = page_buffers(page);
-       do {
-               if (buffer_unwritten(bh))
-                       (*unwritten) = 1;
-               else if (buffer_delay(bh))
-                       (*delalloc) = 1;
-       } while ((bh = bh->b_this_page) != head);
-}
-
-STATIC struct block_device *
-xfs_find_bdev_for_inode(
-       struct inode            *inode)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-
-       if (XFS_IS_REALTIME_INODE(ip))
-               return mp->m_rtdev_targp->bt_bdev;
-       else
-               return mp->m_ddev_targp->bt_bdev;
-}
-
-/*
- * We're now finished for good with this ioend structure.
- * Update the page state via the associated buffer_heads,
- * release holds on the inode and bio, and finally free
- * up memory.  Do not use the ioend after this.
- */
-STATIC void
-xfs_destroy_ioend(
-       xfs_ioend_t             *ioend)
-{
-       struct buffer_head      *bh, *next;
-       struct xfs_inode        *ip = XFS_I(ioend->io_inode);
-
-       for (bh = ioend->io_buffer_head; bh; bh = next) {
-               next = bh->b_private;
-               bh->b_end_io(bh, !ioend->io_error);
-       }
-
-       /*
-        * Volume managers supporting multiple paths can send back ENODEV
-        * when the final path disappears.  In this case continuing to fill
-        * the page cache with dirty data which cannot be written out is
-        * evil, so prevent that.
-        */
-       if (unlikely(ioend->io_error == -ENODEV)) {
-               xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
-                                     __FILE__, __LINE__);
-       }
-
-       xfs_ioend_wake(ip);
-       mempool_free(ioend, xfs_ioend_pool);
-}
-
-/*
- * If the end of the current ioend is beyond the current EOF,
- * return the new EOF value, otherwise zero.
- */
-STATIC xfs_fsize_t
-xfs_ioend_new_eof(
-       xfs_ioend_t             *ioend)
-{
-       xfs_inode_t             *ip = XFS_I(ioend->io_inode);
-       xfs_fsize_t             isize;
-       xfs_fsize_t             bsize;
-
-       bsize = ioend->io_offset + ioend->io_size;
-       isize = MAX(ip->i_size, ip->i_new_size);
-       isize = MIN(isize, bsize);
-       return isize > ip->i_d.di_size ? isize : 0;
-}
-
-/*
- * Update on-disk file size now that data has been written to disk.  The
- * current in-memory file size is i_size.  If a write is beyond eof i_new_size
- * will be the intended file size until i_size is updated.  If this write does
- * not extend all the way to the valid file size then restrict this update to
- * the end of the write.
- *
- * This function does not block as blocking on the inode lock in IO completion
- * can lead to IO completion order dependency deadlocks.. If it can't get the
- * inode ilock it will return EAGAIN. Callers must handle this.
- */
-STATIC int
-xfs_setfilesize(
-       xfs_ioend_t             *ioend)
-{
-       xfs_inode_t             *ip = XFS_I(ioend->io_inode);
-       xfs_fsize_t             isize;
-
-       if (unlikely(ioend->io_error))
-               return 0;
-
-       if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
-               return EAGAIN;
-
-       isize = xfs_ioend_new_eof(ioend);
-       if (isize) {
-               trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
-               ip->i_d.di_size = isize;
-               xfs_mark_inode_dirty(ip);
-       }
-
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       return 0;
-}
-
-/*
- * Schedule IO completion handling on the final put of an ioend.
- */
-STATIC void
-xfs_finish_ioend(
-       struct xfs_ioend        *ioend)
-{
-       if (atomic_dec_and_test(&ioend->io_remaining)) {
-               if (ioend->io_type == IO_UNWRITTEN)
-                       queue_work(xfsconvertd_workqueue, &ioend->io_work);
-               else
-                       queue_work(xfsdatad_workqueue, &ioend->io_work);
-       }
-}
-
-/*
- * IO write completion.
- */
-STATIC void
-xfs_end_io(
-       struct work_struct *work)
-{
-       xfs_ioend_t     *ioend = container_of(work, xfs_ioend_t, io_work);
-       struct xfs_inode *ip = XFS_I(ioend->io_inode);
-       int             error = 0;
-
-       /*
-        * For unwritten extents we need to issue transactions to convert a
-        * range to normal written extens after the data I/O has finished.
-        */
-       if (ioend->io_type == IO_UNWRITTEN &&
-           likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
-
-               error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
-                                                ioend->io_size);
-               if (error)
-                       ioend->io_error = error;
-       }
-
-       /*
-        * We might have to update the on-disk file size after extending
-        * writes.
-        */
-       error = xfs_setfilesize(ioend);
-       ASSERT(!error || error == EAGAIN);
-
-       /*
-        * If we didn't complete processing of the ioend, requeue it to the
-        * tail of the workqueue for another attempt later. Otherwise destroy
-        * it.
-        */
-       if (error == EAGAIN) {
-               atomic_inc(&ioend->io_remaining);
-               xfs_finish_ioend(ioend);
-               /* ensure we don't spin on blocked ioends */
-               delay(1);
-       } else {
-               if (ioend->io_iocb)
-                       aio_complete(ioend->io_iocb, ioend->io_result, 0);
-               xfs_destroy_ioend(ioend);
-       }
-}
-
-/*
- * Call IO completion handling in caller context on the final put of an ioend.
- */
-STATIC void
-xfs_finish_ioend_sync(
-       struct xfs_ioend        *ioend)
-{
-       if (atomic_dec_and_test(&ioend->io_remaining))
-               xfs_end_io(&ioend->io_work);
-}
-
-/*
- * Allocate and initialise an IO completion structure.
- * We need to track unwritten extent write completion here initially.
- * We'll need to extend this for updating the ondisk inode size later
- * (vs. incore size).
- */
-STATIC xfs_ioend_t *
-xfs_alloc_ioend(
-       struct inode            *inode,
-       unsigned int            type)
-{
-       xfs_ioend_t             *ioend;
-
-       ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
-
-       /*
-        * Set the count to 1 initially, which will prevent an I/O
-        * completion callback from happening before we have started
-        * all the I/O from calling the completion routine too early.
-        */
-       atomic_set(&ioend->io_remaining, 1);
-       ioend->io_error = 0;
-       ioend->io_list = NULL;
-       ioend->io_type = type;
-       ioend->io_inode = inode;
-       ioend->io_buffer_head = NULL;
-       ioend->io_buffer_tail = NULL;
-       atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
-       ioend->io_offset = 0;
-       ioend->io_size = 0;
-       ioend->io_iocb = NULL;
-       ioend->io_result = 0;
-
-       INIT_WORK(&ioend->io_work, xfs_end_io);
-       return ioend;
-}
-
-STATIC int
-xfs_map_blocks(
-       struct inode            *inode,
-       loff_t                  offset,
-       struct xfs_bmbt_irec    *imap,
-       int                     type,
-       int                     nonblocking)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       ssize_t                 count = 1 << inode->i_blkbits;
-       xfs_fileoff_t           offset_fsb, end_fsb;
-       int                     error = 0;
-       int                     bmapi_flags = XFS_BMAPI_ENTIRE;
-       int                     nimaps = 1;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
-
-       if (type == IO_UNWRITTEN)
-               bmapi_flags |= XFS_BMAPI_IGSTATE;
-
-       if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
-               if (nonblocking)
-                       return -XFS_ERROR(EAGAIN);
-               xfs_ilock(ip, XFS_ILOCK_SHARED);
-       }
-
-       ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
-              (ip->i_df.if_flags & XFS_IFEXTENTS));
-       ASSERT(offset <= mp->m_maxioffset);
-
-       if (offset + count > mp->m_maxioffset)
-               count = mp->m_maxioffset - offset;
-       end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
-       offset_fsb = XFS_B_TO_FSBT(mp, offset);
-       error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
-                         bmapi_flags,  NULL, 0, imap, &nimaps, NULL);
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-       if (error)
-               return -XFS_ERROR(error);
-
-       if (type == IO_DELALLOC &&
-           (!nimaps || isnullstartblock(imap->br_startblock))) {
-               error = xfs_iomap_write_allocate(ip, offset, count, imap);
-               if (!error)
-                       trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
-               return -XFS_ERROR(error);
-       }
-
-#ifdef DEBUG
-       if (type == IO_UNWRITTEN) {
-               ASSERT(nimaps);
-               ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-               ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-       }
-#endif
-       if (nimaps)
-               trace_xfs_map_blocks_found(ip, offset, count, type, imap);
-       return 0;
-}
-
-STATIC int
-xfs_imap_valid(
-       struct inode            *inode,
-       struct xfs_bmbt_irec    *imap,
-       xfs_off_t               offset)
-{
-       offset >>= inode->i_blkbits;
-
-       return offset >= imap->br_startoff &&
-               offset < imap->br_startoff + imap->br_blockcount;
-}
-
-/*
- * BIO completion handler for buffered IO.
- */
-STATIC void
-xfs_end_bio(
-       struct bio              *bio,
-       int                     error)
-{
-       xfs_ioend_t             *ioend = bio->bi_private;
-
-       ASSERT(atomic_read(&bio->bi_cnt) >= 1);
-       ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
-
-       /* Toss bio and pass work off to an xfsdatad thread */
-       bio->bi_private = NULL;
-       bio->bi_end_io = NULL;
-       bio_put(bio);
-
-       xfs_finish_ioend(ioend);
-}
-
-STATIC void
-xfs_submit_ioend_bio(
-       struct writeback_control *wbc,
-       xfs_ioend_t             *ioend,
-       struct bio              *bio)
-{
-       atomic_inc(&ioend->io_remaining);
-       bio->bi_private = ioend;
-       bio->bi_end_io = xfs_end_bio;
-
-       /*
-        * If the I/O is beyond EOF we mark the inode dirty immediately
-        * but don't update the inode size until I/O completion.
-        */
-       if (xfs_ioend_new_eof(ioend))
-               xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
-
-       submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
-}
-
-STATIC struct bio *
-xfs_alloc_ioend_bio(
-       struct buffer_head      *bh)
-{
-       int                     nvecs = bio_get_nr_vecs(bh->b_bdev);
-       struct bio              *bio = bio_alloc(GFP_NOIO, nvecs);
-
-       ASSERT(bio->bi_private == NULL);
-       bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
-       bio->bi_bdev = bh->b_bdev;
-       return bio;
-}
-
-STATIC void
-xfs_start_buffer_writeback(
-       struct buffer_head      *bh)
-{
-       ASSERT(buffer_mapped(bh));
-       ASSERT(buffer_locked(bh));
-       ASSERT(!buffer_delay(bh));
-       ASSERT(!buffer_unwritten(bh));
-
-       mark_buffer_async_write(bh);
-       set_buffer_uptodate(bh);
-       clear_buffer_dirty(bh);
-}
-
-STATIC void
-xfs_start_page_writeback(
-       struct page             *page,
-       int                     clear_dirty,
-       int                     buffers)
-{
-       ASSERT(PageLocked(page));
-       ASSERT(!PageWriteback(page));
-       if (clear_dirty)
-               clear_page_dirty_for_io(page);
-       set_page_writeback(page);
-       unlock_page(page);
-       /* If no buffers on the page are to be written, finish it here */
-       if (!buffers)
-               end_page_writeback(page);
-}
-
-static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
-{
-       return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
-}
-
-/*
- * Submit all of the bios for all of the ioends we have saved up, covering the
- * initial writepage page and also any probed pages.
- *
- * Because we may have multiple ioends spanning a page, we need to start
- * writeback on all the buffers before we submit them for I/O. If we mark the
- * buffers as we got, then we can end up with a page that only has buffers
- * marked async write and I/O complete on can occur before we mark the other
- * buffers async write.
- *
- * The end result of this is that we trip a bug in end_page_writeback() because
- * we call it twice for the one page as the code in end_buffer_async_write()
- * assumes that all buffers on the page are started at the same time.
- *
- * The fix is two passes across the ioend list - one to start writeback on the
- * buffer_heads, and then submit them for I/O on the second pass.
- */
-STATIC void
-xfs_submit_ioend(
-       struct writeback_control *wbc,
-       xfs_ioend_t             *ioend)
-{
-       xfs_ioend_t             *head = ioend;
-       xfs_ioend_t             *next;
-       struct buffer_head      *bh;
-       struct bio              *bio;
-       sector_t                lastblock = 0;
-
-       /* Pass 1 - start writeback */
-       do {
-               next = ioend->io_list;
-               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
-                       xfs_start_buffer_writeback(bh);
-       } while ((ioend = next) != NULL);
-
-       /* Pass 2 - submit I/O */
-       ioend = head;
-       do {
-               next = ioend->io_list;
-               bio = NULL;
-
-               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
-
-                       if (!bio) {
- retry:
-                               bio = xfs_alloc_ioend_bio(bh);
-                       } else if (bh->b_blocknr != lastblock + 1) {
-                               xfs_submit_ioend_bio(wbc, ioend, bio);
-                               goto retry;
-                       }
-
-                       if (bio_add_buffer(bio, bh) != bh->b_size) {
-                               xfs_submit_ioend_bio(wbc, ioend, bio);
-                               goto retry;
-                       }
-
-                       lastblock = bh->b_blocknr;
-               }
-               if (bio)
-                       xfs_submit_ioend_bio(wbc, ioend, bio);
-               xfs_finish_ioend(ioend);
-       } while ((ioend = next) != NULL);
-}
-
-/*
- * Cancel submission of all buffer_heads so far in this endio.
- * Toss the endio too.  Only ever called for the initial page
- * in a writepage request, so only ever one page.
- */
-STATIC void
-xfs_cancel_ioend(
-       xfs_ioend_t             *ioend)
-{
-       xfs_ioend_t             *next;
-       struct buffer_head      *bh, *next_bh;
-
-       do {
-               next = ioend->io_list;
-               bh = ioend->io_buffer_head;
-               do {
-                       next_bh = bh->b_private;
-                       clear_buffer_async_write(bh);
-                       unlock_buffer(bh);
-               } while ((bh = next_bh) != NULL);
-
-               xfs_ioend_wake(XFS_I(ioend->io_inode));
-               mempool_free(ioend, xfs_ioend_pool);
-       } while ((ioend = next) != NULL);
-}
-
-/*
- * Test to see if we've been building up a completion structure for
- * earlier buffers -- if so, we try to append to this ioend if we
- * can, otherwise we finish off any current ioend and start another.
- * Return true if we've finished the given ioend.
- */
-STATIC void
-xfs_add_to_ioend(
-       struct inode            *inode,
-       struct buffer_head      *bh,
-       xfs_off_t               offset,
-       unsigned int            type,
-       xfs_ioend_t             **result,
-       int                     need_ioend)
-{
-       xfs_ioend_t             *ioend = *result;
-
-       if (!ioend || need_ioend || type != ioend->io_type) {
-               xfs_ioend_t     *previous = *result;
-
-               ioend = xfs_alloc_ioend(inode, type);
-               ioend->io_offset = offset;
-               ioend->io_buffer_head = bh;
-               ioend->io_buffer_tail = bh;
-               if (previous)
-                       previous->io_list = ioend;
-               *result = ioend;
-       } else {
-               ioend->io_buffer_tail->b_private = bh;
-               ioend->io_buffer_tail = bh;
-       }
-
-       bh->b_private = NULL;
-       ioend->io_size += bh->b_size;
-}
-
-STATIC void
-xfs_map_buffer(
-       struct inode            *inode,
-       struct buffer_head      *bh,
-       struct xfs_bmbt_irec    *imap,
-       xfs_off_t               offset)
-{
-       sector_t                bn;
-       struct xfs_mount        *m = XFS_I(inode)->i_mount;
-       xfs_off_t               iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
-       xfs_daddr_t             iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
-
-       ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-       ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-
-       bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
-             ((offset - iomap_offset) >> inode->i_blkbits);
-
-       ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
-
-       bh->b_blocknr = bn;
-       set_buffer_mapped(bh);
-}
-
-STATIC void
-xfs_map_at_offset(
-       struct inode            *inode,
-       struct buffer_head      *bh,
-       struct xfs_bmbt_irec    *imap,
-       xfs_off_t               offset)
-{
-       ASSERT(imap->br_startblock != HOLESTARTBLOCK);
-       ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-
-       xfs_map_buffer(inode, bh, imap, offset);
-       set_buffer_mapped(bh);
-       clear_buffer_delay(bh);
-       clear_buffer_unwritten(bh);
-}
-
-/*
- * Test if a given page is suitable for writing as part of an unwritten
- * or delayed allocate extent.
- */
-STATIC int
-xfs_is_delayed_page(
-       struct page             *page,
-       unsigned int            type)
-{
-       if (PageWriteback(page))
-               return 0;
-
-       if (page->mapping && page_has_buffers(page)) {
-               struct buffer_head      *bh, *head;
-               int                     acceptable = 0;
-
-               bh = head = page_buffers(page);
-               do {
-                       if (buffer_unwritten(bh))
-                               acceptable = (type == IO_UNWRITTEN);
-                       else if (buffer_delay(bh))
-                               acceptable = (type == IO_DELALLOC);
-                       else if (buffer_dirty(bh) && buffer_mapped(bh))
-                               acceptable = (type == IO_OVERWRITE);
-                       else
-                               break;
-               } while ((bh = bh->b_this_page) != head);
-
-               if (acceptable)
-                       return 1;
-       }
-
-       return 0;
-}
-
-/*
- * Allocate & map buffers for page given the extent map. Write it out.
- * except for the original page of a writepage, this is called on
- * delalloc/unwritten pages only, for the original page it is possible
- * that the page has no mapping at all.
- */
-STATIC int
-xfs_convert_page(
-       struct inode            *inode,
-       struct page             *page,
-       loff_t                  tindex,
-       struct xfs_bmbt_irec    *imap,
-       xfs_ioend_t             **ioendp,
-       struct writeback_control *wbc)
-{
-       struct buffer_head      *bh, *head;
-       xfs_off_t               end_offset;
-       unsigned long           p_offset;
-       unsigned int            type;
-       int                     len, page_dirty;
-       int                     count = 0, done = 0, uptodate = 1;
-       xfs_off_t               offset = page_offset(page);
-
-       if (page->index != tindex)
-               goto fail;
-       if (!trylock_page(page))
-               goto fail;
-       if (PageWriteback(page))
-               goto fail_unlock_page;
-       if (page->mapping != inode->i_mapping)
-               goto fail_unlock_page;
-       if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
-               goto fail_unlock_page;
-
-       /*
-        * page_dirty is initially a count of buffers on the page before
-        * EOF and is decremented as we move each into a cleanable state.
-        *
-        * Derivation:
-        *
-        * End offset is the highest offset that this page should represent.
-        * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
-        * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
-        * hence give us the correct page_dirty count. On any other page,
-        * it will be zero and in that case we need page_dirty to be the
-        * count of buffers on the page.
-        */
-       end_offset = min_t(unsigned long long,
-                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
-                       i_size_read(inode));
-
-       len = 1 << inode->i_blkbits;
-       p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
-                                       PAGE_CACHE_SIZE);
-       p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
-       page_dirty = p_offset / len;
-
-       bh = head = page_buffers(page);
-       do {
-               if (offset >= end_offset)
-                       break;
-               if (!buffer_uptodate(bh))
-                       uptodate = 0;
-               if (!(PageUptodate(page) || buffer_uptodate(bh))) {
-                       done = 1;
-                       continue;
-               }
-
-               if (buffer_unwritten(bh) || buffer_delay(bh) ||
-                   buffer_mapped(bh)) {
-                       if (buffer_unwritten(bh))
-                               type = IO_UNWRITTEN;
-                       else if (buffer_delay(bh))
-                               type = IO_DELALLOC;
-                       else
-                               type = IO_OVERWRITE;
-
-                       if (!xfs_imap_valid(inode, imap, offset)) {
-                               done = 1;
-                               continue;
-                       }
-
-                       lock_buffer(bh);
-                       if (type != IO_OVERWRITE)
-                               xfs_map_at_offset(inode, bh, imap, offset);
-                       xfs_add_to_ioend(inode, bh, offset, type,
-                                        ioendp, done);
-
-                       page_dirty--;
-                       count++;
-               } else {
-                       done = 1;
-               }
-       } while (offset += len, (bh = bh->b_this_page) != head);
-
-       if (uptodate && bh == head)
-               SetPageUptodate(page);
-
-       if (count) {
-               if (--wbc->nr_to_write <= 0 &&
-                   wbc->sync_mode == WB_SYNC_NONE)
-                       done = 1;
-       }
-       xfs_start_page_writeback(page, !page_dirty, count);
-
-       return done;
- fail_unlock_page:
-       unlock_page(page);
- fail:
-       return 1;
-}
-
-/*
- * Convert & write out a cluster of pages in the same extent as defined
- * by mp and following the start page.
- */
-STATIC void
-xfs_cluster_write(
-       struct inode            *inode,
-       pgoff_t                 tindex,
-       struct xfs_bmbt_irec    *imap,
-       xfs_ioend_t             **ioendp,
-       struct writeback_control *wbc,
-       pgoff_t                 tlast)
-{
-       struct pagevec          pvec;
-       int                     done = 0, i;
-
-       pagevec_init(&pvec, 0);
-       while (!done && tindex <= tlast) {
-               unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-
-               if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
-                       break;
-
-               for (i = 0; i < pagevec_count(&pvec); i++) {
-                       done = xfs_convert_page(inode, pvec.pages[i], tindex++,
-                                       imap, ioendp, wbc);
-                       if (done)
-                               break;
-               }
-
-               pagevec_release(&pvec);
-               cond_resched();
-       }
-}
-
-STATIC void
-xfs_vm_invalidatepage(
-       struct page             *page,
-       unsigned long           offset)
-{
-       trace_xfs_invalidatepage(page->mapping->host, page, offset);
-       block_invalidatepage(page, offset);
-}
-
-/*
- * If the page has delalloc buffers on it, we need to punch them out before we
- * invalidate the page. If we don't, we leave a stale delalloc mapping on the
- * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
- * is done on that same region - the delalloc extent is returned when none is
- * supposed to be there.
- *
- * We prevent this by truncating away the delalloc regions on the page before
- * invalidating it. Because they are delalloc, we can do this without needing a
- * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
- * truncation without a transaction as there is no space left for block
- * reservation (typically why we see a ENOSPC in writeback).
- *
- * This is not a performance critical path, so for now just do the punching a
- * buffer head at a time.
- */
-STATIC void
-xfs_aops_discard_page(
-       struct page             *page)
-{
-       struct inode            *inode = page->mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct buffer_head      *bh, *head;
-       loff_t                  offset = page_offset(page);
-
-       if (!xfs_is_delayed_page(page, IO_DELALLOC))
-               goto out_invalidate;
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               goto out_invalidate;
-
-       xfs_alert(ip->i_mount,
-               "page discard on page %p, inode 0x%llx, offset %llu.",
-                       page, ip->i_ino, offset);
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       bh = head = page_buffers(page);
-       do {
-               int             error;
-               xfs_fileoff_t   start_fsb;
-
-               if (!buffer_delay(bh))
-                       goto next_buffer;
-
-               start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
-               error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
-               if (error) {
-                       /* something screwed, just bail */
-                       if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-                               xfs_alert(ip->i_mount,
-                       "page discard unable to remove delalloc mapping.");
-                       }
-                       break;
-               }
-next_buffer:
-               offset += 1 << inode->i_blkbits;
-
-       } while ((bh = bh->b_this_page) != head);
-
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-out_invalidate:
-       xfs_vm_invalidatepage(page, 0);
-       return;
-}
-
-/*
- * Write out a dirty page.
- *
- * For delalloc space on the page we need to allocate space and flush it.
- * For unwritten space on the page we need to start the conversion to
- * regular allocated space.
- * For any other dirty buffer heads on the page we should flush them.
- */
-STATIC int
-xfs_vm_writepage(
-       struct page             *page,
-       struct writeback_control *wbc)
-{
-       struct inode            *inode = page->mapping->host;
-       struct buffer_head      *bh, *head;
-       struct xfs_bmbt_irec    imap;
-       xfs_ioend_t             *ioend = NULL, *iohead = NULL;
-       loff_t                  offset;
-       unsigned int            type;
-       __uint64_t              end_offset;
-       pgoff_t                 end_index, last_index;
-       ssize_t                 len;
-       int                     err, imap_valid = 0, uptodate = 1;
-       int                     count = 0;
-       int                     nonblocking = 0;
-
-       trace_xfs_writepage(inode, page, 0);
-
-       ASSERT(page_has_buffers(page));
-
-       /*
-        * Refuse to write the page out if we are called from reclaim context.
-        *
-        * This avoids stack overflows when called from deeply used stacks in
-        * random callers for direct reclaim or memcg reclaim.  We explicitly
-        * allow reclaim from kswapd as the stack usage there is relatively low.
-        *
-        * This should really be done by the core VM, but until that happens
-        * filesystems like XFS, btrfs and ext4 have to take care of this
-        * by themselves.
-        */
-       if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
-               goto redirty;
-
-       /*
-        * Given that we do not allow direct reclaim to call us, we should
-        * never be called while in a filesystem transaction.
-        */
-       if (WARN_ON(current->flags & PF_FSTRANS))
-               goto redirty;
-
-       /* Is this page beyond the end of the file? */
-       offset = i_size_read(inode);
-       end_index = offset >> PAGE_CACHE_SHIFT;
-       last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
-       if (page->index >= end_index) {
-               if ((page->index >= end_index + 1) ||
-                   !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
-                       unlock_page(page);
-                       return 0;
-               }
-       }
-
-       end_offset = min_t(unsigned long long,
-                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
-                       offset);
-       len = 1 << inode->i_blkbits;
-
-       bh = head = page_buffers(page);
-       offset = page_offset(page);
-       type = IO_OVERWRITE;
-
-       if (wbc->sync_mode == WB_SYNC_NONE)
-               nonblocking = 1;
-
-       do {
-               int new_ioend = 0;
-
-               if (offset >= end_offset)
-                       break;
-               if (!buffer_uptodate(bh))
-                       uptodate = 0;
-
-               /*
-                * set_page_dirty dirties all buffers in a page, independent
-                * of their state.  The dirty state however is entirely
-                * meaningless for holes (!mapped && uptodate), so skip
-                * buffers covering holes here.
-                */
-               if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
-                       imap_valid = 0;
-                       continue;
-               }
-
-               if (buffer_unwritten(bh)) {
-                       if (type != IO_UNWRITTEN) {
-                               type = IO_UNWRITTEN;
-                               imap_valid = 0;
-                       }
-               } else if (buffer_delay(bh)) {
-                       if (type != IO_DELALLOC) {
-                               type = IO_DELALLOC;
-                               imap_valid = 0;
-                       }
-               } else if (buffer_uptodate(bh)) {
-                       if (type != IO_OVERWRITE) {
-                               type = IO_OVERWRITE;
-                               imap_valid = 0;
-                       }
-               } else {
-                       if (PageUptodate(page)) {
-                               ASSERT(buffer_mapped(bh));
-                               imap_valid = 0;
-                       }
-                       continue;
-               }
-
-               if (imap_valid)
-                       imap_valid = xfs_imap_valid(inode, &imap, offset);
-               if (!imap_valid) {
-                       /*
-                        * If we didn't have a valid mapping then we need to
-                        * put the new mapping into a separate ioend structure.
-                        * This ensures non-contiguous extents always have
-                        * separate ioends, which is particularly important
-                        * for unwritten extent conversion at I/O completion
-                        * time.
-                        */
-                       new_ioend = 1;
-                       err = xfs_map_blocks(inode, offset, &imap, type,
-                                            nonblocking);
-                       if (err)
-                               goto error;
-                       imap_valid = xfs_imap_valid(inode, &imap, offset);
-               }
-               if (imap_valid) {
-                       lock_buffer(bh);
-                       if (type != IO_OVERWRITE)
-                               xfs_map_at_offset(inode, bh, &imap, offset);
-                       xfs_add_to_ioend(inode, bh, offset, type, &ioend,
-                                        new_ioend);
-                       count++;
-               }
-
-               if (!iohead)
-                       iohead = ioend;
-
-       } while (offset += len, ((bh = bh->b_this_page) != head));
-
-       if (uptodate && bh == head)
-               SetPageUptodate(page);
-
-       xfs_start_page_writeback(page, 1, count);
-
-       if (ioend && imap_valid) {
-               xfs_off_t               end_index;
-
-               end_index = imap.br_startoff + imap.br_blockcount;
-
-               /* to bytes */
-               end_index <<= inode->i_blkbits;
-
-               /* to pages */
-               end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
-
-               /* check against file size */
-               if (end_index > last_index)
-                       end_index = last_index;
-
-               xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
-                                 wbc, end_index);
-       }
-
-       if (iohead)
-               xfs_submit_ioend(wbc, iohead);
-
-       return 0;
-
-error:
-       if (iohead)
-               xfs_cancel_ioend(iohead);
-
-       if (err == -EAGAIN)
-               goto redirty;
-
-       xfs_aops_discard_page(page);
-       ClearPageUptodate(page);
-       unlock_page(page);
-       return err;
-
-redirty:
-       redirty_page_for_writepage(wbc, page);
-       unlock_page(page);
-       return 0;
-}
-
-STATIC int
-xfs_vm_writepages(
-       struct address_space    *mapping,
-       struct writeback_control *wbc)
-{
-       xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
-       return generic_writepages(mapping, wbc);
-}
-
-/*
- * Called to move a page into cleanable state - and from there
- * to be released. The page should already be clean. We always
- * have buffer heads in this call.
- *
- * Returns 1 if the page is ok to release, 0 otherwise.
- */
-STATIC int
-xfs_vm_releasepage(
-       struct page             *page,
-       gfp_t                   gfp_mask)
-{
-       int                     delalloc, unwritten;
-
-       trace_xfs_releasepage(page->mapping->host, page, 0);
-
-       xfs_count_page_state(page, &delalloc, &unwritten);
-
-       if (WARN_ON(delalloc))
-               return 0;
-       if (WARN_ON(unwritten))
-               return 0;
-
-       return try_to_free_buffers(page);
-}
-
-STATIC int
-__xfs_get_blocks(
-       struct inode            *inode,
-       sector_t                iblock,
-       struct buffer_head      *bh_result,
-       int                     create,
-       int                     direct)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       xfs_fileoff_t           offset_fsb, end_fsb;
-       int                     error = 0;
-       int                     lockmode = 0;
-       struct xfs_bmbt_irec    imap;
-       int                     nimaps = 1;
-       xfs_off_t               offset;
-       ssize_t                 size;
-       int                     new = 0;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
-
-       offset = (xfs_off_t)iblock << inode->i_blkbits;
-       ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
-       size = bh_result->b_size;
-
-       if (!create && direct && offset >= i_size_read(inode))
-               return 0;
-
-       if (create) {
-               lockmode = XFS_ILOCK_EXCL;
-               xfs_ilock(ip, lockmode);
-       } else {
-               lockmode = xfs_ilock_map_shared(ip);
-       }
-
-       ASSERT(offset <= mp->m_maxioffset);
-       if (offset + size > mp->m_maxioffset)
-               size = mp->m_maxioffset - offset;
-       end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
-       offset_fsb = XFS_B_TO_FSBT(mp, offset);
-
-       error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
-                         XFS_BMAPI_ENTIRE,  NULL, 0, &imap, &nimaps, NULL);
-       if (error)
-               goto out_unlock;
-
-       if (create &&
-           (!nimaps ||
-            (imap.br_startblock == HOLESTARTBLOCK ||
-             imap.br_startblock == DELAYSTARTBLOCK))) {
-               if (direct) {
-                       error = xfs_iomap_write_direct(ip, offset, size,
-                                                      &imap, nimaps);
-               } else {
-                       error = xfs_iomap_write_delay(ip, offset, size, &imap);
-               }
-               if (error)
-                       goto out_unlock;
-
-               trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
-       } else if (nimaps) {
-               trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
-       } else {
-               trace_xfs_get_blocks_notfound(ip, offset, size);
-               goto out_unlock;
-       }
-       xfs_iunlock(ip, lockmode);
-
-       if (imap.br_startblock != HOLESTARTBLOCK &&
-           imap.br_startblock != DELAYSTARTBLOCK) {
-               /*
-                * For unwritten extents do not report a disk address on
-                * the read case (treat as if we're reading into a hole).
-                */
-               if (create || !ISUNWRITTEN(&imap))
-                       xfs_map_buffer(inode, bh_result, &imap, offset);
-               if (create && ISUNWRITTEN(&imap)) {
-                       if (direct)
-                               bh_result->b_private = inode;
-                       set_buffer_unwritten(bh_result);
-               }
-       }
-
-       /*
-        * If this is a realtime file, data may be on a different device.
-        * to that pointed to from the buffer_head b_bdev currently.
-        */
-       bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
-
-       /*
-        * If we previously allocated a block out beyond eof and we are now
-        * coming back to use it then we will need to flag it as new even if it
-        * has a disk address.
-        *
-        * With sub-block writes into unwritten extents we also need to mark
-        * the buffer as new so that the unwritten parts of the buffer gets
-        * correctly zeroed.
-        */
-       if (create &&
-           ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
-            (offset >= i_size_read(inode)) ||
-            (new || ISUNWRITTEN(&imap))))
-               set_buffer_new(bh_result);
-
-       if (imap.br_startblock == DELAYSTARTBLOCK) {
-               BUG_ON(direct);
-               if (create) {
-                       set_buffer_uptodate(bh_result);
-                       set_buffer_mapped(bh_result);
-                       set_buffer_delay(bh_result);
-               }
-       }
-
-       /*
-        * If this is O_DIRECT or the mpage code calling tell them how large
-        * the mapping is, so that we can avoid repeated get_blocks calls.
-        */
-       if (direct || size > (1 << inode->i_blkbits)) {
-               xfs_off_t               mapping_size;
-
-               mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
-               mapping_size <<= inode->i_blkbits;
-
-               ASSERT(mapping_size > 0);
-               if (mapping_size > size)
-                       mapping_size = size;
-               if (mapping_size > LONG_MAX)
-                       mapping_size = LONG_MAX;
-
-               bh_result->b_size = mapping_size;
-       }
-
-       return 0;
-
-out_unlock:
-       xfs_iunlock(ip, lockmode);
-       return -error;
-}
-
-int
-xfs_get_blocks(
-       struct inode            *inode,
-       sector_t                iblock,
-       struct buffer_head      *bh_result,
-       int                     create)
-{
-       return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
-}
-
-STATIC int
-xfs_get_blocks_direct(
-       struct inode            *inode,
-       sector_t                iblock,
-       struct buffer_head      *bh_result,
-       int                     create)
-{
-       return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
-}
-
-/*
- * Complete a direct I/O write request.
- *
- * If the private argument is non-NULL __xfs_get_blocks signals us that we
- * need to issue a transaction to convert the range from unwritten to written
- * extents.  In case this is regular synchronous I/O we just call xfs_end_io
- * to do this and we are done.  But in case this was a successful AIO
- * request this handler is called from interrupt context, from which we
- * can't start transactions.  In that case offload the I/O completion to
- * the workqueues we also use for buffered I/O completion.
- */
-STATIC void
-xfs_end_io_direct_write(
-       struct kiocb            *iocb,
-       loff_t                  offset,
-       ssize_t                 size,
-       void                    *private,
-       int                     ret,
-       bool                    is_async)
-{
-       struct xfs_ioend        *ioend = iocb->private;
-
-       /*
-        * blockdev_direct_IO can return an error even after the I/O
-        * completion handler was called.  Thus we need to protect
-        * against double-freeing.
-        */
-       iocb->private = NULL;
-
-       ioend->io_offset = offset;
-       ioend->io_size = size;
-       if (private && size > 0)
-               ioend->io_type = IO_UNWRITTEN;
-
-       if (is_async) {
-               /*
-                * If we are converting an unwritten extent we need to delay
-                * the AIO completion until after the unwrittent extent
-                * conversion has completed, otherwise do it ASAP.
-                */
-               if (ioend->io_type == IO_UNWRITTEN) {
-                       ioend->io_iocb = iocb;
-                       ioend->io_result = ret;
-               } else {
-                       aio_complete(iocb, ret, 0);
-               }
-               xfs_finish_ioend(ioend);
-       } else {
-               xfs_finish_ioend_sync(ioend);
-       }
-
-       /* XXX: probably should move into the real I/O completion handler */
-       inode_dio_done(ioend->io_inode);
-}
-
-STATIC ssize_t
-xfs_vm_direct_IO(
-       int                     rw,
-       struct kiocb            *iocb,
-       const struct iovec      *iov,
-       loff_t                  offset,
-       unsigned long           nr_segs)
-{
-       struct inode            *inode = iocb->ki_filp->f_mapping->host;
-       struct block_device     *bdev = xfs_find_bdev_for_inode(inode);
-       ssize_t                 ret;
-
-       if (rw & WRITE) {
-               iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
-
-               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-                                           offset, nr_segs,
-                                           xfs_get_blocks_direct,
-                                           xfs_end_io_direct_write, NULL, 0);
-               if (ret != -EIOCBQUEUED && iocb->private)
-                       xfs_destroy_ioend(iocb->private);
-       } else {
-               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-                                           offset, nr_segs,
-                                           xfs_get_blocks_direct,
-                                           NULL, NULL, 0);
-       }
-
-       return ret;
-}
-
-STATIC void
-xfs_vm_write_failed(
-       struct address_space    *mapping,
-       loff_t                  to)
-{
-       struct inode            *inode = mapping->host;
-
-       if (to > inode->i_size) {
-               /*
-                * punch out the delalloc blocks we have already allocated. We
-                * don't call xfs_setattr() to do this as we may be in the
-                * middle of a multi-iovec write and so the vfs inode->i_size
-                * will not match the xfs ip->i_size and so it will zero too
-                * much. Hence we jus truncate the page cache to zero what is
-                * necessary and punch the delalloc blocks directly.
-                */
-               struct xfs_inode        *ip = XFS_I(inode);
-               xfs_fileoff_t           start_fsb;
-               xfs_fileoff_t           end_fsb;
-               int                     error;
-
-               truncate_pagecache(inode, to, inode->i_size);
-
-               /*
-                * Check if there are any blocks that are outside of i_size
-                * that need to be trimmed back.
-                */
-               start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
-               end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
-               if (end_fsb <= start_fsb)
-                       return;
-
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-               error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-                                                       end_fsb - start_fsb);
-               if (error) {
-                       /* something screwed, just bail */
-                       if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-                               xfs_alert(ip->i_mount,
-                       "xfs_vm_write_failed: unable to clean up ino %lld",
-                                               ip->i_ino);
-                       }
-               }
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       }
-}
-
-STATIC int
-xfs_vm_write_begin(
-       struct file             *file,
-       struct address_space    *mapping,
-       loff_t                  pos,
-       unsigned                len,
-       unsigned                flags,
-       struct page             **pagep,
-       void                    **fsdata)
-{
-       int                     ret;
-
-       ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
-                               pagep, xfs_get_blocks);
-       if (unlikely(ret))
-               xfs_vm_write_failed(mapping, pos + len);
-       return ret;
-}
-
-STATIC int
-xfs_vm_write_end(
-       struct file             *file,
-       struct address_space    *mapping,
-       loff_t                  pos,
-       unsigned                len,
-       unsigned                copied,
-       struct page             *page,
-       void                    *fsdata)
-{
-       int                     ret;
-
-       ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-       if (unlikely(ret < len))
-               xfs_vm_write_failed(mapping, pos + len);
-       return ret;
-}
-
-STATIC sector_t
-xfs_vm_bmap(
-       struct address_space    *mapping,
-       sector_t                block)
-{
-       struct inode            *inode = (struct inode *)mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-
-       trace_xfs_vm_bmap(XFS_I(inode));
-       xfs_ilock(ip, XFS_IOLOCK_SHARED);
-       xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
-       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-       return generic_block_bmap(mapping, block, xfs_get_blocks);
-}
-
-STATIC int
-xfs_vm_readpage(
-       struct file             *unused,
-       struct page             *page)
-{
-       return mpage_readpage(page, xfs_get_blocks);
-}
-
-STATIC int
-xfs_vm_readpages(
-       struct file             *unused,
-       struct address_space    *mapping,
-       struct list_head        *pages,
-       unsigned                nr_pages)
-{
-       return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
-}
-
-const struct address_space_operations xfs_address_space_operations = {
-       .readpage               = xfs_vm_readpage,
-       .readpages              = xfs_vm_readpages,
-       .writepage              = xfs_vm_writepage,
-       .writepages             = xfs_vm_writepages,
-       .releasepage            = xfs_vm_releasepage,
-       .invalidatepage         = xfs_vm_invalidatepage,
-       .write_begin            = xfs_vm_write_begin,
-       .write_end              = xfs_vm_write_end,
-       .bmap                   = xfs_vm_bmap,
-       .direct_IO              = xfs_vm_direct_IO,
-       .migratepage            = buffer_migrate_page,
-       .is_partially_uptodate  = block_is_partially_uptodate,
-       .error_remove_page      = generic_error_remove_page,
-};
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h

deleted file mode 100644 (file)

index 71f721e..0000000
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2005-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_AOPS_H__
-#define __XFS_AOPS_H__
-
-extern struct workqueue_struct *xfsdatad_workqueue;
-extern struct workqueue_struct *xfsconvertd_workqueue;
-extern mempool_t *xfs_ioend_pool;
-
-/*
- * Types of I/O for bmap clustering and I/O completion tracking.
- */
-enum {
-       IO_DIRECT = 0,  /* special case for direct I/O ioends */
-       IO_DELALLOC,    /* mapping covers delalloc region */
-       IO_UNWRITTEN,   /* mapping covers allocated but uninitialized data */
-       IO_OVERWRITE,   /* mapping covers already allocated extent */
-};
-
-#define XFS_IO_TYPES \
-       { 0,                    "" }, \
-       { IO_DELALLOC,          "delalloc" }, \
-       { IO_UNWRITTEN,         "unwritten" }, \
-       { IO_OVERWRITE,         "overwrite" }
-
-/*
- * xfs_ioend struct manages large extent writes for XFS.
- * It can manage several multi-page bio's at once.
- */
-typedef struct xfs_ioend {
-       struct xfs_ioend        *io_list;       /* next ioend in chain */
-       unsigned int            io_type;        /* delalloc / unwritten */
-       int                     io_error;       /* I/O error code */
-       atomic_t                io_remaining;   /* hold count */
-       struct inode            *io_inode;      /* file being written to */
-       struct buffer_head      *io_buffer_head;/* buffer linked list head */
-       struct buffer_head      *io_buffer_tail;/* buffer linked list tail */
-       size_t                  io_size;        /* size of the extent */
-       xfs_off_t               io_offset;      /* offset in the file */
-       struct work_struct      io_work;        /* xfsdatad work queue */
-       struct kiocb            *io_iocb;
-       int                     io_result;
-} xfs_ioend_t;
-
-extern const struct address_space_operations xfs_address_space_operations;
-extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
-
-extern void xfs_ioend_init(void);
-extern void xfs_ioend_wait(struct xfs_inode *);
-
-extern void xfs_count_page_state(struct page *, int *, int *);
-
-#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c

deleted file mode 100644 (file)

index d1fe745..0000000
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ /dev/null
@@ -1,1877 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/gfp.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <linux/bio.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/workqueue.h>
-#include <linux/percpu.h>
-#include <linux/blkdev.h>
-#include <linux/hash.h>
-#include <linux/kthread.h>
-#include <linux/migrate.h>
-#include <linux/backing-dev.h>
-#include <linux/freezer.h>
-
-#include "xfs_sb.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_trace.h"
-
-static kmem_zone_t *xfs_buf_zone;
-STATIC int xfsbufd(void *);
-STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
-
-static struct workqueue_struct *xfslogd_workqueue;
-struct workqueue_struct *xfsdatad_workqueue;
-struct workqueue_struct *xfsconvertd_workqueue;
-
-#ifdef XFS_BUF_LOCK_TRACKING
-# define XB_SET_OWNER(bp)      ((bp)->b_last_holder = current->pid)
-# define XB_CLEAR_OWNER(bp)    ((bp)->b_last_holder = -1)
-# define XB_GET_OWNER(bp)      ((bp)->b_last_holder)
-#else
-# define XB_SET_OWNER(bp)      do { } while (0)
-# define XB_CLEAR_OWNER(bp)    do { } while (0)
-# define XB_GET_OWNER(bp)      do { } while (0)
-#endif
-
-#define xb_to_gfp(flags) \
-       ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
-         ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
-
-#define xb_to_km(flags) \
-        (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
-
-#define xfs_buf_allocate(flags) \
-       kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
-#define xfs_buf_deallocate(bp) \
-       kmem_zone_free(xfs_buf_zone, (bp));
-
-static inline int
-xfs_buf_is_vmapped(
-       struct xfs_buf  *bp)
-{
-       /*
-        * Return true if the buffer is vmapped.
-        *
-        * The XBF_MAPPED flag is set if the buffer should be mapped, but the
-        * code is clever enough to know it doesn't have to map a single page,
-        * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1.
-        */
-       return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1;
-}
-
-static inline int
-xfs_buf_vmap_len(
-       struct xfs_buf  *bp)
-{
-       return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
-}
-
-/*
- * xfs_buf_lru_add - add a buffer to the LRU.
- *
- * The LRU takes a new reference to the buffer so that it will only be freed
- * once the shrinker takes the buffer off the LRU.
- */
-STATIC void
-xfs_buf_lru_add(
-       struct xfs_buf  *bp)
-{
-       struct xfs_buftarg *btp = bp->b_target;
-
-       spin_lock(&btp->bt_lru_lock);
-       if (list_empty(&bp->b_lru)) {
-               atomic_inc(&bp->b_hold);
-               list_add_tail(&bp->b_lru, &btp->bt_lru);
-               btp->bt_lru_nr++;
-       }
-       spin_unlock(&btp->bt_lru_lock);
-}
-
-/*
- * xfs_buf_lru_del - remove a buffer from the LRU
- *
- * The unlocked check is safe here because it only occurs when there are not
- * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
- * to optimise the shrinker removing the buffer from the LRU and calling
- * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
- * bt_lru_lock.
- */
-STATIC void
-xfs_buf_lru_del(
-       struct xfs_buf  *bp)
-{
-       struct xfs_buftarg *btp = bp->b_target;
-
-       if (list_empty(&bp->b_lru))
-               return;
-
-       spin_lock(&btp->bt_lru_lock);
-       if (!list_empty(&bp->b_lru)) {
-               list_del_init(&bp->b_lru);
-               btp->bt_lru_nr--;
-       }
-       spin_unlock(&btp->bt_lru_lock);
-}
-
-/*
- * When we mark a buffer stale, we remove the buffer from the LRU and clear the
- * b_lru_ref count so that the buffer is freed immediately when the buffer
- * reference count falls to zero. If the buffer is already on the LRU, we need
- * to remove the reference that LRU holds on the buffer.
- *
- * This prevents build-up of stale buffers on the LRU.
- */
-void
-xfs_buf_stale(
-       struct xfs_buf  *bp)
-{
-       bp->b_flags |= XBF_STALE;
-       atomic_set(&(bp)->b_lru_ref, 0);
-       if (!list_empty(&bp->b_lru)) {
-               struct xfs_buftarg *btp = bp->b_target;
-
-               spin_lock(&btp->bt_lru_lock);
-               if (!list_empty(&bp->b_lru)) {
-                       list_del_init(&bp->b_lru);
-                       btp->bt_lru_nr--;
-                       atomic_dec(&bp->b_hold);
-               }
-               spin_unlock(&btp->bt_lru_lock);
-       }
-       ASSERT(atomic_read(&bp->b_hold) >= 1);
-}
-
-STATIC void
-_xfs_buf_initialize(
-       xfs_buf_t               *bp,
-       xfs_buftarg_t           *target,
-       xfs_off_t               range_base,
-       size_t                  range_length,
-       xfs_buf_flags_t         flags)
-{
-       /*
-        * We don't want certain flags to appear in b_flags.
-        */
-       flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
-
-       memset(bp, 0, sizeof(xfs_buf_t));
-       atomic_set(&bp->b_hold, 1);
-       atomic_set(&bp->b_lru_ref, 1);
-       init_completion(&bp->b_iowait);
-       INIT_LIST_HEAD(&bp->b_lru);
-       INIT_LIST_HEAD(&bp->b_list);
-       RB_CLEAR_NODE(&bp->b_rbnode);
-       sema_init(&bp->b_sema, 0); /* held, no waiters */
-       XB_SET_OWNER(bp);
-       bp->b_target = target;
-       bp->b_file_offset = range_base;
-       /*
-        * Set buffer_length and count_desired to the same value initially.
-        * I/O routines should use count_desired, which will be the same in
-        * most cases but may be reset (e.g. XFS recovery).
-        */
-       bp->b_buffer_length = bp->b_count_desired = range_length;
-       bp->b_flags = flags;
-       bp->b_bn = XFS_BUF_DADDR_NULL;
-       atomic_set(&bp->b_pin_count, 0);
-       init_waitqueue_head(&bp->b_waiters);
-
-       XFS_STATS_INC(xb_create);
-
-       trace_xfs_buf_init(bp, _RET_IP_);
-}
-
-/*
- *     Allocate a page array capable of holding a specified number
- *     of pages, and point the page buf at it.
- */
-STATIC int
-_xfs_buf_get_pages(
-       xfs_buf_t               *bp,
-       int                     page_count,
-       xfs_buf_flags_t         flags)
-{
-       /* Make sure that we have a page list */
-       if (bp->b_pages == NULL) {
-               bp->b_offset = xfs_buf_poff(bp->b_file_offset);
-               bp->b_page_count = page_count;
-               if (page_count <= XB_PAGES) {
-                       bp->b_pages = bp->b_page_array;
-               } else {
-                       bp->b_pages = kmem_alloc(sizeof(struct page *) *
-                                       page_count, xb_to_km(flags));
-                       if (bp->b_pages == NULL)
-                               return -ENOMEM;
-               }
-               memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
-       }
-       return 0;
-}
-
-/*
- *     Frees b_pages if it was allocated.
- */
-STATIC void
-_xfs_buf_free_pages(
-       xfs_buf_t       *bp)
-{
-       if (bp->b_pages != bp->b_page_array) {
-               kmem_free(bp->b_pages);
-               bp->b_pages = NULL;
-       }
-}
-
-/*
- *     Releases the specified buffer.
- *
- *     The modification state of any associated pages is left unchanged.
- *     The buffer most not be on any hash - use xfs_buf_rele instead for
- *     hashed and refcounted buffers
- */
-void
-xfs_buf_free(
-       xfs_buf_t               *bp)
-{
-       trace_xfs_buf_free(bp, _RET_IP_);
-
-       ASSERT(list_empty(&bp->b_lru));
-
-       if (bp->b_flags & _XBF_PAGES) {
-               uint            i;
-
-               if (xfs_buf_is_vmapped(bp))
-                       vm_unmap_ram(bp->b_addr - bp->b_offset,
-                                       bp->b_page_count);
-
-               for (i = 0; i < bp->b_page_count; i++) {
-                       struct page     *page = bp->b_pages[i];
-
-                       __free_page(page);
-               }
-       } else if (bp->b_flags & _XBF_KMEM)
-               kmem_free(bp->b_addr);
-       _xfs_buf_free_pages(bp);
-       xfs_buf_deallocate(bp);
-}
-
-/*
- * Allocates all the pages for buffer in question and builds it's page list.
- */
-STATIC int
-xfs_buf_allocate_memory(
-       xfs_buf_t               *bp,
-       uint                    flags)
-{
-       size_t                  size = bp->b_count_desired;
-       size_t                  nbytes, offset;
-       gfp_t                   gfp_mask = xb_to_gfp(flags);
-       unsigned short          page_count, i;
-       xfs_off_t               end;
-       int                     error;
-
-       /*
-        * for buffers that are contained within a single page, just allocate
-        * the memory from the heap - there's no need for the complexity of
-        * page arrays to keep allocation down to order 0.
-        */
-       if (bp->b_buffer_length < PAGE_SIZE) {
-               bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
-               if (!bp->b_addr) {
-                       /* low memory - use alloc_page loop instead */
-                       goto use_alloc_page;
-               }
-
-               if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
-                                                               PAGE_MASK) !=
-                   ((unsigned long)bp->b_addr & PAGE_MASK)) {
-                       /* b_addr spans two pages - use alloc_page instead */
-                       kmem_free(bp->b_addr);
-                       bp->b_addr = NULL;
-                       goto use_alloc_page;
-               }
-               bp->b_offset = offset_in_page(bp->b_addr);
-               bp->b_pages = bp->b_page_array;
-               bp->b_pages[0] = virt_to_page(bp->b_addr);
-               bp->b_page_count = 1;
-               bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
-               return 0;
-       }
-
-use_alloc_page:
-       end = bp->b_file_offset + bp->b_buffer_length;
-       page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
-       error = _xfs_buf_get_pages(bp, page_count, flags);
-       if (unlikely(error))
-               return error;
-
-       offset = bp->b_offset;
-       bp->b_flags |= _XBF_PAGES;
-
-       for (i = 0; i < bp->b_page_count; i++) {
-               struct page     *page;
-               uint            retries = 0;
-retry:
-               page = alloc_page(gfp_mask);
-               if (unlikely(page == NULL)) {
-                       if (flags & XBF_READ_AHEAD) {
-                               bp->b_page_count = i;
-                               error = ENOMEM;
-                               goto out_free_pages;
-                       }
-
-                       /*
-                        * This could deadlock.
-                        *
-                        * But until all the XFS lowlevel code is revamped to
-                        * handle buffer allocation failures we can't do much.
-                        */
-                       if (!(++retries % 100))
-                               xfs_err(NULL,
-               "possible memory allocation deadlock in %s (mode:0x%x)",
-                                       __func__, gfp_mask);
-
-                       XFS_STATS_INC(xb_page_retries);
-                       congestion_wait(BLK_RW_ASYNC, HZ/50);
-                       goto retry;
-               }
-
-               XFS_STATS_INC(xb_page_found);
-
-               nbytes = min_t(size_t, size, PAGE_SIZE - offset);
-               size -= nbytes;
-               bp->b_pages[i] = page;
-               offset = 0;
-       }
-       return 0;
-
-out_free_pages:
-       for (i = 0; i < bp->b_page_count; i++)
-               __free_page(bp->b_pages[i]);
-       return error;
-}
-
-/*
- *     Map buffer into kernel address-space if necessary.
- */
-STATIC int
-_xfs_buf_map_pages(
-       xfs_buf_t               *bp,
-       uint                    flags)
-{
-       ASSERT(bp->b_flags & _XBF_PAGES);
-       if (bp->b_page_count == 1) {
-               /* A single page buffer is always mappable */
-               bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
-               bp->b_flags |= XBF_MAPPED;
-       } else if (flags & XBF_MAPPED) {
-               int retried = 0;
-
-               do {
-                       bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
-                                               -1, PAGE_KERNEL);
-                       if (bp->b_addr)
-                               break;
-                       vm_unmap_aliases();
-               } while (retried++ <= 1);
-
-               if (!bp->b_addr)
-                       return -ENOMEM;
-               bp->b_addr += bp->b_offset;
-               bp->b_flags |= XBF_MAPPED;
-       }
-
-       return 0;
-}
-
-/*
- *     Finding and Reading Buffers
- */
-
-/*
- *     Look up, and creates if absent, a lockable buffer for
- *     a given range of an inode.  The buffer is returned
- *     locked.  If other overlapping buffers exist, they are
- *     released before the new buffer is created and locked,
- *     which may imply that this call will block until those buffers
- *     are unlocked.  No I/O is implied by this call.
- */
-xfs_buf_t *
-_xfs_buf_find(
-       xfs_buftarg_t           *btp,   /* block device target          */
-       xfs_off_t               ioff,   /* starting offset of range     */
-       size_t                  isize,  /* length of range              */
-       xfs_buf_flags_t         flags,
-       xfs_buf_t               *new_bp)
-{
-       xfs_off_t               range_base;
-       size_t                  range_length;
-       struct xfs_perag        *pag;
-       struct rb_node          **rbp;
-       struct rb_node          *parent;
-       xfs_buf_t               *bp;
-
-       range_base = (ioff << BBSHIFT);
-       range_length = (isize << BBSHIFT);
-
-       /* Check for IOs smaller than the sector size / not sector aligned */
-       ASSERT(!(range_length < (1 << btp->bt_sshift)));
-       ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
-
-       /* get tree root */
-       pag = xfs_perag_get(btp->bt_mount,
-                               xfs_daddr_to_agno(btp->bt_mount, ioff));
-
-       /* walk tree */
-       spin_lock(&pag->pag_buf_lock);
-       rbp = &pag->pag_buf_tree.rb_node;
-       parent = NULL;
-       bp = NULL;
-       while (*rbp) {
-               parent = *rbp;
-               bp = rb_entry(parent, struct xfs_buf, b_rbnode);
-
-               if (range_base < bp->b_file_offset)
-                       rbp = &(*rbp)->rb_left;
-               else if (range_base > bp->b_file_offset)
-                       rbp = &(*rbp)->rb_right;
-               else {
-                       /*
-                        * found a block offset match. If the range doesn't
-                        * match, the only way this is allowed is if the buffer
-                        * in the cache is stale and the transaction that made
-                        * it stale has not yet committed. i.e. we are
-                        * reallocating a busy extent. Skip this buffer and
-                        * continue searching to the right for an exact match.
-                        */
-                       if (bp->b_buffer_length != range_length) {
-                               ASSERT(bp->b_flags & XBF_STALE);
-                               rbp = &(*rbp)->rb_right;
-                               continue;
-                       }
-                       atomic_inc(&bp->b_hold);
-                       goto found;
-               }
-       }
-
-       /* No match found */
-       if (new_bp) {
-               _xfs_buf_initialize(new_bp, btp, range_base,
-                               range_length, flags);
-               rb_link_node(&new_bp->b_rbnode, parent, rbp);
-               rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
-               /* the buffer keeps the perag reference until it is freed */
-               new_bp->b_pag = pag;
-               spin_unlock(&pag->pag_buf_lock);
-       } else {
-               XFS_STATS_INC(xb_miss_locked);
-               spin_unlock(&pag->pag_buf_lock);
-               xfs_perag_put(pag);
-       }
-       return new_bp;
-
-found:
-       spin_unlock(&pag->pag_buf_lock);
-       xfs_perag_put(pag);
-
-       if (!xfs_buf_trylock(bp)) {
-               if (flags & XBF_TRYLOCK) {
-                       xfs_buf_rele(bp);
-                       XFS_STATS_INC(xb_busy_locked);
-                       return NULL;
-               }
-               xfs_buf_lock(bp);
-               XFS_STATS_INC(xb_get_locked_waited);
-       }
-
-       /*
-        * if the buffer is stale, clear all the external state associated with
-        * it. We need to keep flags such as how we allocated the buffer memory
-        * intact here.
-        */
-       if (bp->b_flags & XBF_STALE) {
-               ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
-               bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
-       }
-
-       trace_xfs_buf_find(bp, flags, _RET_IP_);
-       XFS_STATS_INC(xb_get_locked);
-       return bp;
-}
-
-/*
- *     Assembles a buffer covering the specified range.
- *     Storage in memory for all portions of the buffer will be allocated,
- *     although backing storage may not be.
- */
-xfs_buf_t *
-xfs_buf_get(
-       xfs_buftarg_t           *target,/* target for buffer            */
-       xfs_off_t               ioff,   /* starting offset of range     */
-       size_t                  isize,  /* length of range              */
-       xfs_buf_flags_t         flags)
-{
-       xfs_buf_t               *bp, *new_bp;
-       int                     error = 0;
-
-       new_bp = xfs_buf_allocate(flags);
-       if (unlikely(!new_bp))
-               return NULL;
-
-       bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
-       if (bp == new_bp) {
-               error = xfs_buf_allocate_memory(bp, flags);
-               if (error)
-                       goto no_buffer;
-       } else {
-               xfs_buf_deallocate(new_bp);
-               if (unlikely(bp == NULL))
-                       return NULL;
-       }
-
-       if (!(bp->b_flags & XBF_MAPPED)) {
-               error = _xfs_buf_map_pages(bp, flags);
-               if (unlikely(error)) {
-                       xfs_warn(target->bt_mount,
-                               "%s: failed to map pages\n", __func__);
-                       goto no_buffer;
-               }
-       }
-
-       XFS_STATS_INC(xb_get);
-
-       /*
-        * Always fill in the block number now, the mapped cases can do
-        * their own overlay of this later.
-        */
-       bp->b_bn = ioff;
-       bp->b_count_desired = bp->b_buffer_length;
-
-       trace_xfs_buf_get(bp, flags, _RET_IP_);
-       return bp;
-
- no_buffer:
-       if (flags & (XBF_LOCK | XBF_TRYLOCK))
-               xfs_buf_unlock(bp);
-       xfs_buf_rele(bp);
-       return NULL;
-}
-
-STATIC int
-_xfs_buf_read(
-       xfs_buf_t               *bp,
-       xfs_buf_flags_t         flags)
-{
-       int                     status;
-
-       ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
-       ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
-
-       bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD);
-       bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
-
-       status = xfs_buf_iorequest(bp);
-       if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC))
-               return status;
-       return xfs_buf_iowait(bp);
-}
-
-xfs_buf_t *
-xfs_buf_read(
-       xfs_buftarg_t           *target,
-       xfs_off_t               ioff,
-       size_t                  isize,
-       xfs_buf_flags_t         flags)
-{
-       xfs_buf_t               *bp;
-
-       flags |= XBF_READ;
-
-       bp = xfs_buf_get(target, ioff, isize, flags);
-       if (bp) {
-               trace_xfs_buf_read(bp, flags, _RET_IP_);
-
-               if (!XFS_BUF_ISDONE(bp)) {
-                       XFS_STATS_INC(xb_get_read);
-                       _xfs_buf_read(bp, flags);
-               } else if (flags & XBF_ASYNC) {
-                       /*
-                        * Read ahead call which is already satisfied,
-                        * drop the buffer
-                        */
-                       goto no_buffer;
-               } else {
-                       /* We do not want read in the flags */
-                       bp->b_flags &= ~XBF_READ;
-               }
-       }
-
-       return bp;
-
- no_buffer:
-       if (flags & (XBF_LOCK | XBF_TRYLOCK))
-               xfs_buf_unlock(bp);
-       xfs_buf_rele(bp);
-       return NULL;
-}
-
-/*
- *     If we are not low on memory then do the readahead in a deadlock
- *     safe manner.
- */
-void
-xfs_buf_readahead(
-       xfs_buftarg_t           *target,
-       xfs_off_t               ioff,
-       size_t                  isize)
-{
-       if (bdi_read_congested(target->bt_bdi))
-               return;
-
-       xfs_buf_read(target, ioff, isize,
-                    XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
-}
-
-/*
- * Read an uncached buffer from disk. Allocates and returns a locked
- * buffer containing the disk contents or nothing.
- */
-struct xfs_buf *
-xfs_buf_read_uncached(
-       struct xfs_mount        *mp,
-       struct xfs_buftarg      *target,
-       xfs_daddr_t             daddr,
-       size_t                  length,
-       int                     flags)
-{
-       xfs_buf_t               *bp;
-       int                     error;
-
-       bp = xfs_buf_get_uncached(target, length, flags);
-       if (!bp)
-               return NULL;
-
-       /* set up the buffer for a read IO */
-       XFS_BUF_SET_ADDR(bp, daddr);
-       XFS_BUF_READ(bp);
-       XFS_BUF_BUSY(bp);
-
-       xfsbdstrat(mp, bp);
-       error = xfs_buf_iowait(bp);
-       if (error || bp->b_error) {
-               xfs_buf_relse(bp);
-               return NULL;
-       }
-       return bp;
-}
-
-xfs_buf_t *
-xfs_buf_get_empty(
-       size_t                  len,
-       xfs_buftarg_t           *target)
-{
-       xfs_buf_t               *bp;
-
-       bp = xfs_buf_allocate(0);
-       if (bp)
-               _xfs_buf_initialize(bp, target, 0, len, 0);
-       return bp;
-}
-
-/*
- * Return a buffer allocated as an empty buffer and associated to external
- * memory via xfs_buf_associate_memory() back to it's empty state.
- */
-void
-xfs_buf_set_empty(
-       struct xfs_buf          *bp,
-       size_t                  len)
-{
-       if (bp->b_pages)
-               _xfs_buf_free_pages(bp);
-
-       bp->b_pages = NULL;
-       bp->b_page_count = 0;
-       bp->b_addr = NULL;
-       bp->b_file_offset = 0;
-       bp->b_buffer_length = bp->b_count_desired = len;
-       bp->b_bn = XFS_BUF_DADDR_NULL;
-       bp->b_flags &= ~XBF_MAPPED;
-}
-
-static inline struct page *
-mem_to_page(
-       void                    *addr)
-{
-       if ((!is_vmalloc_addr(addr))) {
-               return virt_to_page(addr);
-       } else {
-               return vmalloc_to_page(addr);
-       }
-}
-
-int
-xfs_buf_associate_memory(
-       xfs_buf_t               *bp,
-       void                    *mem,
-       size_t                  len)
-{
-       int                     rval;
-       int                     i = 0;
-       unsigned long           pageaddr;
-       unsigned long           offset;
-       size_t                  buflen;
-       int                     page_count;
-
-       pageaddr = (unsigned long)mem & PAGE_MASK;
-       offset = (unsigned long)mem - pageaddr;
-       buflen = PAGE_ALIGN(len + offset);
-       page_count = buflen >> PAGE_SHIFT;
-
-       /* Free any previous set of page pointers */
-       if (bp->b_pages)
-               _xfs_buf_free_pages(bp);
-
-       bp->b_pages = NULL;
-       bp->b_addr = mem;
-
-       rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
-       if (rval)
-               return rval;
-
-       bp->b_offset = offset;
-
-       for (i = 0; i < bp->b_page_count; i++) {
-               bp->b_pages[i] = mem_to_page((void *)pageaddr);
-               pageaddr += PAGE_SIZE;
-       }
-
-       bp->b_count_desired = len;
-       bp->b_buffer_length = buflen;
-       bp->b_flags |= XBF_MAPPED;
-
-       return 0;
-}
-
-xfs_buf_t *
-xfs_buf_get_uncached(
-       struct xfs_buftarg      *target,
-       size_t                  len,
-       int                     flags)
-{
-       unsigned long           page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
-       int                     error, i;
-       xfs_buf_t               *bp;
-
-       bp = xfs_buf_allocate(0);
-       if (unlikely(bp == NULL))
-               goto fail;
-       _xfs_buf_initialize(bp, target, 0, len, 0);
-
-       error = _xfs_buf_get_pages(bp, page_count, 0);
-       if (error)
-               goto fail_free_buf;
-
-       for (i = 0; i < page_count; i++) {
-               bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
-               if (!bp->b_pages[i])
-                       goto fail_free_mem;
-       }
-       bp->b_flags |= _XBF_PAGES;
-
-       error = _xfs_buf_map_pages(bp, XBF_MAPPED);
-       if (unlikely(error)) {
-               xfs_warn(target->bt_mount,
-                       "%s: failed to map pages\n", __func__);
-               goto fail_free_mem;
-       }
-
-       trace_xfs_buf_get_uncached(bp, _RET_IP_);
-       return bp;
-
- fail_free_mem:
-       while (--i >= 0)
-               __free_page(bp->b_pages[i]);
-       _xfs_buf_free_pages(bp);
- fail_free_buf:
-       xfs_buf_deallocate(bp);
- fail:
-       return NULL;
-}
-
-/*
- *     Increment reference count on buffer, to hold the buffer concurrently
- *     with another thread which may release (free) the buffer asynchronously.
- *     Must hold the buffer already to call this function.
- */
-void
-xfs_buf_hold(
-       xfs_buf_t               *bp)
-{
-       trace_xfs_buf_hold(bp, _RET_IP_);
-       atomic_inc(&bp->b_hold);
-}
-
-/*
- *     Releases a hold on the specified buffer.  If the
- *     the hold count is 1, calls xfs_buf_free.
- */
-void
-xfs_buf_rele(
-       xfs_buf_t               *bp)
-{
-       struct xfs_perag        *pag = bp->b_pag;
-
-       trace_xfs_buf_rele(bp, _RET_IP_);
-
-       if (!pag) {
-               ASSERT(list_empty(&bp->b_lru));
-               ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
-               if (atomic_dec_and_test(&bp->b_hold))
-                       xfs_buf_free(bp);
-               return;
-       }
-
-       ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
-
-       ASSERT(atomic_read(&bp->b_hold) > 0);
-       if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
-               if (!(bp->b_flags & XBF_STALE) &&
-                          atomic_read(&bp->b_lru_ref)) {
-                       xfs_buf_lru_add(bp);
-                       spin_unlock(&pag->pag_buf_lock);
-               } else {
-                       xfs_buf_lru_del(bp);
-                       ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
-                       rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
-                       spin_unlock(&pag->pag_buf_lock);
-                       xfs_perag_put(pag);
-                       xfs_buf_free(bp);
-               }
-       }
-}
-
-
-/*
- *     Lock a buffer object, if it is not already locked.
- *
- *     If we come across a stale, pinned, locked buffer, we know that we are
- *     being asked to lock a buffer that has been reallocated. Because it is
- *     pinned, we know that the log has not been pushed to disk and hence it
- *     will still be locked.  Rather than continuing to have trylock attempts
- *     fail until someone else pushes the log, push it ourselves before
- *     returning.  This means that the xfsaild will not get stuck trying
- *     to push on stale inode buffers.
- */
-int
-xfs_buf_trylock(
-       struct xfs_buf          *bp)
-{
-       int                     locked;
-
-       locked = down_trylock(&bp->b_sema) == 0;
-       if (locked)
-               XB_SET_OWNER(bp);
-       else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
-               xfs_log_force(bp->b_target->bt_mount, 0);
-
-       trace_xfs_buf_trylock(bp, _RET_IP_);
-       return locked;
-}
-
-/*
- *     Lock a buffer object.
- *
- *     If we come across a stale, pinned, locked buffer, we know that we
- *     are being asked to lock a buffer that has been reallocated. Because
- *     it is pinned, we know that the log has not been pushed to disk and
- *     hence it will still be locked. Rather than sleeping until someone
- *     else pushes the log, push it ourselves before trying to get the lock.
- */
-void
-xfs_buf_lock(
-       struct xfs_buf          *bp)
-{
-       trace_xfs_buf_lock(bp, _RET_IP_);
-
-       if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
-               xfs_log_force(bp->b_target->bt_mount, 0);
-       down(&bp->b_sema);
-       XB_SET_OWNER(bp);
-
-       trace_xfs_buf_lock_done(bp, _RET_IP_);
-}
-
-/*
- *     Releases the lock on the buffer object.
- *     If the buffer is marked delwri but is not queued, do so before we
- *     unlock the buffer as we need to set flags correctly.  We also need to
- *     take a reference for the delwri queue because the unlocker is going to
- *     drop their's and they don't know we just queued it.
- */
-void
-xfs_buf_unlock(
-       struct xfs_buf          *bp)
-{
-       if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
-               atomic_inc(&bp->b_hold);
-               bp->b_flags |= XBF_ASYNC;
-               xfs_buf_delwri_queue(bp, 0);
-       }
-
-       XB_CLEAR_OWNER(bp);
-       up(&bp->b_sema);
-
-       trace_xfs_buf_unlock(bp, _RET_IP_);
-}
-
-STATIC void
-xfs_buf_wait_unpin(
-       xfs_buf_t               *bp)
-{
-       DECLARE_WAITQUEUE       (wait, current);
-
-       if (atomic_read(&bp->b_pin_count) == 0)
-               return;
-
-       add_wait_queue(&bp->b_waiters, &wait);
-       for (;;) {
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               if (atomic_read(&bp->b_pin_count) == 0)
-                       break;
-               io_schedule();
-       }
-       remove_wait_queue(&bp->b_waiters, &wait);
-       set_current_state(TASK_RUNNING);
-}
-
-/*
- *     Buffer Utility Routines
- */
-
-STATIC void
-xfs_buf_iodone_work(
-       struct work_struct      *work)
-{
-       xfs_buf_t               *bp =
-               container_of(work, xfs_buf_t, b_iodone_work);
-
-       if (bp->b_iodone)
-               (*(bp->b_iodone))(bp);
-       else if (bp->b_flags & XBF_ASYNC)
-               xfs_buf_relse(bp);
-}
-
-void
-xfs_buf_ioend(
-       xfs_buf_t               *bp,
-       int                     schedule)
-{
-       trace_xfs_buf_iodone(bp, _RET_IP_);
-
-       bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
-       if (bp->b_error == 0)
-               bp->b_flags |= XBF_DONE;
-
-       if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
-               if (schedule) {
-                       INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
-                       queue_work(xfslogd_workqueue, &bp->b_iodone_work);
-               } else {
-                       xfs_buf_iodone_work(&bp->b_iodone_work);
-               }
-       } else {
-               complete(&bp->b_iowait);
-       }
-}
-
-void
-xfs_buf_ioerror(
-       xfs_buf_t               *bp,
-       int                     error)
-{
-       ASSERT(error >= 0 && error <= 0xffff);
-       bp->b_error = (unsigned short)error;
-       trace_xfs_buf_ioerror(bp, error, _RET_IP_);
-}
-
-int
-xfs_bwrite(
-       struct xfs_mount        *mp,
-       struct xfs_buf          *bp)
-{
-       int                     error;
-
-       bp->b_flags |= XBF_WRITE;
-       bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
-
-       xfs_buf_delwri_dequeue(bp);
-       xfs_bdstrat_cb(bp);
-
-       error = xfs_buf_iowait(bp);
-       if (error)
-               xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
-       xfs_buf_relse(bp);
-       return error;
-}
-
-void
-xfs_bdwrite(
-       void                    *mp,
-       struct xfs_buf          *bp)
-{
-       trace_xfs_buf_bdwrite(bp, _RET_IP_);
-
-       bp->b_flags &= ~XBF_READ;
-       bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
-
-       xfs_buf_delwri_queue(bp, 1);
-}
-
-/*
- * Called when we want to stop a buffer from getting written or read.
- * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
- * so that the proper iodone callbacks get called.
- */
-STATIC int
-xfs_bioerror(
-       xfs_buf_t *bp)
-{
-#ifdef XFSERRORDEBUG
-       ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
-#endif
-
-       /*
-        * No need to wait until the buffer is unpinned, we aren't flushing it.
-        */
-       XFS_BUF_ERROR(bp, EIO);
-
-       /*
-        * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
-        */
-       XFS_BUF_UNREAD(bp);
-       XFS_BUF_UNDELAYWRITE(bp);
-       XFS_BUF_UNDONE(bp);
-       XFS_BUF_STALE(bp);
-
-       xfs_buf_ioend(bp, 0);
-
-       return EIO;
-}
-
-/*
- * Same as xfs_bioerror, except that we are releasing the buffer
- * here ourselves, and avoiding the xfs_buf_ioend call.
- * This is meant for userdata errors; metadata bufs come with
- * iodone functions attached, so that we can track down errors.
- */
-STATIC int
-xfs_bioerror_relse(
-       struct xfs_buf  *bp)
-{
-       int64_t         fl = XFS_BUF_BFLAGS(bp);
-       /*
-        * No need to wait until the buffer is unpinned.
-        * We aren't flushing it.
-        *
-        * chunkhold expects B_DONE to be set, whether
-        * we actually finish the I/O or not. We don't want to
-        * change that interface.
-        */
-       XFS_BUF_UNREAD(bp);
-       XFS_BUF_UNDELAYWRITE(bp);
-       XFS_BUF_DONE(bp);
-       XFS_BUF_STALE(bp);
-       bp->b_iodone = NULL;
-       if (!(fl & XBF_ASYNC)) {
-               /*
-                * Mark b_error and B_ERROR _both_.
-                * Lot's of chunkcache code assumes that.
-                * There's no reason to mark error for
-                * ASYNC buffers.
-                */
-               XFS_BUF_ERROR(bp, EIO);
-               XFS_BUF_FINISH_IOWAIT(bp);
-       } else {
-               xfs_buf_relse(bp);
-       }
-
-       return EIO;
-}
-
-
-/*
- * All xfs metadata buffers except log state machine buffers
- * get this attached as their b_bdstrat callback function.
- * This is so that we can catch a buffer
- * after prematurely unpinning it to forcibly shutdown the filesystem.
- */
-int
-xfs_bdstrat_cb(
-       struct xfs_buf  *bp)
-{
-       if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
-               trace_xfs_bdstrat_shut(bp, _RET_IP_);
-               /*
-                * Metadata write that didn't get logged but
-                * written delayed anyway. These aren't associated
-                * with a transaction, and can be ignored.
-                */
-               if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
-                       return xfs_bioerror_relse(bp);
-               else
-                       return xfs_bioerror(bp);
-       }
-
-       xfs_buf_iorequest(bp);
-       return 0;
-}
-
-/*
- * Wrapper around bdstrat so that we can stop data from going to disk in case
- * we are shutting down the filesystem.  Typically user data goes thru this
- * path; one of the exceptions is the superblock.
- */
-void
-xfsbdstrat(
-       struct xfs_mount        *mp,
-       struct xfs_buf          *bp)
-{
-       if (XFS_FORCED_SHUTDOWN(mp)) {
-               trace_xfs_bdstrat_shut(bp, _RET_IP_);
-               xfs_bioerror_relse(bp);
-               return;
-       }
-
-       xfs_buf_iorequest(bp);
-}
-
-STATIC void
-_xfs_buf_ioend(
-       xfs_buf_t               *bp,
-       int                     schedule)
-{
-       if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
-               xfs_buf_ioend(bp, schedule);
-}
-
-STATIC void
-xfs_buf_bio_end_io(
-       struct bio              *bio,
-       int                     error)
-{
-       xfs_buf_t               *bp = (xfs_buf_t *)bio->bi_private;
-
-       xfs_buf_ioerror(bp, -error);
-
-       if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
-               invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
-
-       _xfs_buf_ioend(bp, 1);
-       bio_put(bio);
-}
-
-STATIC void
-_xfs_buf_ioapply(
-       xfs_buf_t               *bp)
-{
-       int                     rw, map_i, total_nr_pages, nr_pages;
-       struct bio              *bio;
-       int                     offset = bp->b_offset;
-       int                     size = bp->b_count_desired;
-       sector_t                sector = bp->b_bn;
-
-       total_nr_pages = bp->b_page_count;
-       map_i = 0;
-
-       if (bp->b_flags & XBF_WRITE) {
-               if (bp->b_flags & XBF_SYNCIO)
-                       rw = WRITE_SYNC;
-               else
-                       rw = WRITE;
-               if (bp->b_flags & XBF_FUA)
-                       rw |= REQ_FUA;
-               if (bp->b_flags & XBF_FLUSH)
-                       rw |= REQ_FLUSH;
-       } else if (bp->b_flags & XBF_READ_AHEAD) {
-               rw = READA;
-       } else {
-               rw = READ;
-       }
-
-       /* we only use the buffer cache for meta-data */
-       rw |= REQ_META;
-
-next_chunk:
-       atomic_inc(&bp->b_io_remaining);
-       nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
-       if (nr_pages > total_nr_pages)
-               nr_pages = total_nr_pages;
-
-       bio = bio_alloc(GFP_NOIO, nr_pages);
-       bio->bi_bdev = bp->b_target->bt_bdev;
-       bio->bi_sector = sector;
-       bio->bi_end_io = xfs_buf_bio_end_io;
-       bio->bi_private = bp;
-
-
-       for (; size && nr_pages; nr_pages--, map_i++) {
-               int     rbytes, nbytes = PAGE_SIZE - offset;
-
-               if (nbytes > size)
-                       nbytes = size;
-
-               rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
-               if (rbytes < nbytes)
-                       break;
-
-               offset = 0;
-               sector += nbytes >> BBSHIFT;
-               size -= nbytes;
-               total_nr_pages--;
-       }
-
-       if (likely(bio->bi_size)) {
-               if (xfs_buf_is_vmapped(bp)) {
-                       flush_kernel_vmap_range(bp->b_addr,
-                                               xfs_buf_vmap_len(bp));
-               }
-               submit_bio(rw, bio);
-               if (size)
-                       goto next_chunk;
-       } else {
-               xfs_buf_ioerror(bp, EIO);
-               bio_put(bio);
-       }
-}
-
-int
-xfs_buf_iorequest(
-       xfs_buf_t               *bp)
-{
-       trace_xfs_buf_iorequest(bp, _RET_IP_);
-
-       if (bp->b_flags & XBF_DELWRI) {
-               xfs_buf_delwri_queue(bp, 1);
-               return 0;
-       }
-
-       if (bp->b_flags & XBF_WRITE) {
-               xfs_buf_wait_unpin(bp);
-       }
-
-       xfs_buf_hold(bp);
-
-       /* Set the count to 1 initially, this will stop an I/O
-        * completion callout which happens before we have started
-        * all the I/O from calling xfs_buf_ioend too early.
-        */
-       atomic_set(&bp->b_io_remaining, 1);
-       _xfs_buf_ioapply(bp);
-       _xfs_buf_ioend(bp, 0);
-
-       xfs_buf_rele(bp);
-       return 0;
-}
-
-/*
- *     Waits for I/O to complete on the buffer supplied.
- *     It returns immediately if no I/O is pending.
- *     It returns the I/O error code, if any, or 0 if there was no error.
- */
-int
-xfs_buf_iowait(
-       xfs_buf_t               *bp)
-{
-       trace_xfs_buf_iowait(bp, _RET_IP_);
-
-       wait_for_completion(&bp->b_iowait);
-
-       trace_xfs_buf_iowait_done(bp, _RET_IP_);
-       return bp->b_error;
-}
-
-xfs_caddr_t
-xfs_buf_offset(
-       xfs_buf_t               *bp,
-       size_t                  offset)
-{
-       struct page             *page;
-
-       if (bp->b_flags & XBF_MAPPED)
-               return XFS_BUF_PTR(bp) + offset;
-
-       offset += bp->b_offset;
-       page = bp->b_pages[offset >> PAGE_SHIFT];
-       return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
-}
-
-/*
- *     Move data into or out of a buffer.
- */
-void
-xfs_buf_iomove(
-       xfs_buf_t               *bp,    /* buffer to process            */
-       size_t                  boff,   /* starting buffer offset       */
-       size_t                  bsize,  /* length to copy               */
-       void                    *data,  /* data address                 */
-       xfs_buf_rw_t            mode)   /* read/write/zero flag         */
-{
-       size_t                  bend, cpoff, csize;
-       struct page             *page;
-
-       bend = boff + bsize;
-       while (boff < bend) {
-               page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
-               cpoff = xfs_buf_poff(boff + bp->b_offset);
-               csize = min_t(size_t,
-                             PAGE_SIZE-cpoff, bp->b_count_desired-boff);
-
-               ASSERT(((csize + cpoff) <= PAGE_SIZE));
-
-               switch (mode) {
-               case XBRW_ZERO:
-                       memset(page_address(page) + cpoff, 0, csize);
-                       break;
-               case XBRW_READ:
-                       memcpy(data, page_address(page) + cpoff, csize);
-                       break;
-               case XBRW_WRITE:
-                       memcpy(page_address(page) + cpoff, data, csize);
-               }
-
-               boff += csize;
-               data += csize;
-       }
-}
-
-/*
- *     Handling of buffer targets (buftargs).
- */
-
-/*
- * Wait for any bufs with callbacks that have been submitted but have not yet
- * returned. These buffers will have an elevated hold count, so wait on those
- * while freeing all the buffers only held by the LRU.
- */
-void
-xfs_wait_buftarg(
-       struct xfs_buftarg      *btp)
-{
-       struct xfs_buf          *bp;
-
-restart:
-       spin_lock(&btp->bt_lru_lock);
-       while (!list_empty(&btp->bt_lru)) {
-               bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
-               if (atomic_read(&bp->b_hold) > 1) {
-                       spin_unlock(&btp->bt_lru_lock);
-                       delay(100);
-                       goto restart;
-               }
-               /*
-                * clear the LRU reference count so the bufer doesn't get
-                * ignored in xfs_buf_rele().
-                */
-               atomic_set(&bp->b_lru_ref, 0);
-               spin_unlock(&btp->bt_lru_lock);
-               xfs_buf_rele(bp);
-               spin_lock(&btp->bt_lru_lock);
-       }
-       spin_unlock(&btp->bt_lru_lock);
-}
-
-int
-xfs_buftarg_shrink(
-       struct shrinker         *shrink,
-       struct shrink_control   *sc)
-{
-       struct xfs_buftarg      *btp = container_of(shrink,
-                                       struct xfs_buftarg, bt_shrinker);
-       struct xfs_buf          *bp;
-       int nr_to_scan = sc->nr_to_scan;
-       LIST_HEAD(dispose);
-
-       if (!nr_to_scan)
-               return btp->bt_lru_nr;
-
-       spin_lock(&btp->bt_lru_lock);
-       while (!list_empty(&btp->bt_lru)) {
-               if (nr_to_scan-- <= 0)
-                       break;
-
-               bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
-
-               /*
-                * Decrement the b_lru_ref count unless the value is already
-                * zero. If the value is already zero, we need to reclaim the
-                * buffer, otherwise it gets another trip through the LRU.
-                */
-               if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
-                       list_move_tail(&bp->b_lru, &btp->bt_lru);
-                       continue;
-               }
-
-               /*
-                * remove the buffer from the LRU now to avoid needing another
-                * lock round trip inside xfs_buf_rele().
-                */
-               list_move(&bp->b_lru, &dispose);
-               btp->bt_lru_nr--;
-       }
-       spin_unlock(&btp->bt_lru_lock);
-
-       while (!list_empty(&dispose)) {
-               bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
-               list_del_init(&bp->b_lru);
-               xfs_buf_rele(bp);
-       }
-
-       return btp->bt_lru_nr;
-}
-
-void
-xfs_free_buftarg(
-       struct xfs_mount        *mp,
-       struct xfs_buftarg      *btp)
-{
-       unregister_shrinker(&btp->bt_shrinker);
-
-       xfs_flush_buftarg(btp, 1);
-       if (mp->m_flags & XFS_MOUNT_BARRIER)
-               xfs_blkdev_issue_flush(btp);
-
-       kthread_stop(btp->bt_task);
-       kmem_free(btp);
-}
-
-STATIC int
-xfs_setsize_buftarg_flags(
-       xfs_buftarg_t           *btp,
-       unsigned int            blocksize,
-       unsigned int            sectorsize,
-       int                     verbose)
-{
-       btp->bt_bsize = blocksize;
-       btp->bt_sshift = ffs(sectorsize) - 1;
-       btp->bt_smask = sectorsize - 1;
-
-       if (set_blocksize(btp->bt_bdev, sectorsize)) {
-               xfs_warn(btp->bt_mount,
-                       "Cannot set_blocksize to %u on device %s\n",
-                       sectorsize, XFS_BUFTARG_NAME(btp));
-               return EINVAL;
-       }
-
-       return 0;
-}
-
-/*
- *     When allocating the initial buffer target we have not yet
- *     read in the superblock, so don't know what sized sectors
- *     are being used is at this early stage.  Play safe.
- */
-STATIC int
-xfs_setsize_buftarg_early(
-       xfs_buftarg_t           *btp,
-       struct block_device     *bdev)
-{
-       return xfs_setsize_buftarg_flags(btp,
-                       PAGE_SIZE, bdev_logical_block_size(bdev), 0);
-}
-
-int
-xfs_setsize_buftarg(
-       xfs_buftarg_t           *btp,
-       unsigned int            blocksize,
-       unsigned int            sectorsize)
-{
-       return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
-}
-
-STATIC int
-xfs_alloc_delwrite_queue(
-       xfs_buftarg_t           *btp,
-       const char              *fsname)
-{
-       INIT_LIST_HEAD(&btp->bt_delwrite_queue);
-       spin_lock_init(&btp->bt_delwrite_lock);
-       btp->bt_flags = 0;
-       btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
-       if (IS_ERR(btp->bt_task))
-               return PTR_ERR(btp->bt_task);
-       return 0;
-}
-
-xfs_buftarg_t *
-xfs_alloc_buftarg(
-       struct xfs_mount        *mp,
-       struct block_device     *bdev,
-       int                     external,
-       const char              *fsname)
-{
-       xfs_buftarg_t           *btp;
-
-       btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
-
-       btp->bt_mount = mp;
-       btp->bt_dev =  bdev->bd_dev;
-       btp->bt_bdev = bdev;
-       btp->bt_bdi = blk_get_backing_dev_info(bdev);
-       if (!btp->bt_bdi)
-               goto error;
-
-       INIT_LIST_HEAD(&btp->bt_lru);
-       spin_lock_init(&btp->bt_lru_lock);
-       if (xfs_setsize_buftarg_early(btp, bdev))
-               goto error;
-       if (xfs_alloc_delwrite_queue(btp, fsname))
-               goto error;
-       btp->bt_shrinker.shrink = xfs_buftarg_shrink;
-       btp->bt_shrinker.seeks = DEFAULT_SEEKS;
-       register_shrinker(&btp->bt_shrinker);
-       return btp;
-
-error:
-       kmem_free(btp);
-       return NULL;
-}
-
-
-/*
- *     Delayed write buffer handling
- */
-STATIC void
-xfs_buf_delwri_queue(
-       xfs_buf_t               *bp,
-       int                     unlock)
-{
-       struct list_head        *dwq = &bp->b_target->bt_delwrite_queue;
-       spinlock_t              *dwlk = &bp->b_target->bt_delwrite_lock;
-
-       trace_xfs_buf_delwri_queue(bp, _RET_IP_);
-
-       ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
-
-       spin_lock(dwlk);
-       /* If already in the queue, dequeue and place at tail */
-       if (!list_empty(&bp->b_list)) {
-               ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-               if (unlock)
-                       atomic_dec(&bp->b_hold);
-               list_del(&bp->b_list);
-       }
-
-       if (list_empty(dwq)) {
-               /* start xfsbufd as it is about to have something to do */
-               wake_up_process(bp->b_target->bt_task);
-       }
-
-       bp->b_flags |= _XBF_DELWRI_Q;
-       list_add_tail(&bp->b_list, dwq);
-       bp->b_queuetime = jiffies;
-       spin_unlock(dwlk);
-
-       if (unlock)
-               xfs_buf_unlock(bp);
-}
-
-void
-xfs_buf_delwri_dequeue(
-       xfs_buf_t               *bp)
-{
-       spinlock_t              *dwlk = &bp->b_target->bt_delwrite_lock;
-       int                     dequeued = 0;
-
-       spin_lock(dwlk);
-       if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
-               ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-               list_del_init(&bp->b_list);
-               dequeued = 1;
-       }
-       bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
-       spin_unlock(dwlk);
-
-       if (dequeued)
-               xfs_buf_rele(bp);
-
-       trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
-}
-
-/*
- * If a delwri buffer needs to be pushed before it has aged out, then promote
- * it to the head of the delwri queue so that it will be flushed on the next
- * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
- * than the age currently needed to flush the buffer. Hence the next time the
- * xfsbufd sees it is guaranteed to be considered old enough to flush.
- */
-void
-xfs_buf_delwri_promote(
-       struct xfs_buf  *bp)
-{
-       struct xfs_buftarg *btp = bp->b_target;
-       long            age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
-
-       ASSERT(bp->b_flags & XBF_DELWRI);
-       ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-
-       /*
-        * Check the buffer age before locking the delayed write queue as we
-        * don't need to promote buffers that are already past the flush age.
-        */
-       if (bp->b_queuetime < jiffies - age)
-               return;
-       bp->b_queuetime = jiffies - age;
-       spin_lock(&btp->bt_delwrite_lock);
-       list_move(&bp->b_list, &btp->bt_delwrite_queue);
-       spin_unlock(&btp->bt_delwrite_lock);
-}
-
-STATIC void
-xfs_buf_runall_queues(
-       struct workqueue_struct *queue)
-{
-       flush_workqueue(queue);
-}
-
-/*
- * Move as many buffers as specified to the supplied list
- * idicating if we skipped any buffers to prevent deadlocks.
- */
-STATIC int
-xfs_buf_delwri_split(
-       xfs_buftarg_t   *target,
-       struct list_head *list,
-       unsigned long   age)
-{
-       xfs_buf_t       *bp, *n;
-       struct list_head *dwq = &target->bt_delwrite_queue;
-       spinlock_t      *dwlk = &target->bt_delwrite_lock;
-       int             skipped = 0;
-       int             force;
-
-       force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-       INIT_LIST_HEAD(list);
-       spin_lock(dwlk);
-       list_for_each_entry_safe(bp, n, dwq, b_list) {
-               ASSERT(bp->b_flags & XBF_DELWRI);
-
-               if (!XFS_BUF_ISPINNED(bp) && xfs_buf_trylock(bp)) {
-                       if (!force &&
-                           time_before(jiffies, bp->b_queuetime + age)) {
-                               xfs_buf_unlock(bp);
-                               break;
-                       }
-
-                       bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q);
-                       bp->b_flags |= XBF_WRITE;
-                       list_move_tail(&bp->b_list, list);
-                       trace_xfs_buf_delwri_split(bp, _RET_IP_);
-               } else
-                       skipped++;
-       }
-       spin_unlock(dwlk);
-
-       return skipped;
-
-}
-
-/*
- * Compare function is more complex than it needs to be because
- * the return value is only 32 bits and we are doing comparisons
- * on 64 bit values
- */
-static int
-xfs_buf_cmp(
-       void            *priv,
-       struct list_head *a,
-       struct list_head *b)
-{
-       struct xfs_buf  *ap = container_of(a, struct xfs_buf, b_list);
-       struct xfs_buf  *bp = container_of(b, struct xfs_buf, b_list);
-       xfs_daddr_t             diff;
-
-       diff = ap->b_bn - bp->b_bn;
-       if (diff < 0)
-               return -1;
-       if (diff > 0)
-               return 1;
-       return 0;
-}
-
-STATIC int
-xfsbufd(
-       void            *data)
-{
-       xfs_buftarg_t   *target = (xfs_buftarg_t *)data;
-
-       current->flags |= PF_MEMALLOC;
-
-       set_freezable();
-
-       do {
-               long    age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
-               long    tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
-               struct list_head tmp;
-               struct blk_plug plug;
-
-               if (unlikely(freezing(current))) {
-                       set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
-                       refrigerator();
-               } else {
-                       clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
-               }
-
-               /* sleep for a long time if there is nothing to do. */
-               if (list_empty(&target->bt_delwrite_queue))
-                       tout = MAX_SCHEDULE_TIMEOUT;
-               schedule_timeout_interruptible(tout);
-
-               xfs_buf_delwri_split(target, &tmp, age);
-               list_sort(NULL, &tmp, xfs_buf_cmp);
-
-               blk_start_plug(&plug);
-               while (!list_empty(&tmp)) {
-                       struct xfs_buf *bp;
-                       bp = list_first_entry(&tmp, struct xfs_buf, b_list);
-                       list_del_init(&bp->b_list);
-                       xfs_bdstrat_cb(bp);
-               }
-               blk_finish_plug(&plug);
-       } while (!kthread_should_stop());
-
-       return 0;
-}
-
-/*
- *     Go through all incore buffers, and release buffers if they belong to
- *     the given device. This is used in filesystem error handling to
- *     preserve the consistency of its metadata.
- */
-int
-xfs_flush_buftarg(
-       xfs_buftarg_t   *target,
-       int             wait)
-{
-       xfs_buf_t       *bp;
-       int             pincount = 0;
-       LIST_HEAD(tmp_list);
-       LIST_HEAD(wait_list);
-       struct blk_plug plug;
-
-       xfs_buf_runall_queues(xfsconvertd_workqueue);
-       xfs_buf_runall_queues(xfsdatad_workqueue);
-       xfs_buf_runall_queues(xfslogd_workqueue);
-
-       set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-       pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
-
-       /*
-        * Dropped the delayed write list lock, now walk the temporary list.
-        * All I/O is issued async and then if we need to wait for completion
-        * we do that after issuing all the IO.
-        */
-       list_sort(NULL, &tmp_list, xfs_buf_cmp);
-
-       blk_start_plug(&plug);
-       while (!list_empty(&tmp_list)) {
-               bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
-               ASSERT(target == bp->b_target);
-               list_del_init(&bp->b_list);
-               if (wait) {
-                       bp->b_flags &= ~XBF_ASYNC;
-                       list_add(&bp->b_list, &wait_list);
-               }
-               xfs_bdstrat_cb(bp);
-       }
-       blk_finish_plug(&plug);
-
-       if (wait) {
-               /* Wait for IO to complete. */
-               while (!list_empty(&wait_list)) {
-                       bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
-
-                       list_del_init(&bp->b_list);
-                       xfs_buf_iowait(bp);
-                       xfs_buf_relse(bp);
-               }
-       }
-
-       return pincount;
-}
-
-int __init
-xfs_buf_init(void)
-{
-       xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
-                                               KM_ZONE_HWALIGN, NULL);
-       if (!xfs_buf_zone)
-               goto out;
-
-       xfslogd_workqueue = alloc_workqueue("xfslogd",
-                                       WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
-       if (!xfslogd_workqueue)
-               goto out_free_buf_zone;
-
-       xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
-       if (!xfsdatad_workqueue)
-               goto out_destroy_xfslogd_workqueue;
-
-       xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
-                                               WQ_MEM_RECLAIM, 1);
-       if (!xfsconvertd_workqueue)
-               goto out_destroy_xfsdatad_workqueue;
-
-       return 0;
-
- out_destroy_xfsdatad_workqueue:
-       destroy_workqueue(xfsdatad_workqueue);
- out_destroy_xfslogd_workqueue:
-       destroy_workqueue(xfslogd_workqueue);
- out_free_buf_zone:
-       kmem_zone_destroy(xfs_buf_zone);
- out:
-       return -ENOMEM;
-}
-
-void
-xfs_buf_terminate(void)
-{
-       destroy_workqueue(xfsconvertd_workqueue);
-       destroy_workqueue(xfsdatad_workqueue);
-       destroy_workqueue(xfslogd_workqueue);
-       kmem_zone_destroy(xfs_buf_zone);
-}
-
-#ifdef CONFIG_KDB_MODULES
-struct list_head *
-xfs_get_buftarg_list(void)
-{
-       return &xfs_buftarg_list;
-}
-#endif
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h

deleted file mode 100644 (file)

index 6a83b46..0000000
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_BUF_H__
-#define __XFS_BUF_H__
-
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <asm/system.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/buffer_head.h>
-#include <linux/uio.h>
-
-/*
- *     Base types
- */
-
-#define XFS_BUF_DADDR_NULL     ((xfs_daddr_t) (-1LL))
-
-#define xfs_buf_ctob(pp)       ((pp) * PAGE_CACHE_SIZE)
-#define xfs_buf_btoc(dd)       (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
-#define xfs_buf_btoct(dd)      ((dd) >> PAGE_CACHE_SHIFT)
-#define xfs_buf_poff(aa)       ((aa) & ~PAGE_CACHE_MASK)
-
-typedef enum {
-       XBRW_READ = 1,                  /* transfer into target memory */
-       XBRW_WRITE = 2,                 /* transfer from target memory */
-       XBRW_ZERO = 3,                  /* Zero target memory */
-} xfs_buf_rw_t;
-
-#define XBF_READ       (1 << 0) /* buffer intended for reading from device */
-#define XBF_WRITE      (1 << 1) /* buffer intended for writing to device */
-#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
-#define XBF_MAPPED     (1 << 3) /* buffer mapped (b_addr valid) */
-#define XBF_ASYNC      (1 << 4) /* initiator will not wait for completion */
-#define XBF_DONE       (1 << 5) /* all pages in the buffer uptodate */
-#define XBF_DELWRI     (1 << 6) /* buffer has dirty pages */
-#define XBF_STALE      (1 << 7) /* buffer has been staled, do not find it */
-
-/* I/O hints for the BIO layer */
-#define XBF_SYNCIO     (1 << 10)/* treat this buffer as synchronous I/O */
-#define XBF_FUA                (1 << 11)/* force cache write through mode */
-#define XBF_FLUSH      (1 << 12)/* flush the disk cache before a write */
-
-/* flags used only as arguments to access routines */
-#define XBF_LOCK       (1 << 15)/* lock requested */
-#define XBF_TRYLOCK    (1 << 16)/* lock requested, but do not wait */
-#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */
-
-/* flags used only internally */
-#define _XBF_PAGES     (1 << 20)/* backed by refcounted pages */
-#define _XBF_KMEM      (1 << 21)/* backed by heap memory */
-#define _XBF_DELWRI_Q  (1 << 22)/* buffer on delwri queue */
-
-typedef unsigned int xfs_buf_flags_t;
-
-#define XFS_BUF_FLAGS \
-       { XBF_READ,             "READ" }, \
-       { XBF_WRITE,            "WRITE" }, \
-       { XBF_READ_AHEAD,       "READ_AHEAD" }, \
-       { XBF_MAPPED,           "MAPPED" }, \
-       { XBF_ASYNC,            "ASYNC" }, \
-       { XBF_DONE,             "DONE" }, \
-       { XBF_DELWRI,           "DELWRI" }, \
-       { XBF_STALE,            "STALE" }, \
-       { XBF_SYNCIO,           "SYNCIO" }, \
-       { XBF_FUA,              "FUA" }, \
-       { XBF_FLUSH,            "FLUSH" }, \
-       { XBF_LOCK,             "LOCK" },       /* should never be set */\
-       { XBF_TRYLOCK,          "TRYLOCK" },    /* ditto */\
-       { XBF_DONT_BLOCK,       "DONT_BLOCK" }, /* ditto */\
-       { _XBF_PAGES,           "PAGES" }, \
-       { _XBF_KMEM,            "KMEM" }, \
-       { _XBF_DELWRI_Q,        "DELWRI_Q" }
-
-typedef enum {
-       XBT_FORCE_SLEEP = 0,
-       XBT_FORCE_FLUSH = 1,
-} xfs_buftarg_flags_t;
-
-typedef struct xfs_buftarg {
-       dev_t                   bt_dev;
-       struct block_device     *bt_bdev;
-       struct backing_dev_info *bt_bdi;
-       struct xfs_mount        *bt_mount;
-       unsigned int            bt_bsize;
-       unsigned int            bt_sshift;
-       size_t                  bt_smask;
-
-       /* per device delwri queue */
-       struct task_struct      *bt_task;
-       struct list_head        bt_delwrite_queue;
-       spinlock_t              bt_delwrite_lock;
-       unsigned long           bt_flags;
-
-       /* LRU control structures */
-       struct shrinker         bt_shrinker;
-       struct list_head        bt_lru;
-       spinlock_t              bt_lru_lock;
-       unsigned int            bt_lru_nr;
-} xfs_buftarg_t;
-
-struct xfs_buf;
-typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
-
-#define XB_PAGES       2
-
-typedef struct xfs_buf {
-       /*
-        * first cacheline holds all the fields needed for an uncontended cache
-        * hit to be fully processed. The semaphore straddles the cacheline
-        * boundary, but the counter and lock sits on the first cacheline,
-        * which is the only bit that is touched if we hit the semaphore
-        * fast-path on locking.
-        */
-       struct rb_node          b_rbnode;       /* rbtree node */
-       xfs_off_t               b_file_offset;  /* offset in file */
-       size_t                  b_buffer_length;/* size of buffer in bytes */
-       atomic_t                b_hold;         /* reference count */
-       atomic_t                b_lru_ref;      /* lru reclaim ref count */
-       xfs_buf_flags_t         b_flags;        /* status flags */
-       struct semaphore        b_sema;         /* semaphore for lockables */
-
-       struct list_head        b_lru;          /* lru list */
-       wait_queue_head_t       b_waiters;      /* unpin waiters */
-       struct list_head        b_list;
-       struct xfs_perag        *b_pag;         /* contains rbtree root */
-       xfs_buftarg_t           *b_target;      /* buffer target (device) */
-       xfs_daddr_t             b_bn;           /* block number for I/O */
-       size_t                  b_count_desired;/* desired transfer size */
-       void                    *b_addr;        /* virtual address of buffer */
-       struct work_struct      b_iodone_work;
-       xfs_buf_iodone_t        b_iodone;       /* I/O completion function */
-       struct completion       b_iowait;       /* queue for I/O waiters */
-       void                    *b_fspriv;
-       struct xfs_trans        *b_transp;
-       struct page             **b_pages;      /* array of page pointers */
-       struct page             *b_page_array[XB_PAGES]; /* inline pages */
-       unsigned long           b_queuetime;    /* time buffer was queued */
-       atomic_t                b_pin_count;    /* pin count */
-       atomic_t                b_io_remaining; /* #outstanding I/O requests */
-       unsigned int            b_page_count;   /* size of page array */
-       unsigned int            b_offset;       /* page offset in first page */
-       unsigned short          b_error;        /* error code on I/O */
-#ifdef XFS_BUF_LOCK_TRACKING
-       int                     b_last_holder;
-#endif
-} xfs_buf_t;
-
-
-/* Finding and Reading Buffers */
-extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
-                               xfs_buf_flags_t, xfs_buf_t *);
-#define xfs_incore(buftarg,blkno,len,lockit) \
-       _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
-
-extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
-                               xfs_buf_flags_t);
-extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
-                               xfs_buf_flags_t);
-
-extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
-extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
-extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
-extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
-extern void xfs_buf_hold(xfs_buf_t *);
-extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
-struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
-                               struct xfs_buftarg *target,
-                               xfs_daddr_t daddr, size_t length, int flags);
-
-/* Releasing Buffers */
-extern void xfs_buf_free(xfs_buf_t *);
-extern void xfs_buf_rele(xfs_buf_t *);
-
-/* Locking and Unlocking Buffers */
-extern int xfs_buf_trylock(xfs_buf_t *);
-extern void xfs_buf_lock(xfs_buf_t *);
-extern void xfs_buf_unlock(xfs_buf_t *);
-#define xfs_buf_islocked(bp) \
-       ((bp)->b_sema.count <= 0)
-
-/* Buffer Read and Write Routines */
-extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
-extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
-
-extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
-extern int xfs_bdstrat_cb(struct xfs_buf *);
-
-extern void xfs_buf_ioend(xfs_buf_t *, int);
-extern void xfs_buf_ioerror(xfs_buf_t *, int);
-extern int xfs_buf_iorequest(xfs_buf_t *);
-extern int xfs_buf_iowait(xfs_buf_t *);
-extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
-                               xfs_buf_rw_t);
-#define xfs_buf_zero(bp, off, len) \
-           xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
-
-static inline int xfs_buf_geterror(xfs_buf_t *bp)
-{
-       return bp ? bp->b_error : ENOMEM;
-}
-
-/* Buffer Utility Routines */
-extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
-
-/* Delayed Write Buffer Routines */
-extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
-extern void xfs_buf_delwri_promote(xfs_buf_t *);
-
-/* Buffer Daemon Setup Routines */
-extern int xfs_buf_init(void);
-extern void xfs_buf_terminate(void);
-
-#define xfs_buf_target_name(target)    \
-       ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })
-
-
-#define XFS_BUF_BFLAGS(bp)     ((bp)->b_flags)
-#define XFS_BUF_ZEROFLAGS(bp) \
-       ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
-                           XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
-
-void xfs_buf_stale(struct xfs_buf *bp);
-#define XFS_BUF_STALE(bp)      xfs_buf_stale(bp);
-#define XFS_BUF_UNSTALE(bp)    ((bp)->b_flags &= ~XBF_STALE)
-#define XFS_BUF_ISSTALE(bp)    ((bp)->b_flags & XBF_STALE)
-#define XFS_BUF_SUPER_STALE(bp)        do {                            \
-                                       XFS_BUF_STALE(bp);      \
-                                       xfs_buf_delwri_dequeue(bp);     \
-                                       XFS_BUF_DONE(bp);       \
-                               } while (0)
-
-#define XFS_BUF_DELAYWRITE(bp)         ((bp)->b_flags |= XBF_DELWRI)
-#define XFS_BUF_UNDELAYWRITE(bp)       xfs_buf_delwri_dequeue(bp)
-#define XFS_BUF_ISDELAYWRITE(bp)       ((bp)->b_flags & XBF_DELWRI)
-
-#define XFS_BUF_ERROR(bp,no)   xfs_buf_ioerror(bp,no)
-#define XFS_BUF_GETERROR(bp)   xfs_buf_geterror(bp)
-#define XFS_BUF_ISERROR(bp)    (xfs_buf_geterror(bp) ? 1 : 0)
-
-#define XFS_BUF_DONE(bp)       ((bp)->b_flags |= XBF_DONE)
-#define XFS_BUF_UNDONE(bp)     ((bp)->b_flags &= ~XBF_DONE)
-#define XFS_BUF_ISDONE(bp)     ((bp)->b_flags & XBF_DONE)
-
-#define XFS_BUF_BUSY(bp)       do { } while (0)
-#define XFS_BUF_UNBUSY(bp)     do { } while (0)
-#define XFS_BUF_ISBUSY(bp)     (1)
-
-#define XFS_BUF_ASYNC(bp)      ((bp)->b_flags |= XBF_ASYNC)
-#define XFS_BUF_UNASYNC(bp)    ((bp)->b_flags &= ~XBF_ASYNC)
-#define XFS_BUF_ISASYNC(bp)    ((bp)->b_flags & XBF_ASYNC)
-
-#define XFS_BUF_HOLD(bp)       xfs_buf_hold(bp)
-#define XFS_BUF_READ(bp)       ((bp)->b_flags |= XBF_READ)
-#define XFS_BUF_UNREAD(bp)     ((bp)->b_flags &= ~XBF_READ)
-#define XFS_BUF_ISREAD(bp)     ((bp)->b_flags & XBF_READ)
-
-#define XFS_BUF_WRITE(bp)      ((bp)->b_flags |= XBF_WRITE)
-#define XFS_BUF_UNWRITE(bp)    ((bp)->b_flags &= ~XBF_WRITE)
-#define XFS_BUF_ISWRITE(bp)    ((bp)->b_flags & XBF_WRITE)
-
-#define XFS_BUF_SET_START(bp)                  do { } while (0)
-
-#define XFS_BUF_PTR(bp)                        (xfs_caddr_t)((bp)->b_addr)
-#define XFS_BUF_SET_PTR(bp, val, cnt)  xfs_buf_associate_memory(bp, val, cnt)
-#define XFS_BUF_ADDR(bp)               ((bp)->b_bn)
-#define XFS_BUF_SET_ADDR(bp, bno)      ((bp)->b_bn = (xfs_daddr_t)(bno))
-#define XFS_BUF_OFFSET(bp)             ((bp)->b_file_offset)
-#define XFS_BUF_SET_OFFSET(bp, off)    ((bp)->b_file_offset = (off))
-#define XFS_BUF_COUNT(bp)              ((bp)->b_count_desired)
-#define XFS_BUF_SET_COUNT(bp, cnt)     ((bp)->b_count_desired = (cnt))
-#define XFS_BUF_SIZE(bp)               ((bp)->b_buffer_length)
-#define XFS_BUF_SET_SIZE(bp, cnt)      ((bp)->b_buffer_length = (cnt))
-
-static inline void
-xfs_buf_set_ref(
-       struct xfs_buf  *bp,
-       int             lru_ref)
-{
-       atomic_set(&bp->b_lru_ref, lru_ref);
-}
-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)   xfs_buf_set_ref(bp, ref)
-#define XFS_BUF_SET_VTYPE(bp, type)            do { } while (0)
-
-#define XFS_BUF_ISPINNED(bp)   atomic_read(&((bp)->b_pin_count))
-
-#define XFS_BUF_FINISH_IOWAIT(bp)      complete(&bp->b_iowait);
-
-#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target))
-#define XFS_BUF_TARGET(bp)             ((bp)->b_target)
-#define XFS_BUFTARG_NAME(target)       xfs_buf_target_name(target)
-
-static inline void xfs_buf_relse(xfs_buf_t *bp)
-{
-       xfs_buf_unlock(bp);
-       xfs_buf_rele(bp);
-}
-
-/*
- *     Handling of buftargs.
- */
-extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
-                       struct block_device *, int, const char *);
-extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
-extern void xfs_wait_buftarg(xfs_buftarg_t *);
-extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
-extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
-
-#ifdef CONFIG_KDB_MODULES
-extern struct list_head *xfs_get_buftarg_list(void);
-#endif
-
-#define xfs_getsize_buftarg(buftarg)   block_size((buftarg)->bt_bdev)
-#define xfs_readonly_buftarg(buftarg)  bdev_read_only((buftarg)->bt_bdev)
-
-#define xfs_binval(buftarg)            xfs_flush_buftarg(buftarg, 1)
-#define XFS_bflush(buftarg)            xfs_flush_buftarg(buftarg, 1)
-
-#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c

deleted file mode 100644 (file)

index 244e797..0000000
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * Copyright (C) 2010 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_sb.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_trans.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
-#include "xfs_discard.h"
-#include "xfs_trace.h"
-
-STATIC int
-xfs_trim_extents(
-       struct xfs_mount        *mp,
-       xfs_agnumber_t          agno,
-       xfs_fsblock_t           start,
-       xfs_fsblock_t           len,
-       xfs_fsblock_t           minlen,
-       __uint64_t              *blocks_trimmed)
-{
-       struct block_device     *bdev = mp->m_ddev_targp->bt_bdev;
-       struct xfs_btree_cur    *cur;
-       struct xfs_buf          *agbp;
-       struct xfs_perag        *pag;
-       int                     error;
-       int                     i;
-
-       pag = xfs_perag_get(mp, agno);
-
-       error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
-       if (error || !agbp)
-               goto out_put_perag;
-
-       cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
-
-       /*
-        * Force out the log.  This means any transactions that might have freed
-        * space before we took the AGF buffer lock are now on disk, and the
-        * volatile disk cache is flushed.
-        */
-       xfs_log_force(mp, XFS_LOG_SYNC);
-
-       /*
-        * Look up the longest btree in the AGF and start with it.
-        */
-       error = xfs_alloc_lookup_le(cur, 0,
-                                   XFS_BUF_TO_AGF(agbp)->agf_longest, &i);
-       if (error)
-               goto out_del_cursor;
-
-       /*
-        * Loop until we are done with all extents that are large
-        * enough to be worth discarding.
-        */
-       while (i) {
-               xfs_agblock_t fbno;
-               xfs_extlen_t flen;
-
-               error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
-               if (error)
-                       goto out_del_cursor;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
-               ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest);
-
-               /*
-                * Too small?  Give up.
-                */
-               if (flen < minlen) {
-                       trace_xfs_discard_toosmall(mp, agno, fbno, flen);
-                       goto out_del_cursor;
-               }
-
-               /*
-                * If the extent is entirely outside of the range we are
-                * supposed to discard skip it.  Do not bother to trim
-                * down partially overlapping ranges for now.
-                */
-               if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
-                   XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) {
-                       trace_xfs_discard_exclude(mp, agno, fbno, flen);
-                       goto next_extent;
-               }
-
-               /*
-                * If any blocks in the range are still busy, skip the
-                * discard and try again the next time.
-                */
-               if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
-                       trace_xfs_discard_busy(mp, agno, fbno, flen);
-                       goto next_extent;
-               }
-
-               trace_xfs_discard_extent(mp, agno, fbno, flen);
-               error = -blkdev_issue_discard(bdev,
-                               XFS_AGB_TO_DADDR(mp, agno, fbno),
-                               XFS_FSB_TO_BB(mp, flen),
-                               GFP_NOFS, 0);
-               if (error)
-                       goto out_del_cursor;
-               *blocks_trimmed += flen;
-
-next_extent:
-               error = xfs_btree_decrement(cur, 0, &i);
-               if (error)
-                       goto out_del_cursor;
-       }
-
-out_del_cursor:
-       xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
-       xfs_buf_relse(agbp);
-out_put_perag:
-       xfs_perag_put(pag);
-       return error;
-}
-
-int
-xfs_ioc_trim(
-       struct xfs_mount                *mp,
-       struct fstrim_range __user      *urange)
-{
-       struct request_queue    *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
-       unsigned int            granularity = q->limits.discard_granularity;
-       struct fstrim_range     range;
-       xfs_fsblock_t           start, len, minlen;
-       xfs_agnumber_t          start_agno, end_agno, agno;
-       __uint64_t              blocks_trimmed = 0;
-       int                     error, last_error = 0;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-       if (!blk_queue_discard(q))
-               return -XFS_ERROR(EOPNOTSUPP);
-       if (copy_from_user(&range, urange, sizeof(range)))
-               return -XFS_ERROR(EFAULT);
-
-       /*
-        * Truncating down the len isn't actually quite correct, but using
-        * XFS_B_TO_FSB would mean we trivially get overflows for values
-        * of ULLONG_MAX or slightly lower.  And ULLONG_MAX is the default
-        * used by the fstrim application.  In the end it really doesn't
-        * matter as trimming blocks is an advisory interface.
-        */
-       start = XFS_B_TO_FSBT(mp, range.start);
-       len = XFS_B_TO_FSBT(mp, range.len);
-       minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));
-
-       start_agno = XFS_FSB_TO_AGNO(mp, start);
-       if (start_agno >= mp->m_sb.sb_agcount)
-               return -XFS_ERROR(EINVAL);
-
-       end_agno = XFS_FSB_TO_AGNO(mp, start + len);
-       if (end_agno >= mp->m_sb.sb_agcount)
-               end_agno = mp->m_sb.sb_agcount - 1;
-
-       for (agno = start_agno; agno <= end_agno; agno++) {
-               error = -xfs_trim_extents(mp, agno, start, len, minlen,
-                                         &blocks_trimmed);
-               if (error)
-                       last_error = error;
-       }
-
-       if (last_error)
-               return last_error;
-
-       range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
-       if (copy_to_user(urange, &range, sizeof(range)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-int
-xfs_discard_extents(
-       struct xfs_mount        *mp,
-       struct list_head        *list)
-{
-       struct xfs_busy_extent  *busyp;
-       int                     error = 0;
-
-       list_for_each_entry(busyp, list, list) {
-               trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
-                                        busyp->length);
-
-               error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
-                               XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
-                               XFS_FSB_TO_BB(mp, busyp->length),
-                               GFP_NOFS, 0);
-               if (error && error != EOPNOTSUPP) {
-                       xfs_info(mp,
-        "discard failed for extent [0x%llu,%u], error %d",
-                                (unsigned long long)busyp->bno,
-                                busyp->length,
-                                error);
-                       return error;
-               }
-       }
-
-       return 0;
-}
diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h

deleted file mode 100644 (file)

index 344879a..0000000
--- a/fs/xfs/linux-2.6/xfs_discard.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef XFS_DISCARD_H
-#define XFS_DISCARD_H 1
-
-struct fstrim_range;
-struct list_head;
-
-extern int     xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
-extern int     xfs_discard_extents(struct xfs_mount *, struct list_head *);
-
-#endif /* XFS_DISCARD_H */
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c

deleted file mode 100644 (file)

index 75e5d32..0000000
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_types.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_mount.h"
-#include "xfs_export.h"
-#include "xfs_vnodeops.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_trace.h"
-
-/*
- * Note that we only accept fileids which are long enough rather than allow
- * the parent generation number to default to zero.  XFS considers zero a
- * valid generation number not an invalid/wildcard value.
- */
-static int xfs_fileid_length(int fileid_type)
-{
-       switch (fileid_type) {
-       case FILEID_INO32_GEN:
-               return 2;
-       case FILEID_INO32_GEN_PARENT:
-               return 4;
-       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
-               return 3;
-       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-               return 6;
-       }
-       return 255; /* invalid */
-}
-
-STATIC int
-xfs_fs_encode_fh(
-       struct dentry           *dentry,
-       __u32                   *fh,
-       int                     *max_len,
-       int                     connectable)
-{
-       struct fid              *fid = (struct fid *)fh;
-       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fh;
-       struct inode            *inode = dentry->d_inode;
-       int                     fileid_type;
-       int                     len;
-
-       /* Directories don't need their parent encoded, they have ".." */
-       if (S_ISDIR(inode->i_mode) || !connectable)
-               fileid_type = FILEID_INO32_GEN;
-       else
-               fileid_type = FILEID_INO32_GEN_PARENT;
-
-       /*
-        * If the the filesystem may contain 64bit inode numbers, we need
-        * to use larger file handles that can represent them.
-        *
-        * While we only allocate inodes that do not fit into 32 bits any
-        * large enough filesystem may contain them, thus the slightly
-        * confusing looking conditional below.
-        */
-       if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
-           (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
-               fileid_type |= XFS_FILEID_TYPE_64FLAG;
-
-       /*
-        * Only encode if there is enough space given.  In practice
-        * this means we can't export a filesystem with 64bit inodes
-        * over NFSv2 with the subtree_check export option; the other
-        * seven combinations work.  The real answer is "don't use v2".
-        */
-       len = xfs_fileid_length(fileid_type);
-       if (*max_len < len) {
-               *max_len = len;
-               return 255;
-       }
-       *max_len = len;
-
-       switch (fileid_type) {
-       case FILEID_INO32_GEN_PARENT:
-               spin_lock(&dentry->d_lock);
-               fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino;
-               fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation;
-               spin_unlock(&dentry->d_lock);
-               /*FALLTHRU*/
-       case FILEID_INO32_GEN:
-               fid->i32.ino = inode->i_ino;
-               fid->i32.gen = inode->i_generation;
-               break;
-       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-               spin_lock(&dentry->d_lock);
-               fid64->parent_ino = dentry->d_parent->d_inode->i_ino;
-               fid64->parent_gen = dentry->d_parent->d_inode->i_generation;
-               spin_unlock(&dentry->d_lock);
-               /*FALLTHRU*/
-       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
-               fid64->ino = inode->i_ino;
-               fid64->gen = inode->i_generation;
-               break;
-       }
-
-       return fileid_type;
-}
-
-STATIC struct inode *
-xfs_nfs_get_inode(
-       struct super_block      *sb,
-       u64                     ino,
-       u32                     generation)
- {
-       xfs_mount_t             *mp = XFS_M(sb);
-       xfs_inode_t             *ip;
-       int                     error;
-
-       /*
-        * NFS can sometimes send requests for ino 0.  Fail them gracefully.
-        */
-       if (ino == 0)
-               return ERR_PTR(-ESTALE);
-
-       /*
-        * The XFS_IGET_UNTRUSTED means that an invalid inode number is just
-        * fine and not an indication of a corrupted filesystem as clients can
-        * send invalid file handles and we have to handle it gracefully..
-        */
-       error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip);
-       if (error) {
-               /*
-                * EINVAL means the inode cluster doesn't exist anymore.
-                * This implies the filehandle is stale, so we should
-                * translate it here.
-                * We don't use ESTALE directly down the chain to not
-                * confuse applications using bulkstat that expect EINVAL.
-                */
-               if (error == EINVAL || error == ENOENT)
-                       error = ESTALE;
-               return ERR_PTR(-error);
-       }
-
-       if (ip->i_d.di_gen != generation) {
-               IRELE(ip);
-               return ERR_PTR(-ESTALE);
-       }
-
-       return VFS_I(ip);
-}
-
-STATIC struct dentry *
-xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
-                int fh_len, int fileid_type)
-{
-       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fid;
-       struct inode            *inode = NULL;
-
-       if (fh_len < xfs_fileid_length(fileid_type))
-               return NULL;
-
-       switch (fileid_type) {
-       case FILEID_INO32_GEN_PARENT:
-       case FILEID_INO32_GEN:
-               inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen);
-               break;
-       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
-               inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen);
-               break;
-       }
-
-       return d_obtain_alias(inode);
-}
-
-STATIC struct dentry *
-xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
-                int fh_len, int fileid_type)
-{
-       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fid;
-       struct inode            *inode = NULL;
-
-       switch (fileid_type) {
-       case FILEID_INO32_GEN_PARENT:
-               inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino,
-                                             fid->i32.parent_gen);
-               break;
-       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-               inode = xfs_nfs_get_inode(sb, fid64->parent_ino,
-                                             fid64->parent_gen);
-               break;
-       }
-
-       return d_obtain_alias(inode);
-}
-
-STATIC struct dentry *
-xfs_fs_get_parent(
-       struct dentry           *child)
-{
-       int                     error;
-       struct xfs_inode        *cip;
-
-       error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
-       if (unlikely(error))
-               return ERR_PTR(-error);
-
-       return d_obtain_alias(VFS_I(cip));
-}
-
-STATIC int
-xfs_fs_nfs_commit_metadata(
-       struct inode            *inode)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       int                     error = 0;
-
-       xfs_ilock(ip, XFS_ILOCK_SHARED);
-       if (xfs_ipincount(ip)) {
-               error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn,
-                               XFS_LOG_SYNC, NULL);
-       }
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-       return error;
-}
-
-const struct export_operations xfs_export_operations = {
-       .encode_fh              = xfs_fs_encode_fh,
-       .fh_to_dentry           = xfs_fs_fh_to_dentry,
-       .fh_to_parent           = xfs_fs_fh_to_parent,
-       .get_parent             = xfs_fs_get_parent,
-       .commit_metadata        = xfs_fs_nfs_commit_metadata,
-};
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h

deleted file mode 100644 (file)

index 3272b6a..0000000
--- a/fs/xfs/linux-2.6/xfs_export.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_EXPORT_H__
-#define __XFS_EXPORT_H__
-
-/*
- * Common defines for code related to exporting XFS filesystems over NFS.
- *
- * The NFS fileid goes out on the wire as an array of
- * 32bit unsigned ints in host order.  There are 5 possible
- * formats.
- *
- * (1) fileid_type=0x00
- *     (no fileid data; handled by the generic code)
- *
- * (2) fileid_type=0x01
- *     inode-num
- *     generation
- *
- * (3) fileid_type=0x02
- *     inode-num
- *     generation
- *     parent-inode-num
- *     parent-generation
- *
- * (4) fileid_type=0x81
- *     inode-num-lo32
- *     inode-num-hi32
- *     generation
- *
- * (5) fileid_type=0x82
- *     inode-num-lo32
- *     inode-num-hi32
- *     generation
- *     parent-inode-num-lo32
- *     parent-inode-num-hi32
- *     parent-generation
- *
- * Note, the NFS filehandle also includes an fsid portion which
- * may have an inode number in it.  That number is hardcoded to
- * 32bits and there is no way for XFS to intercept it.  In
- * practice this means when exporting an XFS filesystem with 64bit
- * inodes you should either export the mountpoint (rather than
- * a subdirectory) or use the "fsid" export option.
- */
-
-struct xfs_fid64 {
-       u64 ino;
-       u32 gen;
-       u64 parent_ino;
-       u32 parent_gen;
-} __attribute__((packed));
-
-/* This flag goes on the wire.  Don't play with it. */
-#define XFS_FILEID_TYPE_64FLAG 0x80    /* NFS fileid has 64bit inodes */
-
-#endif /* __XFS_EXPORT_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c

deleted file mode 100644 (file)

index 7f7b424..0000000
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ /dev/null
@@ -1,1096 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_trans.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap.h"
-#include "xfs_error.h"
-#include "xfs_vnodeops.h"
-#include "xfs_da_btree.h"
-#include "xfs_ioctl.h"
-#include "xfs_trace.h"
-
-#include <linux/dcache.h>
-#include <linux/falloc.h>
-
-static const struct vm_operations_struct xfs_file_vm_ops;
-
-/*
- * Locking primitives for read and write IO paths to ensure we consistently use
- * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
- */
-static inline void
-xfs_rw_ilock(
-       struct xfs_inode        *ip,
-       int                     type)
-{
-       if (type & XFS_IOLOCK_EXCL)
-               mutex_lock(&VFS_I(ip)->i_mutex);
-       xfs_ilock(ip, type);
-}
-
-static inline void
-xfs_rw_iunlock(
-       struct xfs_inode        *ip,
-       int                     type)
-{
-       xfs_iunlock(ip, type);
-       if (type & XFS_IOLOCK_EXCL)
-               mutex_unlock(&VFS_I(ip)->i_mutex);
-}
-
-static inline void
-xfs_rw_ilock_demote(
-       struct xfs_inode        *ip,
-       int                     type)
-{
-       xfs_ilock_demote(ip, type);
-       if (type & XFS_IOLOCK_EXCL)
-               mutex_unlock(&VFS_I(ip)->i_mutex);
-}
-
-/*
- *     xfs_iozero
- *
- *     xfs_iozero clears the specified range of buffer supplied,
- *     and marks all the affected blocks as valid and modified.  If
- *     an affected block is not allocated, it will be allocated.  If
- *     an affected block is not completely overwritten, and is not
- *     valid before the operation, it will be read from disk before
- *     being partially zeroed.
- */
-STATIC int
-xfs_iozero(
-       struct xfs_inode        *ip,    /* inode                        */
-       loff_t                  pos,    /* offset in file               */
-       size_t                  count)  /* size of data to zero         */
-{
-       struct page             *page;
-       struct address_space    *mapping;
-       int                     status;
-
-       mapping = VFS_I(ip)->i_mapping;
-       do {
-               unsigned offset, bytes;
-               void *fsdata;
-
-               offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
-               bytes = PAGE_CACHE_SIZE - offset;
-               if (bytes > count)
-                       bytes = count;
-
-               status = pagecache_write_begin(NULL, mapping, pos, bytes,
-                                       AOP_FLAG_UNINTERRUPTIBLE,
-                                       &page, &fsdata);
-               if (status)
-                       break;
-
-               zero_user(page, offset, bytes);
-
-               status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
-                                       page, fsdata);
-               WARN_ON(status <= 0); /* can't return less than zero! */
-               pos += bytes;
-               count -= bytes;
-               status = 0;
-       } while (count);
-
-       return (-status);
-}
-
-STATIC int
-xfs_file_fsync(
-       struct file             *file,
-       loff_t                  start,
-       loff_t                  end,
-       int                     datasync)
-{
-       struct inode            *inode = file->f_mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_trans        *tp;
-       int                     error = 0;
-       int                     log_flushed = 0;
-
-       trace_xfs_file_fsync(ip);
-
-       error = filemap_write_and_wait_range(inode->i_mapping, start, end);
-       if (error)
-               return error;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
-
-       xfs_iflags_clear(ip, XFS_ITRUNCATED);
-
-       xfs_ilock(ip, XFS_IOLOCK_SHARED);
-       xfs_ioend_wait(ip);
-       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
-       if (mp->m_flags & XFS_MOUNT_BARRIER) {
-               /*
-                * If we have an RT and/or log subvolume we need to make sure
-                * to flush the write cache the device used for file data
-                * first.  This is to ensure newly written file data make
-                * it to disk before logging the new inode size in case of
-                * an extending write.
-                */
-               if (XFS_IS_REALTIME_INODE(ip))
-                       xfs_blkdev_issue_flush(mp->m_rtdev_targp);
-               else if (mp->m_logdev_targp != mp->m_ddev_targp)
-                       xfs_blkdev_issue_flush(mp->m_ddev_targp);
-       }
-
-       /*
-        * We always need to make sure that the required inode state is safe on
-        * disk.  The inode might be clean but we still might need to force the
-        * log because of committed transactions that haven't hit the disk yet.
-        * Likewise, there could be unflushed non-transactional changes to the
-        * inode core that have to go to disk and this requires us to issue
-        * a synchronous transaction to capture these changes correctly.
-        *
-        * This code relies on the assumption that if the i_update_core field
-        * of the inode is clear and the inode is unpinned then it is clean
-        * and no action is required.
-        */
-       xfs_ilock(ip, XFS_ILOCK_SHARED);
-
-       /*
-        * First check if the VFS inode is marked dirty.  All the dirtying
-        * of non-transactional updates no goes through mark_inode_dirty*,
-        * which allows us to distinguish beteeen pure timestamp updates
-        * and i_size updates which need to be caught for fdatasync.
-        * After that also theck for the dirty state in the XFS inode, which
-        * might gets cleared when the inode gets written out via the AIL
-        * or xfs_iflush_cluster.
-        */
-       if (((inode->i_state & I_DIRTY_DATASYNC) ||
-           ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
-           ip->i_update_core) {
-               /*
-                * Kick off a transaction to log the inode core to get the
-                * updates.  The sync transaction will also force the log.
-                */
-               xfs_iunlock(ip, XFS_ILOCK_SHARED);
-               tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-               error = xfs_trans_reserve(tp, 0,
-                               XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-               if (error) {
-                       xfs_trans_cancel(tp, 0);
-                       return -error;
-               }
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-               /*
-                * Note - it's possible that we might have pushed ourselves out
-                * of the way during trans_reserve which would flush the inode.
-                * But there's no guarantee that the inode buffer has actually
-                * gone out yet (it's delwri).  Plus the buffer could be pinned
-                * anyway if it's part of an inode in another recent
-                * transaction.  So we play it safe and fire off the
-                * transaction anyway.
-                */
-               xfs_trans_ijoin(tp, ip);
-               xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-               xfs_trans_set_sync(tp);
-               error = _xfs_trans_commit(tp, 0, &log_flushed);
-
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       } else {
-               /*
-                * Timestamps/size haven't changed since last inode flush or
-                * inode transaction commit.  That means either nothing got
-                * written or a transaction committed which caught the updates.
-                * If the latter happened and the transaction hasn't hit the
-                * disk yet, the inode will be still be pinned.  If it is,
-                * force the log.
-                */
-               if (xfs_ipincount(ip)) {
-                       error = _xfs_log_force_lsn(mp,
-                                       ip->i_itemp->ili_last_lsn,
-                                       XFS_LOG_SYNC, &log_flushed);
-               }
-               xfs_iunlock(ip, XFS_ILOCK_SHARED);
-       }
-
-       /*
-        * If we only have a single device, and the log force about was
-        * a no-op we might have to flush the data device cache here.
-        * This can only happen for fdatasync/O_DSYNC if we were overwriting
-        * an already allocated file and thus do not have any metadata to
-        * commit.
-        */
-       if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
-           mp->m_logdev_targp == mp->m_ddev_targp &&
-           !XFS_IS_REALTIME_INODE(ip) &&
-           !log_flushed)
-               xfs_blkdev_issue_flush(mp->m_ddev_targp);
-
-       return -error;
-}
-
-STATIC ssize_t
-xfs_file_aio_read(
-       struct kiocb            *iocb,
-       const struct iovec      *iovp,
-       unsigned long           nr_segs,
-       loff_t                  pos)
-{
-       struct file             *file = iocb->ki_filp;
-       struct inode            *inode = file->f_mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       size_t                  size = 0;
-       ssize_t                 ret = 0;
-       int                     ioflags = 0;
-       xfs_fsize_t             n;
-       unsigned long           seg;
-
-       XFS_STATS_INC(xs_read_calls);
-
-       BUG_ON(iocb->ki_pos != pos);
-
-       if (unlikely(file->f_flags & O_DIRECT))
-               ioflags |= IO_ISDIRECT;
-       if (file->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       /* START copy & waste from filemap.c */
-       for (seg = 0; seg < nr_segs; seg++) {
-               const struct iovec *iv = &iovp[seg];
-
-               /*
-                * If any segment has a negative length, or the cumulative
-                * length ever wraps negative then return -EINVAL.
-                */
-               size += iv->iov_len;
-               if (unlikely((ssize_t)(size|iv->iov_len) < 0))
-                       return XFS_ERROR(-EINVAL);
-       }
-       /* END copy & waste from filemap.c */
-
-       if (unlikely(ioflags & IO_ISDIRECT)) {
-               xfs_buftarg_t   *target =
-                       XFS_IS_REALTIME_INODE(ip) ?
-                               mp->m_rtdev_targp : mp->m_ddev_targp;
-               if ((iocb->ki_pos & target->bt_smask) ||
-                   (size & target->bt_smask)) {
-                       if (iocb->ki_pos == ip->i_size)
-                               return 0;
-                       return -XFS_ERROR(EINVAL);
-               }
-       }
-
-       n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
-       if (n <= 0 || size == 0)
-               return 0;
-
-       if (n < size)
-               size = n;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -EIO;
-
-       if (unlikely(ioflags & IO_ISDIRECT)) {
-               xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
-
-               if (inode->i_mapping->nrpages) {
-                       ret = -xfs_flushinval_pages(ip,
-                                       (iocb->ki_pos & PAGE_CACHE_MASK),
-                                       -1, FI_REMAPF_LOCKED);
-                       if (ret) {
-                               xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
-                               return ret;
-                       }
-               }
-               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-       } else
-               xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-
-       trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
-
-       ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
-       if (ret > 0)
-               XFS_STATS_ADD(xs_read_bytes, ret);
-
-       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-       return ret;
-}
-
-STATIC ssize_t
-xfs_file_splice_read(
-       struct file             *infilp,
-       loff_t                  *ppos,
-       struct pipe_inode_info  *pipe,
-       size_t                  count,
-       unsigned int            flags)
-{
-       struct xfs_inode        *ip = XFS_I(infilp->f_mapping->host);
-       int                     ioflags = 0;
-       ssize_t                 ret;
-
-       XFS_STATS_INC(xs_read_calls);
-
-       if (infilp->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return -EIO;
-
-       xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-
-       trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
-
-       ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
-       if (ret > 0)
-               XFS_STATS_ADD(xs_read_bytes, ret);
-
-       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-       return ret;
-}
-
-STATIC void
-xfs_aio_write_isize_update(
-       struct inode    *inode,
-       loff_t          *ppos,
-       ssize_t         bytes_written)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       xfs_fsize_t             isize = i_size_read(inode);
-
-       if (bytes_written > 0)
-               XFS_STATS_ADD(xs_write_bytes, bytes_written);
-
-       if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
-                                       *ppos > isize))
-               *ppos = isize;
-
-       if (*ppos > ip->i_size) {
-               xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
-               if (*ppos > ip->i_size)
-                       ip->i_size = *ppos;
-               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
-       }
-}
-
-/*
- * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
- * part of the I/O may have been written to disk before the error occurred.  In
- * this case the on-disk file size may have been adjusted beyond the in-memory
- * file size and now needs to be truncated back.
- */
-STATIC void
-xfs_aio_write_newsize_update(
-       struct xfs_inode        *ip)
-{
-       if (ip->i_new_size) {
-               xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
-               ip->i_new_size = 0;
-               if (ip->i_d.di_size > ip->i_size)
-                       ip->i_d.di_size = ip->i_size;
-               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
-       }
-}
-
-/*
- * xfs_file_splice_write() does not use xfs_rw_ilock() because
- * generic_file_splice_write() takes the i_mutex itself. This, in theory,
- * couuld cause lock inversions between the aio_write path and the splice path
- * if someone is doing concurrent splice(2) based writes and write(2) based
- * writes to the same inode. The only real way to fix this is to re-implement
- * the generic code here with correct locking orders.
- */
-STATIC ssize_t
-xfs_file_splice_write(
-       struct pipe_inode_info  *pipe,
-       struct file             *outfilp,
-       loff_t                  *ppos,
-       size_t                  count,
-       unsigned int            flags)
-{
-       struct inode            *inode = outfilp->f_mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       xfs_fsize_t             new_size;
-       int                     ioflags = 0;
-       ssize_t                 ret;
-
-       XFS_STATS_INC(xs_write_calls);
-
-       if (outfilp->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return -EIO;
-
-       xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-       new_size = *ppos + count;
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       if (new_size > ip->i_size)
-               ip->i_new_size = new_size;
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
-
-       ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
-
-       xfs_aio_write_isize_update(inode, ppos, ret);
-       xfs_aio_write_newsize_update(ip);
-       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-       return ret;
-}
-
-/*
- * This routine is called to handle zeroing any space in the last
- * block of the file that is beyond the EOF.  We do this since the
- * size is being increased without writing anything to that block
- * and we don't want anyone to read the garbage on the disk.
- */
-STATIC int                             /* error (positive) */
-xfs_zero_last_block(
-       xfs_inode_t     *ip,
-       xfs_fsize_t     offset,
-       xfs_fsize_t     isize)
-{
-       xfs_fileoff_t   last_fsb;
-       xfs_mount_t     *mp = ip->i_mount;
-       int             nimaps;
-       int             zero_offset;
-       int             zero_len;
-       int             error = 0;
-       xfs_bmbt_irec_t imap;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-       zero_offset = XFS_B_FSB_OFFSET(mp, isize);
-       if (zero_offset == 0) {
-               /*
-                * There are no extra bytes in the last block on disk to
-                * zero, so return.
-                */
-               return 0;
-       }
-
-       last_fsb = XFS_B_TO_FSBT(mp, isize);
-       nimaps = 1;
-       error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
-                         &nimaps, NULL);
-       if (error) {
-               return error;
-       }
-       ASSERT(nimaps > 0);
-       /*
-        * If the block underlying isize is just a hole, then there
-        * is nothing to zero.
-        */
-       if (imap.br_startblock == HOLESTARTBLOCK) {
-               return 0;
-       }
-       /*
-        * Zero the part of the last block beyond the EOF, and write it
-        * out sync.  We need to drop the ilock while we do this so we
-        * don't deadlock when the buffer cache calls back to us.
-        */
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       zero_len = mp->m_sb.sb_blocksize - zero_offset;
-       if (isize + zero_len > offset)
-               zero_len = offset - isize;
-       error = xfs_iozero(ip, isize, zero_len);
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       ASSERT(error >= 0);
-       return error;
-}
-
-/*
- * Zero any on disk space between the current EOF and the new,
- * larger EOF.  This handles the normal case of zeroing the remainder
- * of the last block in the file and the unusual case of zeroing blocks
- * out beyond the size of the file.  This second case only happens
- * with fixed size extents and when the system crashes before the inode
- * size was updated but after blocks were allocated.  If fill is set,
- * then any holes in the range are filled and zeroed.  If not, the holes
- * are left alone as holes.
- */
-
-int                                    /* error (positive) */
-xfs_zero_eof(
-       xfs_inode_t     *ip,
-       xfs_off_t       offset,         /* starting I/O offset */
-       xfs_fsize_t     isize)          /* current inode size */
-{
-       xfs_mount_t     *mp = ip->i_mount;
-       xfs_fileoff_t   start_zero_fsb;
-       xfs_fileoff_t   end_zero_fsb;
-       xfs_fileoff_t   zero_count_fsb;
-       xfs_fileoff_t   last_fsb;
-       xfs_fileoff_t   zero_off;
-       xfs_fsize_t     zero_len;
-       int             nimaps;
-       int             error = 0;
-       xfs_bmbt_irec_t imap;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-       ASSERT(offset > isize);
-
-       /*
-        * First handle zeroing the block on which isize resides.
-        * We only zero a part of that block so it is handled specially.
-        */
-       error = xfs_zero_last_block(ip, offset, isize);
-       if (error) {
-               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-               return error;
-       }
-
-       /*
-        * Calculate the range between the new size and the old
-        * where blocks needing to be zeroed may exist.  To get the
-        * block where the last byte in the file currently resides,
-        * we need to subtract one from the size and truncate back
-        * to a block boundary.  We subtract 1 in case the size is
-        * exactly on a block boundary.
-        */
-       last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
-       start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
-       end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
-       ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
-       if (last_fsb == end_zero_fsb) {
-               /*
-                * The size was only incremented on its last block.
-                * We took care of that above, so just return.
-                */
-               return 0;
-       }
-
-       ASSERT(start_zero_fsb <= end_zero_fsb);
-       while (start_zero_fsb <= end_zero_fsb) {
-               nimaps = 1;
-               zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
-               error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
-                                 0, NULL, 0, &imap, &nimaps, NULL);
-               if (error) {
-                       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-                       return error;
-               }
-               ASSERT(nimaps > 0);
-
-               if (imap.br_state == XFS_EXT_UNWRITTEN ||
-                   imap.br_startblock == HOLESTARTBLOCK) {
-                       /*
-                        * This loop handles initializing pages that were
-                        * partially initialized by the code below this
-                        * loop. It basically zeroes the part of the page
-                        * that sits on a hole and sets the page as P_HOLE
-                        * and calls remapf if it is a mapped file.
-                        */
-                       start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-                       ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-                       continue;
-               }
-
-               /*
-                * There are blocks we need to zero.
-                * Drop the inode lock while we're doing the I/O.
-                * We'll still have the iolock to protect us.
-                */
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-               zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
-               zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
-
-               if ((zero_off + zero_len) > offset)
-                       zero_len = offset - zero_off;
-
-               error = xfs_iozero(ip, zero_off, zero_len);
-               if (error) {
-                       goto out_lock;
-               }
-
-               start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-               ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-       }
-
-       return 0;
-
-out_lock:
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       ASSERT(error >= 0);
-       return error;
-}
-
-/*
- * Common pre-write limit and setup checks.
- *
- * Returns with iolock held according to @iolock.
- */
-STATIC ssize_t
-xfs_file_aio_write_checks(
-       struct file             *file,
-       loff_t                  *pos,
-       size_t                  *count,
-       int                     *iolock)
-{
-       struct inode            *inode = file->f_mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       xfs_fsize_t             new_size;
-       int                     error = 0;
-
-       error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
-       if (error) {
-               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
-               *iolock = 0;
-               return error;
-       }
-
-       new_size = *pos + *count;
-       if (new_size > ip->i_size)
-               ip->i_new_size = new_size;
-
-       if (likely(!(file->f_mode & FMODE_NOCMTIME)))
-               file_update_time(file);
-
-       /*
-        * If the offset is beyond the size of the file, we need to zero any
-        * blocks that fall between the existing EOF and the start of this
-        * write.
-        */
-       if (*pos > ip->i_size)
-               error = -xfs_zero_eof(ip, *pos, ip->i_size);
-
-       xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
-       if (error)
-               return error;
-
-       /*
-        * If we're writing the file then make sure to clear the setuid and
-        * setgid bits if the process is not being run by root.  This keeps
-        * people from modifying setuid and setgid binaries.
-        */
-       return file_remove_suid(file);
-
-}
-
-/*
- * xfs_file_dio_aio_write - handle direct IO writes
- *
- * Lock the inode appropriately to prepare for and issue a direct IO write.
- * By separating it from the buffered write path we remove all the tricky to
- * follow locking changes and looping.
- *
- * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
- * until we're sure the bytes at the new EOF have been zeroed and/or the cached
- * pages are flushed out.
- *
- * In most cases the direct IO writes will be done holding IOLOCK_SHARED
- * allowing them to be done in parallel with reads and other direct IO writes.
- * However, if the IO is not aligned to filesystem blocks, the direct IO layer
- * needs to do sub-block zeroing and that requires serialisation against other
- * direct IOs to the same block. In this case we need to serialise the
- * submission of the unaligned IOs so that we don't get racing block zeroing in
- * the dio layer.  To avoid the problem with aio, we also need to wait for
- * outstanding IOs to complete so that unwritten extent conversion is completed
- * before we try to map the overlapping block. This is currently implemented by
- * hitting it with a big hammer (i.e. xfs_ioend_wait()).
- *
- * Returns with locks held indicated by @iolock and errors indicated by
- * negative return values.
- */
-STATIC ssize_t
-xfs_file_dio_aio_write(
-       struct kiocb            *iocb,
-       const struct iovec      *iovp,
-       unsigned long           nr_segs,
-       loff_t                  pos,
-       size_t                  ocount,
-       int                     *iolock)
-{
-       struct file             *file = iocb->ki_filp;
-       struct address_space    *mapping = file->f_mapping;
-       struct inode            *inode = mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       ssize_t                 ret = 0;
-       size_t                  count = ocount;
-       int                     unaligned_io = 0;
-       struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
-                                       mp->m_rtdev_targp : mp->m_ddev_targp;
-
-       *iolock = 0;
-       if ((pos & target->bt_smask) || (count & target->bt_smask))
-               return -XFS_ERROR(EINVAL);
-
-       if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
-               unaligned_io = 1;
-
-       if (unaligned_io || mapping->nrpages || pos > ip->i_size)
-               *iolock = XFS_IOLOCK_EXCL;
-       else
-               *iolock = XFS_IOLOCK_SHARED;
-       xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
-
-       ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
-       if (ret)
-               return ret;
-
-       if (mapping->nrpages) {
-               WARN_ON(*iolock != XFS_IOLOCK_EXCL);
-               ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
-                                                       FI_REMAPF_LOCKED);
-               if (ret)
-                       return ret;
-       }
-
-       /*
-        * If we are doing unaligned IO, wait for all other IO to drain,
-        * otherwise demote the lock if we had to flush cached pages
-        */
-       if (unaligned_io)
-               xfs_ioend_wait(ip);
-       else if (*iolock == XFS_IOLOCK_EXCL) {
-               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-               *iolock = XFS_IOLOCK_SHARED;
-       }
-
-       trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-       ret = generic_file_direct_write(iocb, iovp,
-                       &nr_segs, pos, &iocb->ki_pos, count, ocount);
-
-       /* No fallback to buffered IO on errors for XFS. */
-       ASSERT(ret < 0 || ret == count);
-       return ret;
-}
-
-STATIC ssize_t
-xfs_file_buffered_aio_write(
-       struct kiocb            *iocb,
-       const struct iovec      *iovp,
-       unsigned long           nr_segs,
-       loff_t                  pos,
-       size_t                  ocount,
-       int                     *iolock)
-{
-       struct file             *file = iocb->ki_filp;
-       struct address_space    *mapping = file->f_mapping;
-       struct inode            *inode = mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       ssize_t                 ret;
-       int                     enospc = 0;
-       size_t                  count = ocount;
-
-       *iolock = XFS_IOLOCK_EXCL;
-       xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
-
-       ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
-       if (ret)
-               return ret;
-
-       /* We can write back this queue in page reclaim */
-       current->backing_dev_info = mapping->backing_dev_info;
-
-write_retry:
-       trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
-       ret = generic_file_buffered_write(iocb, iovp, nr_segs,
-                       pos, &iocb->ki_pos, count, ret);
-       /*
-        * if we just got an ENOSPC, flush the inode now we aren't holding any
-        * page locks and retry *once*
-        */
-       if (ret == -ENOSPC && !enospc) {
-               ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
-               if (ret)
-                       return ret;
-               enospc = 1;
-               goto write_retry;
-       }
-       current->backing_dev_info = NULL;
-       return ret;
-}
-
-STATIC ssize_t
-xfs_file_aio_write(
-       struct kiocb            *iocb,
-       const struct iovec      *iovp,
-       unsigned long           nr_segs,
-       loff_t                  pos)
-{
-       struct file             *file = iocb->ki_filp;
-       struct address_space    *mapping = file->f_mapping;
-       struct inode            *inode = mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       ssize_t                 ret;
-       int                     iolock;
-       size_t                  ocount = 0;
-
-       XFS_STATS_INC(xs_write_calls);
-
-       BUG_ON(iocb->ki_pos != pos);
-
-       ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
-       if (ret)
-               return ret;
-
-       if (ocount == 0)
-               return 0;
-
-       xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return -EIO;
-
-       if (unlikely(file->f_flags & O_DIRECT))
-               ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
-                                               ocount, &iolock);
-       else
-               ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
-                                               ocount, &iolock);
-
-       xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
-
-       if (ret <= 0)
-               goto out_unlock;
-
-       /* Handle various SYNC-type writes */
-       if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
-               loff_t end = pos + ret - 1;
-               int error;
-
-               xfs_rw_iunlock(ip, iolock);
-               error = xfs_file_fsync(file, pos, end,
-                                     (file->f_flags & __O_SYNC) ? 0 : 1);
-               xfs_rw_ilock(ip, iolock);
-               if (error)
-                       ret = error;
-       }
-
-out_unlock:
-       xfs_aio_write_newsize_update(ip);
-       xfs_rw_iunlock(ip, iolock);
-       return ret;
-}
-
-STATIC long
-xfs_file_fallocate(
-       struct file     *file,
-       int             mode,
-       loff_t          offset,
-       loff_t          len)
-{
-       struct inode    *inode = file->f_path.dentry->d_inode;
-       long            error;
-       loff_t          new_size = 0;
-       xfs_flock64_t   bf;
-       xfs_inode_t     *ip = XFS_I(inode);
-       int             cmd = XFS_IOC_RESVSP;
-       int             attr_flags = XFS_ATTR_NOLOCK;
-
-       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
-               return -EOPNOTSUPP;
-
-       bf.l_whence = 0;
-       bf.l_start = offset;
-       bf.l_len = len;
-
-       xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-       if (mode & FALLOC_FL_PUNCH_HOLE)
-               cmd = XFS_IOC_UNRESVSP;
-
-       /* check the new inode size is valid before allocating */
-       if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-           offset + len > i_size_read(inode)) {
-               new_size = offset + len;
-               error = inode_newsize_ok(inode, new_size);
-               if (error)
-                       goto out_unlock;
-       }
-
-       if (file->f_flags & O_DSYNC)
-               attr_flags |= XFS_ATTR_SYNC;
-
-       error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
-       if (error)
-               goto out_unlock;
-
-       /* Change file size if needed */
-       if (new_size) {
-               struct iattr iattr;
-
-               iattr.ia_valid = ATTR_SIZE;
-               iattr.ia_size = new_size;
-               error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK);
-       }
-
-out_unlock:
-       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-       return error;
-}
-
-
-STATIC int
-xfs_file_open(
-       struct inode    *inode,
-       struct file     *file)
-{
-       if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
-               return -EFBIG;
-       if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
-               return -EIO;
-       return 0;
-}
-
-STATIC int
-xfs_dir_open(
-       struct inode    *inode,
-       struct file     *file)
-{
-       struct xfs_inode *ip = XFS_I(inode);
-       int             mode;
-       int             error;
-
-       error = xfs_file_open(inode, file);
-       if (error)
-               return error;
-
-       /*
-        * If there are any blocks, read-ahead block 0 as we're almost
-        * certain to have the next operation be a read there.
-        */
-       mode = xfs_ilock_map_shared(ip);
-       if (ip->i_d.di_nextents > 0)
-               xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
-       xfs_iunlock(ip, mode);
-       return 0;
-}
-
-STATIC int
-xfs_file_release(
-       struct inode    *inode,
-       struct file     *filp)
-{
-       return -xfs_release(XFS_I(inode));
-}
-
-STATIC int
-xfs_file_readdir(
-       struct file     *filp,
-       void            *dirent,
-       filldir_t       filldir)
-{
-       struct inode    *inode = filp->f_path.dentry->d_inode;
-       xfs_inode_t     *ip = XFS_I(inode);
-       int             error;
-       size_t          bufsize;
-
-       /*
-        * The Linux API doesn't pass down the total size of the buffer
-        * we read into down to the filesystem.  With the filldir concept
-        * it's not needed for correct information, but the XFS dir2 leaf
-        * code wants an estimate of the buffer size to calculate it's
-        * readahead window and size the buffers used for mapping to
-        * physical blocks.
-        *
-        * Try to give it an estimate that's good enough, maybe at some
-        * point we can change the ->readdir prototype to include the
-        * buffer size.  For now we use the current glibc buffer size.
-        */
-       bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
-
-       error = xfs_readdir(ip, dirent, bufsize,
-                               (xfs_off_t *)&filp->f_pos, filldir);
-       if (error)
-               return -error;
-       return 0;
-}
-
-STATIC int
-xfs_file_mmap(
-       struct file     *filp,
-       struct vm_area_struct *vma)
-{
-       vma->vm_ops = &xfs_file_vm_ops;
-       vma->vm_flags |= VM_CAN_NONLINEAR;
-
-       file_accessed(filp);
-       return 0;
-}
-
-/*
- * mmap()d file has taken write protection fault and is being made
- * writable. We can set the page state up correctly for a writable
- * page, which means we can do correct delalloc accounting (ENOSPC
- * checking!) and unwritten extent mapping.
- */
-STATIC int
-xfs_vm_page_mkwrite(
-       struct vm_area_struct   *vma,
-       struct vm_fault         *vmf)
-{
-       return block_page_mkwrite(vma, vmf, xfs_get_blocks);
-}
-
-const struct file_operations xfs_file_operations = {
-       .llseek         = generic_file_llseek,
-       .read           = do_sync_read,
-       .write          = do_sync_write,
-       .aio_read       = xfs_file_aio_read,
-       .aio_write      = xfs_file_aio_write,
-       .splice_read    = xfs_file_splice_read,
-       .splice_write   = xfs_file_splice_write,
-       .unlocked_ioctl = xfs_file_ioctl,
-#ifdef CONFIG_COMPAT
-       .compat_ioctl   = xfs_file_compat_ioctl,
-#endif
-       .mmap           = xfs_file_mmap,
-       .open           = xfs_file_open,
-       .release        = xfs_file_release,
-       .fsync          = xfs_file_fsync,
-       .fallocate      = xfs_file_fallocate,
-};
-
-const struct file_operations xfs_dir_file_operations = {
-       .open           = xfs_dir_open,
-       .read           = generic_read_dir,
-       .readdir        = xfs_file_readdir,
-       .llseek         = generic_file_llseek,
-       .unlocked_ioctl = xfs_file_ioctl,
-#ifdef CONFIG_COMPAT
-       .compat_ioctl   = xfs_file_compat_ioctl,
-#endif
-       .fsync          = xfs_file_fsync,
-};
-
-static const struct vm_operations_struct xfs_file_vm_ops = {
-       .fault          = filemap_fault,
-       .page_mkwrite   = xfs_vm_page_mkwrite,
-};
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c

deleted file mode 100644 (file)

index ed88ed1..0000000
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_vnodeops.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_trace.h"
-
-/*
- * note: all filemap functions return negative error codes. These
- * need to be inverted before returning to the xfs core functions.
- */
-void
-xfs_tosspages(
-       xfs_inode_t     *ip,
-       xfs_off_t       first,
-       xfs_off_t       last,
-       int             fiopt)
-{
-       /* can't toss partial tail pages, so mask them out */
-       last &= ~(PAGE_SIZE - 1);
-       truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1);
-}
-
-int
-xfs_flushinval_pages(
-       xfs_inode_t     *ip,
-       xfs_off_t       first,
-       xfs_off_t       last,
-       int             fiopt)
-{
-       struct address_space *mapping = VFS_I(ip)->i_mapping;
-       int             ret = 0;
-
-       trace_xfs_pagecache_inval(ip, first, last);
-
-       xfs_iflags_clear(ip, XFS_ITRUNCATED);
-       ret = filemap_write_and_wait_range(mapping, first,
-                               last == -1 ? LLONG_MAX : last);
-       if (!ret)
-               truncate_inode_pages_range(mapping, first, last);
-       return -ret;
-}
-
-int
-xfs_flush_pages(
-       xfs_inode_t     *ip,
-       xfs_off_t       first,
-       xfs_off_t       last,
-       uint64_t        flags,
-       int             fiopt)
-{
-       struct address_space *mapping = VFS_I(ip)->i_mapping;
-       int             ret = 0;
-       int             ret2;
-
-       xfs_iflags_clear(ip, XFS_ITRUNCATED);
-       ret = -filemap_fdatawrite_range(mapping, first,
-                               last == -1 ? LLONG_MAX : last);
-       if (flags & XBF_ASYNC)
-               return ret;
-       ret2 = xfs_wait_on_pages(ip, first, last);
-       if (!ret)
-               ret = ret2;
-       return ret;
-}
-
-int
-xfs_wait_on_pages(
-       xfs_inode_t     *ip,
-       xfs_off_t       first,
-       xfs_off_t       last)
-{
-       struct address_space *mapping = VFS_I(ip)->i_mapping;
-
-       if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
-               return -filemap_fdatawait_range(mapping, first,
-                                       last == -1 ? ip->i_size - 1 : last);
-       }
-       return 0;
-}
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c

deleted file mode 100644 (file)

index 76e81cf..0000000
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_sysctl.h"
-
-/*
- * Tunable XFS parameters.  xfs_params is required even when CONFIG_SYSCTL=n,
- * other XFS code uses these values.  Times are measured in centisecs (i.e.
- * 100ths of a second).
- */
-xfs_param_t xfs_params = {
-                         /*    MIN             DFLT            MAX     */
-       .sgid_inherit   = {     0,              0,              1       },
-       .symlink_mode   = {     0,              0,              1       },
-       .panic_mask     = {     0,              0,              255     },
-       .error_level    = {     0,              3,              11      },
-       .syncd_timer    = {     1*100,          30*100,         7200*100},
-       .stats_clear    = {     0,              0,              1       },
-       .inherit_sync   = {     0,              1,              1       },
-       .inherit_nodump = {     0,              1,              1       },
-       .inherit_noatim = {     0,              1,              1       },
-       .xfs_buf_timer  = {     100/2,          1*100,          30*100  },
-       .xfs_buf_age    = {     1*100,          15*100,         7200*100},
-       .inherit_nosym  = {     0,              0,              1       },
-       .rotorstep      = {     1,              1,              255     },
-       .inherit_nodfrg = {     0,              1,              1       },
-       .fstrm_timer    = {     1,              30*100,         3600*100},
-};
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c

deleted file mode 100644 (file)

index f7ce7de..0000000
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ /dev/null
@@ -1,1556 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_ioctl.h"
-#include "xfs_rtalloc.h"
-#include "xfs_itable.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_bmap.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_dfrag.h"
-#include "xfs_fsops.h"
-#include "xfs_vnodeops.h"
-#include "xfs_discard.h"
-#include "xfs_quota.h"
-#include "xfs_inode_item.h"
-#include "xfs_export.h"
-#include "xfs_trace.h"
-
-#include <linux/capability.h>
-#include <linux/dcache.h>
-#include <linux/mount.h>
-#include <linux/namei.h>
-#include <linux/pagemap.h>
-#include <linux/slab.h>
-#include <linux/exportfs.h>
-
-/*
- * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
- * a file or fs handle.
- *
- * XFS_IOC_PATH_TO_FSHANDLE
- *    returns fs handle for a mount point or path within that mount point
- * XFS_IOC_FD_TO_HANDLE
- *    returns full handle for a FD opened in user space
- * XFS_IOC_PATH_TO_HANDLE
- *    returns full handle for a path
- */
-int
-xfs_find_handle(
-       unsigned int            cmd,
-       xfs_fsop_handlereq_t    *hreq)
-{
-       int                     hsize;
-       xfs_handle_t            handle;
-       struct inode            *inode;
-       struct file             *file = NULL;
-       struct path             path;
-       int                     error;
-       struct xfs_inode        *ip;
-
-       if (cmd == XFS_IOC_FD_TO_HANDLE) {
-               file = fget(hreq->fd);
-               if (!file)
-                       return -EBADF;
-               inode = file->f_path.dentry->d_inode;
-       } else {
-               error = user_lpath((const char __user *)hreq->path, &path);
-               if (error)
-                       return error;
-               inode = path.dentry->d_inode;
-       }
-       ip = XFS_I(inode);
-
-       /*
-        * We can only generate handles for inodes residing on a XFS filesystem,
-        * and only for regular files, directories or symbolic links.
-        */
-       error = -EINVAL;
-       if (inode->i_sb->s_magic != XFS_SB_MAGIC)
-               goto out_put;
-
-       error = -EBADF;
-       if (!S_ISREG(inode->i_mode) &&
-           !S_ISDIR(inode->i_mode) &&
-           !S_ISLNK(inode->i_mode))
-               goto out_put;
-
-
-       memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));
-
-       if (cmd == XFS_IOC_PATH_TO_FSHANDLE) {
-               /*
-                * This handle only contains an fsid, zero the rest.
-                */
-               memset(&handle.ha_fid, 0, sizeof(handle.ha_fid));
-               hsize = sizeof(xfs_fsid_t);
-       } else {
-               int             lock_mode;
-
-               lock_mode = xfs_ilock_map_shared(ip);
-               handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
-                                       sizeof(handle.ha_fid.fid_len);
-               handle.ha_fid.fid_pad = 0;
-               handle.ha_fid.fid_gen = ip->i_d.di_gen;
-               handle.ha_fid.fid_ino = ip->i_ino;
-               xfs_iunlock_map_shared(ip, lock_mode);
-
-               hsize = XFS_HSIZE(handle);
-       }
-
-       error = -EFAULT;
-       if (copy_to_user(hreq->ohandle, &handle, hsize) ||
-           copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32)))
-               goto out_put;
-
-       error = 0;
-
- out_put:
-       if (cmd == XFS_IOC_FD_TO_HANDLE)
-               fput(file);
-       else
-               path_put(&path);
-       return error;
-}
-
-/*
- * No need to do permission checks on the various pathname components
- * as the handle operations are privileged.
- */
-STATIC int
-xfs_handle_acceptable(
-       void                    *context,
-       struct dentry           *dentry)
-{
-       return 1;
-}
-
-/*
- * Convert userspace handle data into a dentry.
- */
-struct dentry *
-xfs_handle_to_dentry(
-       struct file             *parfilp,
-       void __user             *uhandle,
-       u32                     hlen)
-{
-       xfs_handle_t            handle;
-       struct xfs_fid64        fid;
-
-       /*
-        * Only allow handle opens under a directory.
-        */
-       if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode))
-               return ERR_PTR(-ENOTDIR);
-
-       if (hlen != sizeof(xfs_handle_t))
-               return ERR_PTR(-EINVAL);
-       if (copy_from_user(&handle, uhandle, hlen))
-               return ERR_PTR(-EFAULT);
-       if (handle.ha_fid.fid_len !=
-           sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len))
-               return ERR_PTR(-EINVAL);
-
-       memset(&fid, 0, sizeof(struct fid));
-       fid.ino = handle.ha_fid.fid_ino;
-       fid.gen = handle.ha_fid.fid_gen;
-
-       return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3,
-                       FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG,
-                       xfs_handle_acceptable, NULL);
-}
-
-STATIC struct dentry *
-xfs_handlereq_to_dentry(
-       struct file             *parfilp,
-       xfs_fsop_handlereq_t    *hreq)
-{
-       return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen);
-}
-
-int
-xfs_open_by_handle(
-       struct file             *parfilp,
-       xfs_fsop_handlereq_t    *hreq)
-{
-       const struct cred       *cred = current_cred();
-       int                     error;
-       int                     fd;
-       int                     permflag;
-       struct file             *filp;
-       struct inode            *inode;
-       struct dentry           *dentry;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-
-       dentry = xfs_handlereq_to_dentry(parfilp, hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-       inode = dentry->d_inode;
-
-       /* Restrict xfs_open_by_handle to directories & regular files. */
-       if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
-               error = -XFS_ERROR(EPERM);
-               goto out_dput;
-       }
-
-#if BITS_PER_LONG != 32
-       hreq->oflags |= O_LARGEFILE;
-#endif
-
-       /* Put open permission in namei format. */
-       permflag = hreq->oflags;
-       if ((permflag+1) & O_ACCMODE)
-               permflag++;
-       if (permflag & O_TRUNC)
-               permflag |= 2;
-
-       if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
-           (permflag & FMODE_WRITE) && IS_APPEND(inode)) {
-               error = -XFS_ERROR(EPERM);
-               goto out_dput;
-       }
-
-       if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
-               error = -XFS_ERROR(EACCES);
-               goto out_dput;
-       }
-
-       /* Can't write directories. */
-       if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) {
-               error = -XFS_ERROR(EISDIR);
-               goto out_dput;
-       }
-
-       fd = get_unused_fd();
-       if (fd < 0) {
-               error = fd;
-               goto out_dput;
-       }
-
-       filp = dentry_open(dentry, mntget(parfilp->f_path.mnt),
-                          hreq->oflags, cred);
-       if (IS_ERR(filp)) {
-               put_unused_fd(fd);
-               return PTR_ERR(filp);
-       }
-
-       if (S_ISREG(inode->i_mode)) {
-               filp->f_flags |= O_NOATIME;
-               filp->f_mode |= FMODE_NOCMTIME;
-       }
-
-       fd_install(fd, filp);
-       return fd;
-
- out_dput:
-       dput(dentry);
-       return error;
-}
-
-/*
- * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's
- * unused first argument.
- */
-STATIC int
-do_readlink(
-       char __user             *buffer,
-       int                     buflen,
-       const char              *link)
-{
-        int len;
-
-       len = PTR_ERR(link);
-       if (IS_ERR(link))
-               goto out;
-
-       len = strlen(link);
-       if (len > (unsigned) buflen)
-               len = buflen;
-       if (copy_to_user(buffer, link, len))
-               len = -EFAULT;
- out:
-       return len;
-}
-
-
-int
-xfs_readlink_by_handle(
-       struct file             *parfilp,
-       xfs_fsop_handlereq_t    *hreq)
-{
-       struct dentry           *dentry;
-       __u32                   olen;
-       void                    *link;
-       int                     error;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-
-       dentry = xfs_handlereq_to_dentry(parfilp, hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       /* Restrict this handle operation to symlinks only. */
-       if (!S_ISLNK(dentry->d_inode->i_mode)) {
-               error = -XFS_ERROR(EINVAL);
-               goto out_dput;
-       }
-
-       if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) {
-               error = -XFS_ERROR(EFAULT);
-               goto out_dput;
-       }
-
-       link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
-       if (!link) {
-               error = -XFS_ERROR(ENOMEM);
-               goto out_dput;
-       }
-
-       error = -xfs_readlink(XFS_I(dentry->d_inode), link);
-       if (error)
-               goto out_kfree;
-       error = do_readlink(hreq->ohandle, olen, link);
-       if (error)
-               goto out_kfree;
-
- out_kfree:
-       kfree(link);
- out_dput:
-       dput(dentry);
-       return error;
-}
-
-STATIC int
-xfs_fssetdm_by_handle(
-       struct file             *parfilp,
-       void                    __user *arg)
-{
-       int                     error;
-       struct fsdmidata        fsd;
-       xfs_fsop_setdm_handlereq_t dmhreq;
-       struct dentry           *dentry;
-
-       if (!capable(CAP_MKNOD))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
-               error = -XFS_ERROR(EPERM);
-               goto out;
-       }
-
-       if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
-               error = -XFS_ERROR(EFAULT);
-               goto out;
-       }
-
-       error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
-                                fsd.fsd_dmstate);
-
- out:
-       dput(dentry);
-       return error;
-}
-
-STATIC int
-xfs_attrlist_by_handle(
-       struct file             *parfilp,
-       void                    __user *arg)
-{
-       int                     error = -ENOMEM;
-       attrlist_cursor_kern_t  *cursor;
-       xfs_fsop_attrlist_handlereq_t al_hreq;
-       struct dentry           *dentry;
-       char                    *kbuf;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-       if (al_hreq.buflen > XATTR_LIST_MAX)
-               return -XFS_ERROR(EINVAL);
-
-       /*
-        * Reject flags, only allow namespaces.
-        */
-       if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
-               return -XFS_ERROR(EINVAL);
-
-       dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
-       if (!kbuf)
-               goto out_dput;
-
-       cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
-       error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
-                                       al_hreq.flags, cursor);
-       if (error)
-               goto out_kfree;
-
-       if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
-               error = -EFAULT;
-
- out_kfree:
-       kfree(kbuf);
- out_dput:
-       dput(dentry);
-       return error;
-}
-
-int
-xfs_attrmulti_attr_get(
-       struct inode            *inode,
-       unsigned char           *name,
-       unsigned char           __user *ubuf,
-       __uint32_t              *len,
-       __uint32_t              flags)
-{
-       unsigned char           *kbuf;
-       int                     error = EFAULT;
-
-       if (*len > XATTR_SIZE_MAX)
-               return EINVAL;
-       kbuf = kmalloc(*len, GFP_KERNEL);
-       if (!kbuf)
-               return ENOMEM;
-
-       error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
-       if (error)
-               goto out_kfree;
-
-       if (copy_to_user(ubuf, kbuf, *len))
-               error = EFAULT;
-
- out_kfree:
-       kfree(kbuf);
-       return error;
-}
-
-int
-xfs_attrmulti_attr_set(
-       struct inode            *inode,
-       unsigned char           *name,
-       const unsigned char     __user *ubuf,
-       __uint32_t              len,
-       __uint32_t              flags)
-{
-       unsigned char           *kbuf;
-       int                     error = EFAULT;
-
-       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-               return EPERM;
-       if (len > XATTR_SIZE_MAX)
-               return EINVAL;
-
-       kbuf = memdup_user(ubuf, len);
-       if (IS_ERR(kbuf))
-               return PTR_ERR(kbuf);
-
-       error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
-
-       return error;
-}
-
-int
-xfs_attrmulti_attr_remove(
-       struct inode            *inode,
-       unsigned char           *name,
-       __uint32_t              flags)
-{
-       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-               return EPERM;
-       return xfs_attr_remove(XFS_I(inode), name, flags);
-}
-
-STATIC int
-xfs_attrmulti_by_handle(
-       struct file             *parfilp,
-       void                    __user *arg)
-{
-       int                     error;
-       xfs_attr_multiop_t      *ops;
-       xfs_fsop_attrmulti_handlereq_t am_hreq;
-       struct dentry           *dentry;
-       unsigned int            i, size;
-       unsigned char           *attr_name;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       /* overflow check */
-       if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
-               return -E2BIG;
-
-       dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       error = E2BIG;
-       size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
-       if (!size || size > 16 * PAGE_SIZE)
-               goto out_dput;
-
-       ops = memdup_user(am_hreq.ops, size);
-       if (IS_ERR(ops)) {
-               error = PTR_ERR(ops);
-               goto out_dput;
-       }
-
-       attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
-       if (!attr_name)
-               goto out_kfree_ops;
-
-       error = 0;
-       for (i = 0; i < am_hreq.opcount; i++) {
-               ops[i].am_error = strncpy_from_user((char *)attr_name,
-                               ops[i].am_attrname, MAXNAMELEN);
-               if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
-                       error = -ERANGE;
-               if (ops[i].am_error < 0)
-                       break;
-
-               switch (ops[i].am_opcode) {
-               case ATTR_OP_GET:
-                       ops[i].am_error = xfs_attrmulti_attr_get(
-                                       dentry->d_inode, attr_name,
-                                       ops[i].am_attrvalue, &ops[i].am_length,
-                                       ops[i].am_flags);
-                       break;
-               case ATTR_OP_SET:
-                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
-                       if (ops[i].am_error)
-                               break;
-                       ops[i].am_error = xfs_attrmulti_attr_set(
-                                       dentry->d_inode, attr_name,
-                                       ops[i].am_attrvalue, ops[i].am_length,
-                                       ops[i].am_flags);
-                       mnt_drop_write(parfilp->f_path.mnt);
-                       break;
-               case ATTR_OP_REMOVE:
-                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
-                       if (ops[i].am_error)
-                               break;
-                       ops[i].am_error = xfs_attrmulti_attr_remove(
-                                       dentry->d_inode, attr_name,
-                                       ops[i].am_flags);
-                       mnt_drop_write(parfilp->f_path.mnt);
-                       break;
-               default:
-                       ops[i].am_error = EINVAL;
-               }
-       }
-
-       if (copy_to_user(am_hreq.ops, ops, size))
-               error = XFS_ERROR(EFAULT);
-
-       kfree(attr_name);
- out_kfree_ops:
-       kfree(ops);
- out_dput:
-       dput(dentry);
-       return -error;
-}
-
-int
-xfs_ioc_space(
-       struct xfs_inode        *ip,
-       struct inode            *inode,
-       struct file             *filp,
-       int                     ioflags,
-       unsigned int            cmd,
-       xfs_flock64_t           *bf)
-{
-       int                     attr_flags = 0;
-       int                     error;
-
-       /*
-        * Only allow the sys admin to reserve space unless
-        * unwritten extents are enabled.
-        */
-       if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
-           !capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-
-       if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
-               return -XFS_ERROR(EPERM);
-
-       if (!(filp->f_mode & FMODE_WRITE))
-               return -XFS_ERROR(EBADF);
-
-       if (!S_ISREG(inode->i_mode))
-               return -XFS_ERROR(EINVAL);
-
-       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-               attr_flags |= XFS_ATTR_NONBLOCK;
-
-       if (filp->f_flags & O_DSYNC)
-               attr_flags |= XFS_ATTR_SYNC;
-
-       if (ioflags & IO_INVIS)
-               attr_flags |= XFS_ATTR_DMI;
-
-       error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
-       return -error;
-}
-
-STATIC int
-xfs_ioc_bulkstat(
-       xfs_mount_t             *mp,
-       unsigned int            cmd,
-       void                    __user *arg)
-{
-       xfs_fsop_bulkreq_t      bulkreq;
-       int                     count;  /* # of records returned */
-       xfs_ino_t               inlast; /* last inode number */
-       int                     done;
-       int                     error;
-
-       /* done = 1 if there are more stats to get and if bulkstat */
-       /* should be called again (unused here, but used in dmapi) */
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EPERM;
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
-
-       if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
-               return -XFS_ERROR(EFAULT);
-
-       if ((count = bulkreq.icount) <= 0)
-               return -XFS_ERROR(EINVAL);
-
-       if (bulkreq.ubuffer == NULL)
-               return -XFS_ERROR(EINVAL);
-
-       if (cmd == XFS_IOC_FSINUMBERS)
-               error = xfs_inumbers(mp, &inlast, &count,
-                                       bulkreq.ubuffer, xfs_inumbers_fmt);
-       else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
-               error = xfs_bulkstat_single(mp, &inlast,
-                                               bulkreq.ubuffer, &done);
-       else    /* XFS_IOC_FSBULKSTAT */
-               error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
-                                    sizeof(xfs_bstat_t), bulkreq.ubuffer,
-                                    &done);
-
-       if (error)
-               return -error;
-
-       if (bulkreq.ocount != NULL) {
-               if (copy_to_user(bulkreq.lastip, &inlast,
-                                               sizeof(xfs_ino_t)))
-                       return -XFS_ERROR(EFAULT);
-
-               if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
-                       return -XFS_ERROR(EFAULT);
-       }
-
-       return 0;
-}
-
-STATIC int
-xfs_ioc_fsgeometry_v1(
-       xfs_mount_t             *mp,
-       void                    __user *arg)
-{
-       xfs_fsop_geom_t         fsgeo;
-       int                     error;
-
-       error = xfs_fs_geometry(mp, &fsgeo, 3);
-       if (error)
-               return -error;
-
-       /*
-        * Caller should have passed an argument of type
-        * xfs_fsop_geom_v1_t.  This is a proper subset of the
-        * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
-        */
-       if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_ioc_fsgeometry(
-       xfs_mount_t             *mp,
-       void                    __user *arg)
-{
-       xfs_fsop_geom_t         fsgeo;
-       int                     error;
-
-       error = xfs_fs_geometry(mp, &fsgeo, 4);
-       if (error)
-               return -error;
-
-       if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-/*
- * Linux extended inode flags interface.
- */
-
-STATIC unsigned int
-xfs_merge_ioc_xflags(
-       unsigned int    flags,
-       unsigned int    start)
-{
-       unsigned int    xflags = start;
-
-       if (flags & FS_IMMUTABLE_FL)
-               xflags |= XFS_XFLAG_IMMUTABLE;
-       else
-               xflags &= ~XFS_XFLAG_IMMUTABLE;
-       if (flags & FS_APPEND_FL)
-               xflags |= XFS_XFLAG_APPEND;
-       else
-               xflags &= ~XFS_XFLAG_APPEND;
-       if (flags & FS_SYNC_FL)
-               xflags |= XFS_XFLAG_SYNC;
-       else
-               xflags &= ~XFS_XFLAG_SYNC;
-       if (flags & FS_NOATIME_FL)
-               xflags |= XFS_XFLAG_NOATIME;
-       else
-               xflags &= ~XFS_XFLAG_NOATIME;
-       if (flags & FS_NODUMP_FL)
-               xflags |= XFS_XFLAG_NODUMP;
-       else
-               xflags &= ~XFS_XFLAG_NODUMP;
-
-       return xflags;
-}
-
-STATIC unsigned int
-xfs_di2lxflags(
-       __uint16_t      di_flags)
-{
-       unsigned int    flags = 0;
-
-       if (di_flags & XFS_DIFLAG_IMMUTABLE)
-               flags |= FS_IMMUTABLE_FL;
-       if (di_flags & XFS_DIFLAG_APPEND)
-               flags |= FS_APPEND_FL;
-       if (di_flags & XFS_DIFLAG_SYNC)
-               flags |= FS_SYNC_FL;
-       if (di_flags & XFS_DIFLAG_NOATIME)
-               flags |= FS_NOATIME_FL;
-       if (di_flags & XFS_DIFLAG_NODUMP)
-               flags |= FS_NODUMP_FL;
-       return flags;
-}
-
-STATIC int
-xfs_ioc_fsgetxattr(
-       xfs_inode_t             *ip,
-       int                     attr,
-       void                    __user *arg)
-{
-       struct fsxattr          fa;
-
-       memset(&fa, 0, sizeof(struct fsxattr));
-
-       xfs_ilock(ip, XFS_ILOCK_SHARED);
-       fa.fsx_xflags = xfs_ip2xflags(ip);
-       fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
-       fa.fsx_projid = xfs_get_projid(ip);
-
-       if (attr) {
-               if (ip->i_afp) {
-                       if (ip->i_afp->if_flags & XFS_IFEXTENTS)
-                               fa.fsx_nextents = ip->i_afp->if_bytes /
-                                                       sizeof(xfs_bmbt_rec_t);
-                       else
-                               fa.fsx_nextents = ip->i_d.di_anextents;
-               } else
-                       fa.fsx_nextents = 0;
-       } else {
-               if (ip->i_df.if_flags & XFS_IFEXTENTS)
-                       fa.fsx_nextents = ip->i_df.if_bytes /
-                                               sizeof(xfs_bmbt_rec_t);
-               else
-                       fa.fsx_nextents = ip->i_d.di_nextents;
-       }
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-       if (copy_to_user(arg, &fa, sizeof(fa)))
-               return -EFAULT;
-       return 0;
-}
-
-STATIC void
-xfs_set_diflags(
-       struct xfs_inode        *ip,
-       unsigned int            xflags)
-{
-       unsigned int            di_flags;
-
-       /* can't set PREALLOC this way, just preserve it */
-       di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
-       if (xflags & XFS_XFLAG_IMMUTABLE)
-               di_flags |= XFS_DIFLAG_IMMUTABLE;
-       if (xflags & XFS_XFLAG_APPEND)
-               di_flags |= XFS_DIFLAG_APPEND;
-       if (xflags & XFS_XFLAG_SYNC)
-               di_flags |= XFS_DIFLAG_SYNC;
-       if (xflags & XFS_XFLAG_NOATIME)
-               di_flags |= XFS_DIFLAG_NOATIME;
-       if (xflags & XFS_XFLAG_NODUMP)
-               di_flags |= XFS_DIFLAG_NODUMP;
-       if (xflags & XFS_XFLAG_PROJINHERIT)
-               di_flags |= XFS_DIFLAG_PROJINHERIT;
-       if (xflags & XFS_XFLAG_NODEFRAG)
-               di_flags |= XFS_DIFLAG_NODEFRAG;
-       if (xflags & XFS_XFLAG_FILESTREAM)
-               di_flags |= XFS_DIFLAG_FILESTREAM;
-       if (S_ISDIR(ip->i_d.di_mode)) {
-               if (xflags & XFS_XFLAG_RTINHERIT)
-                       di_flags |= XFS_DIFLAG_RTINHERIT;
-               if (xflags & XFS_XFLAG_NOSYMLINKS)
-                       di_flags |= XFS_DIFLAG_NOSYMLINKS;
-               if (xflags & XFS_XFLAG_EXTSZINHERIT)
-                       di_flags |= XFS_DIFLAG_EXTSZINHERIT;
-       } else if (S_ISREG(ip->i_d.di_mode)) {
-               if (xflags & XFS_XFLAG_REALTIME)
-                       di_flags |= XFS_DIFLAG_REALTIME;
-               if (xflags & XFS_XFLAG_EXTSIZE)
-                       di_flags |= XFS_DIFLAG_EXTSIZE;
-       }
-
-       ip->i_d.di_flags = di_flags;
-}
-
-STATIC void
-xfs_diflags_to_linux(
-       struct xfs_inode        *ip)
-{
-       struct inode            *inode = VFS_I(ip);
-       unsigned int            xflags = xfs_ip2xflags(ip);
-
-       if (xflags & XFS_XFLAG_IMMUTABLE)
-               inode->i_flags |= S_IMMUTABLE;
-       else
-               inode->i_flags &= ~S_IMMUTABLE;
-       if (xflags & XFS_XFLAG_APPEND)
-               inode->i_flags |= S_APPEND;
-       else
-               inode->i_flags &= ~S_APPEND;
-       if (xflags & XFS_XFLAG_SYNC)
-               inode->i_flags |= S_SYNC;
-       else
-               inode->i_flags &= ~S_SYNC;
-       if (xflags & XFS_XFLAG_NOATIME)
-               inode->i_flags |= S_NOATIME;
-       else
-               inode->i_flags &= ~S_NOATIME;
-}
-
-#define FSX_PROJID     1
-#define FSX_EXTSIZE    2
-#define FSX_XFLAGS     4
-#define FSX_NONBLOCK   8
-
-STATIC int
-xfs_ioctl_setattr(
-       xfs_inode_t             *ip,
-       struct fsxattr          *fa,
-       int                     mask)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_trans        *tp;
-       unsigned int            lock_flags = 0;
-       struct xfs_dquot        *udqp = NULL;
-       struct xfs_dquot        *gdqp = NULL;
-       struct xfs_dquot        *olddquot = NULL;
-       int                     code;
-
-       trace_xfs_ioctl_setattr(ip);
-
-       if (mp->m_flags & XFS_MOUNT_RDONLY)
-               return XFS_ERROR(EROFS);
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return XFS_ERROR(EIO);
-
-       /*
-        * Disallow 32bit project ids when projid32bit feature is not enabled.
-        */
-       if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
-                       !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
-               return XFS_ERROR(EINVAL);
-
-       /*
-        * If disk quotas is on, we make sure that the dquots do exist on disk,
-        * before we start any other transactions. Trying to do this later
-        * is messy. We don't care to take a readlock to look at the ids
-        * in inode here, because we can't hold it across the trans_reserve.
-        * If the IDs do change before we take the ilock, we're covered
-        * because the i_*dquot fields will get updated anyway.
-        */
-       if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
-               code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
-                                        ip->i_d.di_gid, fa->fsx_projid,
-                                        XFS_QMOPT_PQUOTA, &udqp, &gdqp);
-               if (code)
-                       return code;
-       }
-
-       /*
-        * For the other attributes, we acquire the inode lock and
-        * first do an error checking pass.
-        */
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-       code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
-       if (code)
-               goto error_return;
-
-       lock_flags = XFS_ILOCK_EXCL;
-       xfs_ilock(ip, lock_flags);
-
-       /*
-        * CAP_FOWNER overrides the following restrictions:
-        *
-        * The user ID of the calling process must be equal
-        * to the file owner ID, except in cases where the
-        * CAP_FSETID capability is applicable.
-        */
-       if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
-               code = XFS_ERROR(EPERM);
-               goto error_return;
-       }
-
-       /*
-        * Do a quota reservation only if projid is actually going to change.
-        */
-       if (mask & FSX_PROJID) {
-               if (XFS_IS_QUOTA_RUNNING(mp) &&
-                   XFS_IS_PQUOTA_ON(mp) &&
-                   xfs_get_projid(ip) != fa->fsx_projid) {
-                       ASSERT(tp);
-                       code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
-                                               capable(CAP_FOWNER) ?
-                                               XFS_QMOPT_FORCE_RES : 0);
-                       if (code)       /* out of quota */
-                               goto error_return;
-               }
-       }
-
-       if (mask & FSX_EXTSIZE) {
-               /*
-                * Can't change extent size if any extents are allocated.
-                */
-               if (ip->i_d.di_nextents &&
-                   ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
-                    fa->fsx_extsize)) {
-                       code = XFS_ERROR(EINVAL);       /* EFBIG? */
-                       goto error_return;
-               }
-
-               /*
-                * Extent size must be a multiple of the appropriate block
-                * size, if set at all. It must also be smaller than the
-                * maximum extent size supported by the filesystem.
-                *
-                * Also, for non-realtime files, limit the extent size hint to
-                * half the size of the AGs in the filesystem so alignment
-                * doesn't result in extents larger than an AG.
-                */
-               if (fa->fsx_extsize != 0) {
-                       xfs_extlen_t    size;
-                       xfs_fsblock_t   extsize_fsb;
-
-                       extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
-                       if (extsize_fsb > MAXEXTLEN) {
-                               code = XFS_ERROR(EINVAL);
-                               goto error_return;
-                       }
-
-                       if (XFS_IS_REALTIME_INODE(ip) ||
-                           ((mask & FSX_XFLAGS) &&
-                           (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
-                               size = mp->m_sb.sb_rextsize <<
-                                      mp->m_sb.sb_blocklog;
-                       } else {
-                               size = mp->m_sb.sb_blocksize;
-                               if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
-                                       code = XFS_ERROR(EINVAL);
-                                       goto error_return;
-                               }
-                       }
-
-                       if (fa->fsx_extsize % size) {
-                               code = XFS_ERROR(EINVAL);
-                               goto error_return;
-                       }
-               }
-       }
-
-
-       if (mask & FSX_XFLAGS) {
-               /*
-                * Can't change realtime flag if any extents are allocated.
-                */
-               if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
-                   (XFS_IS_REALTIME_INODE(ip)) !=
-                   (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
-                       code = XFS_ERROR(EINVAL);       /* EFBIG? */
-                       goto error_return;
-               }
-
-               /*
-                * If realtime flag is set then must have realtime data.
-                */
-               if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
-                       if ((mp->m_sb.sb_rblocks == 0) ||
-                           (mp->m_sb.sb_rextsize == 0) ||
-                           (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
-                               code = XFS_ERROR(EINVAL);
-                               goto error_return;
-                       }
-               }
-
-               /*
-                * Can't modify an immutable/append-only file unless
-                * we have appropriate permission.
-                */
-               if ((ip->i_d.di_flags &
-                               (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
-                    (fa->fsx_xflags &
-                               (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
-                   !capable(CAP_LINUX_IMMUTABLE)) {
-                       code = XFS_ERROR(EPERM);
-                       goto error_return;
-               }
-       }
-
-       xfs_trans_ijoin(tp, ip);
-
-       /*
-        * Change file ownership.  Must be the owner or privileged.
-        */
-       if (mask & FSX_PROJID) {
-               /*
-                * CAP_FSETID overrides the following restrictions:
-                *
-                * The set-user-ID and set-group-ID bits of a file will be
-                * cleared upon successful return from chown()
-                */
-               if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
-                   !capable(CAP_FSETID))
-                       ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
-
-               /*
-                * Change the ownerships and register quota modifications
-                * in the transaction.
-                */
-               if (xfs_get_projid(ip) != fa->fsx_projid) {
-                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
-                               olddquot = xfs_qm_vop_chown(tp, ip,
-                                                       &ip->i_gdquot, gdqp);
-                       }
-                       xfs_set_projid(ip, fa->fsx_projid);
-
-                       /*
-                        * We may have to rev the inode as well as
-                        * the superblock version number since projids didn't
-                        * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
-                        */
-                       if (ip->i_d.di_version == 1)
-                               xfs_bump_ino_vers2(tp, ip);
-               }
-
-       }
-
-       if (mask & FSX_EXTSIZE)
-               ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
-       if (mask & FSX_XFLAGS) {
-               xfs_set_diflags(ip, fa->fsx_xflags);
-               xfs_diflags_to_linux(ip);
-       }
-
-       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-       XFS_STATS_INC(xs_ig_attrchg);
-
-       /*
-        * If this is a synchronous mount, make sure that the
-        * transaction goes to disk before returning to the user.
-        * This is slightly sub-optimal in that truncates require
-        * two sync transactions instead of one for wsync filesystems.
-        * One for the truncate and one for the timestamps since we
-        * don't want to change the timestamps unless we're sure the
-        * truncate worked.  Truncates are less than 1% of the laddis
-        * mix so this probably isn't worth the trouble to optimize.
-        */
-       if (mp->m_flags & XFS_MOUNT_WSYNC)
-               xfs_trans_set_sync(tp);
-       code = xfs_trans_commit(tp, 0);
-       xfs_iunlock(ip, lock_flags);
-
-       /*
-        * Release any dquot(s) the inode had kept before chown.
-        */
-       xfs_qm_dqrele(olddquot);
-       xfs_qm_dqrele(udqp);
-       xfs_qm_dqrele(gdqp);
-
-       return code;
-
- error_return:
-       xfs_qm_dqrele(udqp);
-       xfs_qm_dqrele(gdqp);
-       xfs_trans_cancel(tp, 0);
-       if (lock_flags)
-               xfs_iunlock(ip, lock_flags);
-       return code;
-}
-
-STATIC int
-xfs_ioc_fssetxattr(
-       xfs_inode_t             *ip,
-       struct file             *filp,
-       void                    __user *arg)
-{
-       struct fsxattr          fa;
-       unsigned int            mask;
-
-       if (copy_from_user(&fa, arg, sizeof(fa)))
-               return -EFAULT;
-
-       mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
-       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-               mask |= FSX_NONBLOCK;
-
-       return -xfs_ioctl_setattr(ip, &fa, mask);
-}
-
-STATIC int
-xfs_ioc_getxflags(
-       xfs_inode_t             *ip,
-       void                    __user *arg)
-{
-       unsigned int            flags;
-
-       flags = xfs_di2lxflags(ip->i_d.di_flags);
-       if (copy_to_user(arg, &flags, sizeof(flags)))
-               return -EFAULT;
-       return 0;
-}
-
-STATIC int
-xfs_ioc_setxflags(
-       xfs_inode_t             *ip,
-       struct file             *filp,
-       void                    __user *arg)
-{
-       struct fsxattr          fa;
-       unsigned int            flags;
-       unsigned int            mask;
-
-       if (copy_from_user(&flags, arg, sizeof(flags)))
-               return -EFAULT;
-
-       if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
-                     FS_NOATIME_FL | FS_NODUMP_FL | \
-                     FS_SYNC_FL))
-               return -EOPNOTSUPP;
-
-       mask = FSX_XFLAGS;
-       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
-               mask |= FSX_NONBLOCK;
-       fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
-
-       return -xfs_ioctl_setattr(ip, &fa, mask);
-}
-
-STATIC int
-xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
-{
-       struct getbmap __user   *base = *ap;
-
-       /* copy only getbmap portion (not getbmapx) */
-       if (copy_to_user(base, bmv, sizeof(struct getbmap)))
-               return XFS_ERROR(EFAULT);
-
-       *ap += sizeof(struct getbmap);
-       return 0;
-}
-
-STATIC int
-xfs_ioc_getbmap(
-       struct xfs_inode        *ip,
-       int                     ioflags,
-       unsigned int            cmd,
-       void                    __user *arg)
-{
-       struct getbmapx         bmx;
-       int                     error;
-
-       if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
-               return -XFS_ERROR(EFAULT);
-
-       if (bmx.bmv_count < 2)
-               return -XFS_ERROR(EINVAL);
-
-       bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
-       if (ioflags & IO_INVIS)
-               bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
-
-       error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
-                           (struct getbmap *)arg+1);
-       if (error)
-               return -error;
-
-       /* copy back header - only size of getbmap */
-       if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
-{
-       struct getbmapx __user  *base = *ap;
-
-       if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
-               return XFS_ERROR(EFAULT);
-
-       *ap += sizeof(struct getbmapx);
-       return 0;
-}
-
-STATIC int
-xfs_ioc_getbmapx(
-       struct xfs_inode        *ip,
-       void                    __user *arg)
-{
-       struct getbmapx         bmx;
-       int                     error;
-
-       if (copy_from_user(&bmx, arg, sizeof(bmx)))
-               return -XFS_ERROR(EFAULT);
-
-       if (bmx.bmv_count < 2)
-               return -XFS_ERROR(EINVAL);
-
-       if (bmx.bmv_iflags & (~BMV_IF_VALID))
-               return -XFS_ERROR(EINVAL);
-
-       error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
-                           (struct getbmapx *)arg+1);
-       if (error)
-               return -error;
-
-       /* copy back header */
-       if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
-               return -XFS_ERROR(EFAULT);
-
-       return 0;
-}
-
-/*
- * Note: some of the ioctl's return positive numbers as a
- * byte count indicating success, such as readlink_by_handle.
- * So we don't "sign flip" like most other routines.  This means
- * true errors need to be returned as a negative value.
- */
-long
-xfs_file_ioctl(
-       struct file             *filp,
-       unsigned int            cmd,
-       unsigned long           p)
-{
-       struct inode            *inode = filp->f_path.dentry->d_inode;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       void                    __user *arg = (void __user *)p;
-       int                     ioflags = 0;
-       int                     error;
-
-       if (filp->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       trace_xfs_file_ioctl(ip);
-
-       switch (cmd) {
-       case FITRIM:
-               return xfs_ioc_trim(mp, arg);
-       case XFS_IOC_ALLOCSP:
-       case XFS_IOC_FREESP:
-       case XFS_IOC_RESVSP:
-       case XFS_IOC_UNRESVSP:
-       case XFS_IOC_ALLOCSP64:
-       case XFS_IOC_FREESP64:
-       case XFS_IOC_RESVSP64:
-       case XFS_IOC_UNRESVSP64:
-       case XFS_IOC_ZERO_RANGE: {
-               xfs_flock64_t           bf;
-
-               if (copy_from_user(&bf, arg, sizeof(bf)))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
-       }
-       case XFS_IOC_DIOINFO: {
-               struct dioattr  da;
-               xfs_buftarg_t   *target =
-                       XFS_IS_REALTIME_INODE(ip) ?
-                       mp->m_rtdev_targp : mp->m_ddev_targp;
-
-               da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
-               da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
-
-               if (copy_to_user(arg, &da, sizeof(da)))
-                       return -XFS_ERROR(EFAULT);
-               return 0;
-       }
-
-       case XFS_IOC_FSBULKSTAT_SINGLE:
-       case XFS_IOC_FSBULKSTAT:
-       case XFS_IOC_FSINUMBERS:
-               return xfs_ioc_bulkstat(mp, cmd, arg);
-
-       case XFS_IOC_FSGEOMETRY_V1:
-               return xfs_ioc_fsgeometry_v1(mp, arg);
-
-       case XFS_IOC_FSGEOMETRY:
-               return xfs_ioc_fsgeometry(mp, arg);
-
-       case XFS_IOC_GETVERSION:
-               return put_user(inode->i_generation, (int __user *)arg);
-
-       case XFS_IOC_FSGETXATTR:
-               return xfs_ioc_fsgetxattr(ip, 0, arg);
-       case XFS_IOC_FSGETXATTRA:
-               return xfs_ioc_fsgetxattr(ip, 1, arg);
-       case XFS_IOC_FSSETXATTR:
-               return xfs_ioc_fssetxattr(ip, filp, arg);
-       case XFS_IOC_GETXFLAGS:
-               return xfs_ioc_getxflags(ip, arg);
-       case XFS_IOC_SETXFLAGS:
-               return xfs_ioc_setxflags(ip, filp, arg);
-
-       case XFS_IOC_FSSETDM: {
-               struct fsdmidata        dmi;
-
-               if (copy_from_user(&dmi, arg, sizeof(dmi)))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
-                               dmi.fsd_dmstate);
-               return -error;
-       }
-
-       case XFS_IOC_GETBMAP:
-       case XFS_IOC_GETBMAPA:
-               return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
-
-       case XFS_IOC_GETBMAPX:
-               return xfs_ioc_getbmapx(ip, arg);
-
-       case XFS_IOC_FD_TO_HANDLE:
-       case XFS_IOC_PATH_TO_HANDLE:
-       case XFS_IOC_PATH_TO_FSHANDLE: {
-               xfs_fsop_handlereq_t    hreq;
-
-               if (copy_from_user(&hreq, arg, sizeof(hreq)))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_find_handle(cmd, &hreq);
-       }
-       case XFS_IOC_OPEN_BY_HANDLE: {
-               xfs_fsop_handlereq_t    hreq;
-
-               if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_open_by_handle(filp, &hreq);
-       }
-       case XFS_IOC_FSSETDM_BY_HANDLE:
-               return xfs_fssetdm_by_handle(filp, arg);
-
-       case XFS_IOC_READLINK_BY_HANDLE: {
-               xfs_fsop_handlereq_t    hreq;
-
-               if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_readlink_by_handle(filp, &hreq);
-       }
-       case XFS_IOC_ATTRLIST_BY_HANDLE:
-               return xfs_attrlist_by_handle(filp, arg);
-
-       case XFS_IOC_ATTRMULTI_BY_HANDLE:
-               return xfs_attrmulti_by_handle(filp, arg);
-
-       case XFS_IOC_SWAPEXT: {
-               struct xfs_swapext      sxp;
-
-               if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
-                       return -XFS_ERROR(EFAULT);
-               error = xfs_swapext(&sxp);
-               return -error;
-       }
-
-       case XFS_IOC_FSCOUNTS: {
-               xfs_fsop_counts_t out;
-
-               error = xfs_fs_counts(mp, &out);
-               if (error)
-                       return -error;
-
-               if (copy_to_user(arg, &out, sizeof(out)))
-                       return -XFS_ERROR(EFAULT);
-               return 0;
-       }
-
-       case XFS_IOC_SET_RESBLKS: {
-               xfs_fsop_resblks_t inout;
-               __uint64_t         in;
-
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
-
-               if (mp->m_flags & XFS_MOUNT_RDONLY)
-                       return -XFS_ERROR(EROFS);
-
-               if (copy_from_user(&inout, arg, sizeof(inout)))
-                       return -XFS_ERROR(EFAULT);
-
-               /* input parameter is passed in resblks field of structure */
-               in = inout.resblks;
-               error = xfs_reserve_blocks(mp, &in, &inout);
-               if (error)
-                       return -error;
-
-               if (copy_to_user(arg, &inout, sizeof(inout)))
-                       return -XFS_ERROR(EFAULT);
-               return 0;
-       }
-
-       case XFS_IOC_GET_RESBLKS: {
-               xfs_fsop_resblks_t out;
-
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
-
-               error = xfs_reserve_blocks(mp, NULL, &out);
-               if (error)
-                       return -error;
-
-               if (copy_to_user(arg, &out, sizeof(out)))
-                       return -XFS_ERROR(EFAULT);
-
-               return 0;
-       }
-
-       case XFS_IOC_FSGROWFSDATA: {
-               xfs_growfs_data_t in;
-
-               if (copy_from_user(&in, arg, sizeof(in)))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_growfs_data(mp, &in);
-               return -error;
-       }
-
-       case XFS_IOC_FSGROWFSLOG: {
-               xfs_growfs_log_t in;
-
-               if (copy_from_user(&in, arg, sizeof(in)))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_growfs_log(mp, &in);
-               return -error;
-       }
-
-       case XFS_IOC_FSGROWFSRT: {
-               xfs_growfs_rt_t in;
-
-               if (copy_from_user(&in, arg, sizeof(in)))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_growfs_rt(mp, &in);
-               return -error;
-       }
-
-       case XFS_IOC_GOINGDOWN: {
-               __uint32_t in;
-
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
-
-               if (get_user(in, (__uint32_t __user *)arg))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_fs_goingdown(mp, in);
-               return -error;
-       }
-
-       case XFS_IOC_ERROR_INJECTION: {
-               xfs_error_injection_t in;
-
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
-
-               if (copy_from_user(&in, arg, sizeof(in)))
-                       return -XFS_ERROR(EFAULT);
-
-               error = xfs_errortag_add(in.errtag, mp);
-               return -error;
-       }
-
-       case XFS_IOC_ERROR_CLEARALL:
-               if (!capable(CAP_SYS_ADMIN))
-                       return -EPERM;
-
-               error = xfs_errortag_clearall(mp, 1);
-               return -error;
-
-       default:
-               return -ENOTTY;
-       }
-}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h

deleted file mode 100644 (file)

index d56173b..0000000
--- a/fs/xfs/linux-2.6/xfs_ioctl.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2008 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_IOCTL_H__
-#define __XFS_IOCTL_H__
-
-extern int
-xfs_ioc_space(
-       struct xfs_inode        *ip,
-       struct inode            *inode,
-       struct file             *filp,
-       int                     ioflags,
-       unsigned int            cmd,
-       xfs_flock64_t           *bf);
-
-extern int
-xfs_find_handle(
-       unsigned int            cmd,
-       xfs_fsop_handlereq_t    *hreq);
-
-extern int
-xfs_open_by_handle(
-       struct file             *parfilp,
-       xfs_fsop_handlereq_t    *hreq);
-
-extern int
-xfs_readlink_by_handle(
-       struct file             *parfilp,
-       xfs_fsop_handlereq_t    *hreq);
-
-extern int
-xfs_attrmulti_attr_get(
-       struct inode            *inode,
-       unsigned char           *name,
-       unsigned char           __user *ubuf,
-       __uint32_t              *len,
-       __uint32_t              flags);
-
-extern int
-xfs_attrmulti_attr_set(
-       struct inode            *inode,
-       unsigned char           *name,
-       const unsigned char     __user *ubuf,
-       __uint32_t              len,
-       __uint32_t              flags);
-
-extern int
-xfs_attrmulti_attr_remove(
-       struct inode            *inode,
-       unsigned char           *name,
-       __uint32_t              flags);
-
-extern struct dentry *
-xfs_handle_to_dentry(
-       struct file             *parfilp,
-       void __user             *uhandle,
-       u32                     hlen);
-
-extern long
-xfs_file_ioctl(
-       struct file             *filp,
-       unsigned int            cmd,
-       unsigned long           p);
-
-extern long
-xfs_file_compat_ioctl(
-       struct file             *file,
-       unsigned int            cmd,
-       unsigned long           arg);
-
-#endif
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c

deleted file mode 100644 (file)

index 54e623b..0000000
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ /dev/null
@@ -1,672 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <linux/compat.h>
-#include <linux/ioctl.h>
-#include <linux/mount.h>
-#include <linux/slab.h>
-#include <asm/uaccess.h>
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_vnode.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_error.h"
-#include "xfs_dfrag.h"
-#include "xfs_vnodeops.h"
-#include "xfs_fsops.h"
-#include "xfs_alloc.h"
-#include "xfs_rtalloc.h"
-#include "xfs_attr.h"
-#include "xfs_ioctl.h"
-#include "xfs_ioctl32.h"
-#include "xfs_trace.h"
-
-#define  _NATIVE_IOC(cmd, type) \
-         _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
-
-#ifdef BROKEN_X86_ALIGNMENT
-STATIC int
-xfs_compat_flock64_copyin(
-       xfs_flock64_t           *bf,
-       compat_xfs_flock64_t    __user *arg32)
-{
-       if (get_user(bf->l_type,        &arg32->l_type) ||
-           get_user(bf->l_whence,      &arg32->l_whence) ||
-           get_user(bf->l_start,       &arg32->l_start) ||
-           get_user(bf->l_len,         &arg32->l_len) ||
-           get_user(bf->l_sysid,       &arg32->l_sysid) ||
-           get_user(bf->l_pid,         &arg32->l_pid) ||
-           copy_from_user(bf->l_pad,   &arg32->l_pad,  4*sizeof(u32)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_compat_ioc_fsgeometry_v1(
-       struct xfs_mount          *mp,
-       compat_xfs_fsop_geom_v1_t __user *arg32)
-{
-       xfs_fsop_geom_t           fsgeo;
-       int                       error;
-
-       error = xfs_fs_geometry(mp, &fsgeo, 3);
-       if (error)
-               return -error;
-       /* The 32-bit variant simply has some padding at the end */
-       if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_compat_growfs_data_copyin(
-       struct xfs_growfs_data   *in,
-       compat_xfs_growfs_data_t __user *arg32)
-{
-       if (get_user(in->newblocks, &arg32->newblocks) ||
-           get_user(in->imaxpct,   &arg32->imaxpct))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_compat_growfs_rt_copyin(
-       struct xfs_growfs_rt     *in,
-       compat_xfs_growfs_rt_t  __user *arg32)
-{
-       if (get_user(in->newblocks, &arg32->newblocks) ||
-           get_user(in->extsize,   &arg32->extsize))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-STATIC int
-xfs_inumbers_fmt_compat(
-       void                    __user *ubuffer,
-       const xfs_inogrp_t      *buffer,
-       long                    count,
-       long                    *written)
-{
-       compat_xfs_inogrp_t     __user *p32 = ubuffer;
-       long                    i;
-
-       for (i = 0; i < count; i++) {
-               if (put_user(buffer[i].xi_startino,   &p32[i].xi_startino) ||
-                   put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
-                   put_user(buffer[i].xi_allocmask,  &p32[i].xi_allocmask))
-                       return -XFS_ERROR(EFAULT);
-       }
-       *written = count * sizeof(*p32);
-       return 0;
-}
-
-#else
-#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
-#endif /* BROKEN_X86_ALIGNMENT */
-
-STATIC int
-xfs_ioctl32_bstime_copyin(
-       xfs_bstime_t            *bstime,
-       compat_xfs_bstime_t     __user *bstime32)
-{
-       compat_time_t           sec32;  /* tv_sec differs on 64 vs. 32 */
-
-       if (get_user(sec32,             &bstime32->tv_sec)      ||
-           get_user(bstime->tv_nsec,   &bstime32->tv_nsec))
-               return -XFS_ERROR(EFAULT);
-       bstime->tv_sec = sec32;
-       return 0;
-}
-
-/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */
-STATIC int
-xfs_ioctl32_bstat_copyin(
-       xfs_bstat_t             *bstat,
-       compat_xfs_bstat_t      __user *bstat32)
-{
-       if (get_user(bstat->bs_ino,     &bstat32->bs_ino)       ||
-           get_user(bstat->bs_mode,    &bstat32->bs_mode)      ||
-           get_user(bstat->bs_nlink,   &bstat32->bs_nlink)     ||
-           get_user(bstat->bs_uid,     &bstat32->bs_uid)       ||
-           get_user(bstat->bs_gid,     &bstat32->bs_gid)       ||
-           get_user(bstat->bs_rdev,    &bstat32->bs_rdev)      ||
-           get_user(bstat->bs_blksize, &bstat32->bs_blksize)   ||
-           get_user(bstat->bs_size,    &bstat32->bs_size)      ||
-           xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) ||
-           xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) ||
-           xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) ||
-           get_user(bstat->bs_blocks,  &bstat32->bs_size)      ||
-           get_user(bstat->bs_xflags,  &bstat32->bs_size)      ||
-           get_user(bstat->bs_extsize, &bstat32->bs_extsize)   ||
-           get_user(bstat->bs_extents, &bstat32->bs_extents)   ||
-           get_user(bstat->bs_gen,     &bstat32->bs_gen)       ||
-           get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
-           get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
-           get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
-           get_user(bstat->bs_dmstate, &bstat32->bs_dmstate)   ||
-           get_user(bstat->bs_aextents, &bstat32->bs_aextents))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-/* XFS_IOC_FSBULKSTAT and friends */
-
-STATIC int
-xfs_bstime_store_compat(
-       compat_xfs_bstime_t     __user *p32,
-       const xfs_bstime_t      *p)
-{
-       __s32                   sec32;
-
-       sec32 = p->tv_sec;
-       if (put_user(sec32, &p32->tv_sec) ||
-           put_user(p->tv_nsec, &p32->tv_nsec))
-               return -XFS_ERROR(EFAULT);
-       return 0;
-}
-
-/* Return 0 on success or positive error (to xfs_bulkstat()) */
-STATIC int
-xfs_bulkstat_one_fmt_compat(
-       void                    __user *ubuffer,
-       int                     ubsize,
-       int                     *ubused,
-       const xfs_bstat_t       *buffer)
-{
-       compat_xfs_bstat_t      __user *p32 = ubuffer;
-
-       if (ubsize < sizeof(*p32))
-               return XFS_ERROR(ENOMEM);
-
-       if (put_user(buffer->bs_ino,      &p32->bs_ino)         ||
-           put_user(buffer->bs_mode,     &p32->bs_mode)        ||
-           put_user(buffer->bs_nlink,    &p32->bs_nlink)       ||
-           put_user(buffer->bs_uid,      &p32->bs_uid)         ||
-           put_user(buffer->bs_gid,      &p32->bs_gid)         ||
-           put_user(buffer->bs_rdev,     &p32->bs_rdev)        ||
-           put_user(buffer->bs_blksize,  &p32->bs_blksize)     ||
-           put_user(buffer->bs_size,     &p32->bs_size)        ||
-           xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
-           xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
-           xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
-           put_user(buffer->bs_blocks,   &p32->bs_blocks)      ||
-           put_user(buffer->bs_xflags,   &p32->bs_xflags)      ||
-           put_user(buffer->bs_extsize,  &p32->bs_extsize)     ||
-           put_user(buffer->bs_extents,  &p32->bs_extents)     ||
-           put_user(buffer->bs_gen,      &p32->bs_gen)         ||
-           put_user(buffer->bs_projid,   &p32->bs_projid)      ||
-           put_user(buffer->bs_projid_hi,      &p32->bs_projid_hi)     ||
-           put_user(buffer->bs_dmevmask, &p32->bs_dmevmask)    ||
-           put_user(buffer->bs_dmstate,  &p32->bs_dmstate)     ||
-           put_user(buffer->bs_aextents, &p32->bs_aextents))
-               return XFS_ERROR(EFAULT);
-       if (ubused)
-               *ubused = sizeof(*p32);
-       return 0;
-}
-
-STATIC int
-xfs_bulkstat_one_compat(
-       xfs_mount_t     *mp,            /* mount point for filesystem */
-       xfs_ino_t       ino,            /* inode number to get data for */
-       void            __user *buffer, /* buffer to place output in */
-       int             ubsize,         /* size of buffer */
-       int             *ubused,        /* bytes used by me */
-       int             *stat)          /* BULKSTAT_RV_... */
-{
-       return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
-                                   xfs_bulkstat_one_fmt_compat,
-                                   ubused, stat);
-}
-
-/* copied from xfs_ioctl.c */
-STATIC int
-xfs_compat_ioc_bulkstat(
-       xfs_mount_t               *mp,
-       unsigned int              cmd,
-       compat_xfs_fsop_bulkreq_t __user *p32)
-{
-       u32                     addr;
-       xfs_fsop_bulkreq_t      bulkreq;
-       int                     count;  /* # of records returned */
-       xfs_ino_t               inlast; /* last inode number */
-       int                     done;
-       int                     error;
-
-       /* done = 1 if there are more stats to get and if bulkstat */
-       /* should be called again (unused here, but used in dmapi) */
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return -XFS_ERROR(EIO);
-
-       if (get_user(addr, &p32->lastip))
-               return -XFS_ERROR(EFAULT);
-       bulkreq.lastip = compat_ptr(addr);
-       if (get_user(bulkreq.icount, &p32->icount) ||
-           get_user(addr, &p32->ubuffer))
-               return -XFS_ERROR(EFAULT);
-       bulkreq.ubuffer = compat_ptr(addr);
-       if (get_user(addr, &p32->ocount))
-               return -XFS_ERROR(EFAULT);
-       bulkreq.ocount = compat_ptr(addr);
-
-       if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
-               return -XFS_ERROR(EFAULT);
-
-       if ((count = bulkreq.icount) <= 0)
-               return -XFS_ERROR(EINVAL);
-
-       if (bulkreq.ubuffer == NULL)
-               return -XFS_ERROR(EINVAL);
-
-       if (cmd == XFS_IOC_FSINUMBERS_32) {
-               error = xfs_inumbers(mp, &inlast, &count,
-                               bulkreq.ubuffer, xfs_inumbers_fmt_compat);
-       } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
-               int res;
-
-               error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
-                               sizeof(compat_xfs_bstat_t), 0, &res);
-       } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
-               error = xfs_bulkstat(mp, &inlast, &count,
-                       xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
-                       bulkreq.ubuffer, &done);
-       } else
-               error = XFS_ERROR(EINVAL);
-       if (error)
-               return -error;
-
-       if (bulkreq.ocount != NULL) {
-               if (copy_to_user(bulkreq.lastip, &inlast,
-                                               sizeof(xfs_ino_t)))
-                       return -XFS_ERROR(EFAULT);
-
-               if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
-                       return -XFS_ERROR(EFAULT);
-       }
-
-       return 0;
-}
-
-STATIC int
-xfs_compat_handlereq_copyin(
-       xfs_fsop_handlereq_t            *hreq,
-       compat_xfs_fsop_handlereq_t     __user *arg32)
-{
-       compat_xfs_fsop_handlereq_t     hreq32;
-
-       if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       hreq->fd = hreq32.fd;
-       hreq->path = compat_ptr(hreq32.path);
-       hreq->oflags = hreq32.oflags;
-       hreq->ihandle = compat_ptr(hreq32.ihandle);
-       hreq->ihandlen = hreq32.ihandlen;
-       hreq->ohandle = compat_ptr(hreq32.ohandle);
-       hreq->ohandlen = compat_ptr(hreq32.ohandlen);
-
-       return 0;
-}
-
-STATIC struct dentry *
-xfs_compat_handlereq_to_dentry(
-       struct file             *parfilp,
-       compat_xfs_fsop_handlereq_t *hreq)
-{
-       return xfs_handle_to_dentry(parfilp,
-                       compat_ptr(hreq->ihandle), hreq->ihandlen);
-}
-
-STATIC int
-xfs_compat_attrlist_by_handle(
-       struct file             *parfilp,
-       void                    __user *arg)
-{
-       int                     error;
-       attrlist_cursor_kern_t  *cursor;
-       compat_xfs_fsop_attrlist_handlereq_t al_hreq;
-       struct dentry           *dentry;
-       char                    *kbuf;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&al_hreq, arg,
-                          sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-       if (al_hreq.buflen > XATTR_LIST_MAX)
-               return -XFS_ERROR(EINVAL);
-
-       /*
-        * Reject flags, only allow namespaces.
-        */
-       if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
-               return -XFS_ERROR(EINVAL);
-
-       dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       error = -ENOMEM;
-       kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
-       if (!kbuf)
-               goto out_dput;
-
-       cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
-       error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
-                                       al_hreq.flags, cursor);
-       if (error)
-               goto out_kfree;
-
-       if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
-               error = -EFAULT;
-
- out_kfree:
-       kfree(kbuf);
- out_dput:
-       dput(dentry);
-       return error;
-}
-
-STATIC int
-xfs_compat_attrmulti_by_handle(
-       struct file                             *parfilp,
-       void                                    __user *arg)
-{
-       int                                     error;
-       compat_xfs_attr_multiop_t               *ops;
-       compat_xfs_fsop_attrmulti_handlereq_t   am_hreq;
-       struct dentry                           *dentry;
-       unsigned int                            i, size;
-       unsigned char                           *attr_name;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&am_hreq, arg,
-                          sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       /* overflow check */
-       if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
-               return -E2BIG;
-
-       dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       error = E2BIG;
-       size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t);
-       if (!size || size > 16 * PAGE_SIZE)
-               goto out_dput;
-
-       ops = memdup_user(compat_ptr(am_hreq.ops), size);
-       if (IS_ERR(ops)) {
-               error = PTR_ERR(ops);
-               goto out_dput;
-       }
-
-       attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
-       if (!attr_name)
-               goto out_kfree_ops;
-
-       error = 0;
-       for (i = 0; i < am_hreq.opcount; i++) {
-               ops[i].am_error = strncpy_from_user((char *)attr_name,
-                               compat_ptr(ops[i].am_attrname),
-                               MAXNAMELEN);
-               if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
-                       error = -ERANGE;
-               if (ops[i].am_error < 0)
-                       break;
-
-               switch (ops[i].am_opcode) {
-               case ATTR_OP_GET:
-                       ops[i].am_error = xfs_attrmulti_attr_get(
-                                       dentry->d_inode, attr_name,
-                                       compat_ptr(ops[i].am_attrvalue),
-                                       &ops[i].am_length, ops[i].am_flags);
-                       break;
-               case ATTR_OP_SET:
-                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
-                       if (ops[i].am_error)
-                               break;
-                       ops[i].am_error = xfs_attrmulti_attr_set(
-                                       dentry->d_inode, attr_name,
-                                       compat_ptr(ops[i].am_attrvalue),
-                                       ops[i].am_length, ops[i].am_flags);
-                       mnt_drop_write(parfilp->f_path.mnt);
-                       break;
-               case ATTR_OP_REMOVE:
-                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
-                       if (ops[i].am_error)
-                               break;
-                       ops[i].am_error = xfs_attrmulti_attr_remove(
-                                       dentry->d_inode, attr_name,
-                                       ops[i].am_flags);
-                       mnt_drop_write(parfilp->f_path.mnt);
-                       break;
-               default:
-                       ops[i].am_error = EINVAL;
-               }
-       }
-
-       if (copy_to_user(compat_ptr(am_hreq.ops), ops, size))
-               error = XFS_ERROR(EFAULT);
-
-       kfree(attr_name);
- out_kfree_ops:
-       kfree(ops);
- out_dput:
-       dput(dentry);
-       return -error;
-}
-
-STATIC int
-xfs_compat_fssetdm_by_handle(
-       struct file             *parfilp,
-       void                    __user *arg)
-{
-       int                     error;
-       struct fsdmidata        fsd;
-       compat_xfs_fsop_setdm_handlereq_t dmhreq;
-       struct dentry           *dentry;
-
-       if (!capable(CAP_MKNOD))
-               return -XFS_ERROR(EPERM);
-       if (copy_from_user(&dmhreq, arg,
-                          sizeof(compat_xfs_fsop_setdm_handlereq_t)))
-               return -XFS_ERROR(EFAULT);
-
-       dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-
-       if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
-               error = -XFS_ERROR(EPERM);
-               goto out;
-       }
-
-       if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
-               error = -XFS_ERROR(EFAULT);
-               goto out;
-       }
-
-       error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
-                                fsd.fsd_dmstate);
-
-out:
-       dput(dentry);
-       return error;
-}
-
-long
-xfs_file_compat_ioctl(
-       struct file             *filp,
-       unsigned                cmd,
-       unsigned long           p)
-{
-       struct inode            *inode = filp->f_path.dentry->d_inode;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       void                    __user *arg = (void __user *)p;
-       int                     ioflags = 0;
-       int                     error;
-
-       if (filp->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       trace_xfs_file_compat_ioctl(ip);
-
-       switch (cmd) {
-       /* No size or alignment issues on any arch */
-       case XFS_IOC_DIOINFO:
-       case XFS_IOC_FSGEOMETRY:
-       case XFS_IOC_FSGETXATTR:
-       case XFS_IOC_FSSETXATTR:
-       case XFS_IOC_FSGETXATTRA:
-       case XFS_IOC_FSSETDM:
-       case XFS_IOC_GETBMAP:
-       case XFS_IOC_GETBMAPA:
-       case XFS_IOC_GETBMAPX:
-       case XFS_IOC_FSCOUNTS:
-       case XFS_IOC_SET_RESBLKS:
-       case XFS_IOC_GET_RESBLKS:
-       case XFS_IOC_FSGROWFSLOG:
-       case XFS_IOC_GOINGDOWN:
-       case XFS_IOC_ERROR_INJECTION:
-       case XFS_IOC_ERROR_CLEARALL:
-               return xfs_file_ioctl(filp, cmd, p);
-#ifndef BROKEN_X86_ALIGNMENT
-       /* These are handled fine if no alignment issues */
-       case XFS_IOC_ALLOCSP:
-       case XFS_IOC_FREESP:
-       case XFS_IOC_RESVSP:
-       case XFS_IOC_UNRESVSP:
-       case XFS_IOC_ALLOCSP64:
-       case XFS_IOC_FREESP64:
-       case XFS_IOC_RESVSP64:
-       case XFS_IOC_UNRESVSP64:
-       case XFS_IOC_FSGEOMETRY_V1:
-       case XFS_IOC_FSGROWFSDATA:
-       case XFS_IOC_FSGROWFSRT:
-       case XFS_IOC_ZERO_RANGE:
-               return xfs_file_ioctl(filp, cmd, p);
-#else
-       case XFS_IOC_ALLOCSP_32:
-       case XFS_IOC_FREESP_32:
-       case XFS_IOC_ALLOCSP64_32:
-       case XFS_IOC_FREESP64_32:
-       case XFS_IOC_RESVSP_32:
-       case XFS_IOC_UNRESVSP_32:
-       case XFS_IOC_RESVSP64_32:
-       case XFS_IOC_UNRESVSP64_32:
-       case XFS_IOC_ZERO_RANGE_32: {
-               struct xfs_flock64      bf;
-
-               if (xfs_compat_flock64_copyin(&bf, arg))
-                       return -XFS_ERROR(EFAULT);
-               cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
-               return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
-       }
-       case XFS_IOC_FSGEOMETRY_V1_32:
-               return xfs_compat_ioc_fsgeometry_v1(mp, arg);
-       case XFS_IOC_FSGROWFSDATA_32: {
-               struct xfs_growfs_data  in;
-
-               if (xfs_compat_growfs_data_copyin(&in, arg))
-                       return -XFS_ERROR(EFAULT);
-               error = xfs_growfs_data(mp, &in);
-               return -error;
-       }
-       case XFS_IOC_FSGROWFSRT_32: {
-               struct xfs_growfs_rt    in;
-
-               if (xfs_compat_growfs_rt_copyin(&in, arg))
-                       return -XFS_ERROR(EFAULT);
-               error = xfs_growfs_rt(mp, &in);
-               return -error;
-       }
-#endif
-       /* long changes size, but xfs only copiese out 32 bits */
-       case XFS_IOC_GETXFLAGS_32:
-       case XFS_IOC_SETXFLAGS_32:
-       case XFS_IOC_GETVERSION_32:
-               cmd = _NATIVE_IOC(cmd, long);
-               return xfs_file_ioctl(filp, cmd, p);
-       case XFS_IOC_SWAPEXT_32: {
-               struct xfs_swapext        sxp;
-               struct compat_xfs_swapext __user *sxu = arg;
-
-               /* Bulk copy in up to the sx_stat field, then copy bstat */
-               if (copy_from_user(&sxp, sxu,
-                                  offsetof(struct xfs_swapext, sx_stat)) ||
-                   xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
-                       return -XFS_ERROR(EFAULT);
-               error = xfs_swapext(&sxp);
-               return -error;
-       }
-       case XFS_IOC_FSBULKSTAT_32:
-       case XFS_IOC_FSBULKSTAT_SINGLE_32:
-       case XFS_IOC_FSINUMBERS_32:
-               return xfs_compat_ioc_bulkstat(mp, cmd, arg);
-       case XFS_IOC_FD_TO_HANDLE_32:
-       case XFS_IOC_PATH_TO_HANDLE_32:
-       case XFS_IOC_PATH_TO_FSHANDLE_32: {
-               struct xfs_fsop_handlereq       hreq;
-
-               if (xfs_compat_handlereq_copyin(&hreq, arg))
-                       return -XFS_ERROR(EFAULT);
-               cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
-               return xfs_find_handle(cmd, &hreq);
-       }
-       case XFS_IOC_OPEN_BY_HANDLE_32: {
-               struct xfs_fsop_handlereq       hreq;
-
-               if (xfs_compat_handlereq_copyin(&hreq, arg))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_open_by_handle(filp, &hreq);
-       }
-       case XFS_IOC_READLINK_BY_HANDLE_32: {
-               struct xfs_fsop_handlereq       hreq;
-
-               if (xfs_compat_handlereq_copyin(&hreq, arg))
-                       return -XFS_ERROR(EFAULT);
-               return xfs_readlink_by_handle(filp, &hreq);
-       }
-       case XFS_IOC_ATTRLIST_BY_HANDLE_32:
-               return xfs_compat_attrlist_by_handle(filp, arg);
-       case XFS_IOC_ATTRMULTI_BY_HANDLE_32:
-               return xfs_compat_attrmulti_by_handle(filp, arg);
-       case XFS_IOC_FSSETDM_BY_HANDLE_32:
-               return xfs_compat_fssetdm_by_handle(filp, arg);
-       default:
-               return -XFS_ERROR(ENOIOCTLCMD);
-       }
-}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h

deleted file mode 100644 (file)

index 80f4060..0000000
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_IOCTL32_H__
-#define __XFS_IOCTL32_H__
-
-#include <linux/compat.h>
-
-/*
- * on 32-bit arches, ioctl argument structures may have different sizes
- * and/or alignment.  We define compat structures which match the
- * 32-bit sizes/alignments here, and their associated ioctl numbers.
- *
- * xfs_ioctl32.c contains routines to copy these structures in and out.
- */
-
-/* stock kernel-level ioctls we support */
-#define XFS_IOC_GETXFLAGS_32   FS_IOC32_GETFLAGS
-#define XFS_IOC_SETXFLAGS_32   FS_IOC32_SETFLAGS
-#define XFS_IOC_GETVERSION_32  FS_IOC32_GETVERSION
-
-/*
- * On intel, even if sizes match, alignment and/or padding may differ.
- */
-#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
-#define BROKEN_X86_ALIGNMENT
-#define __compat_packed __attribute__((packed))
-#else
-#define __compat_packed
-#endif
-
-typedef struct compat_xfs_bstime {
-       compat_time_t   tv_sec;         /* seconds              */
-       __s32           tv_nsec;        /* and nanoseconds      */
-} compat_xfs_bstime_t;
-
-typedef struct compat_xfs_bstat {
-       __u64           bs_ino;         /* inode number                 */
-       __u16           bs_mode;        /* type and mode                */
-       __u16           bs_nlink;       /* number of links              */
-       __u32           bs_uid;         /* user id                      */
-       __u32           bs_gid;         /* group id                     */
-       __u32           bs_rdev;        /* device value                 */
-       __s32           bs_blksize;     /* block size                   */
-       __s64           bs_size;        /* file size                    */
-       compat_xfs_bstime_t bs_atime;   /* access time                  */
-       compat_xfs_bstime_t bs_mtime;   /* modify time                  */
-       compat_xfs_bstime_t bs_ctime;   /* inode change time            */
-       int64_t         bs_blocks;      /* number of blocks             */
-       __u32           bs_xflags;      /* extended flags               */
-       __s32           bs_extsize;     /* extent size                  */
-       __s32           bs_extents;     /* number of extents            */
-       __u32           bs_gen;         /* generation count             */
-       __u16           bs_projid_lo;   /* lower part of project id     */
-#define        bs_projid       bs_projid_lo    /* (previously just bs_projid)  */
-       __u16           bs_projid_hi;   /* high part of project id      */
-       unsigned char   bs_pad[12];     /* pad space, unused            */
-       __u32           bs_dmevmask;    /* DMIG event mask              */
-       __u16           bs_dmstate;     /* DMIG state info              */
-       __u16           bs_aextents;    /* attribute number of extents  */
-} __compat_packed compat_xfs_bstat_t;
-
-typedef struct compat_xfs_fsop_bulkreq {
-       compat_uptr_t   lastip;         /* last inode # pointer         */
-       __s32           icount;         /* count of entries in buffer   */
-       compat_uptr_t   ubuffer;        /* user buffer for inode desc.  */
-       compat_uptr_t   ocount;         /* output count pointer         */
-} compat_xfs_fsop_bulkreq_t;
-
-#define XFS_IOC_FSBULKSTAT_32 \
-       _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
-#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
-       _IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
-#define XFS_IOC_FSINUMBERS_32 \
-       _IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
-
-typedef struct compat_xfs_fsop_handlereq {
-       __u32           fd;             /* fd for FD_TO_HANDLE          */
-       compat_uptr_t   path;           /* user pathname                */
-       __u32           oflags;         /* open flags                   */
-       compat_uptr_t   ihandle;        /* user supplied handle         */
-       __u32           ihandlen;       /* user supplied length         */
-       compat_uptr_t   ohandle;        /* user buffer for handle       */
-       compat_uptr_t   ohandlen;       /* user buffer length           */
-} compat_xfs_fsop_handlereq_t;
-
-#define XFS_IOC_PATH_TO_FSHANDLE_32 \
-       _IOWR('X', 104, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_PATH_TO_HANDLE_32 \
-       _IOWR('X', 105, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_FD_TO_HANDLE_32 \
-       _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_OPEN_BY_HANDLE_32 \
-       _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_READLINK_BY_HANDLE_32 \
-       _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
-
-/* The bstat field in the swapext struct needs translation */
-typedef struct compat_xfs_swapext {
-       __int64_t               sx_version;     /* version */
-       __int64_t               sx_fdtarget;    /* fd of target file */
-       __int64_t               sx_fdtmp;       /* fd of tmp file */
-       xfs_off_t               sx_offset;      /* offset into file */
-       xfs_off_t               sx_length;      /* leng from offset */
-       char                    sx_pad[16];     /* pad space, unused */
-       compat_xfs_bstat_t      sx_stat;        /* stat of target b4 copy */
-} __compat_packed compat_xfs_swapext_t;
-
-#define XFS_IOC_SWAPEXT_32     _IOWR('X', 109, struct compat_xfs_swapext)
-
-typedef struct compat_xfs_fsop_attrlist_handlereq {
-       struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
-       struct xfs_attrlist_cursor      pos; /* opaque cookie, list offset */
-       __u32                           flags;  /* which namespace to use */
-       __u32                           buflen; /* length of buffer supplied */
-       compat_uptr_t                   buffer; /* returned names */
-} __compat_packed compat_xfs_fsop_attrlist_handlereq_t;
-
-/* Note: actually this is read/write */
-#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \
-       _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq)
-
-/* am_opcodes defined in xfs_fs.h */
-typedef struct compat_xfs_attr_multiop {
-       __u32           am_opcode;
-       __s32           am_error;
-       compat_uptr_t   am_attrname;
-       compat_uptr_t   am_attrvalue;
-       __u32           am_length;
-       __u32           am_flags;
-} compat_xfs_attr_multiop_t;
-
-typedef struct compat_xfs_fsop_attrmulti_handlereq {
-       struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
-       __u32                           opcount;/* count of following multiop */
-       /* ptr to compat_xfs_attr_multiop */
-       compat_uptr_t                   ops; /* attr_multi data */
-} compat_xfs_fsop_attrmulti_handlereq_t;
-
-#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \
-       _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
-
-typedef struct compat_xfs_fsop_setdm_handlereq {
-       struct compat_xfs_fsop_handlereq hreq;  /* handle information   */
-       /* ptr to struct fsdmidata */
-       compat_uptr_t                   data;   /* DMAPI data   */
-} compat_xfs_fsop_setdm_handlereq_t;
-
-#define XFS_IOC_FSSETDM_BY_HANDLE_32 \
-       _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq)
-
-#ifdef BROKEN_X86_ALIGNMENT
-/* on ia32 l_start is on a 32-bit boundary */
-typedef struct compat_xfs_flock64 {
-       __s16           l_type;
-       __s16           l_whence;
-       __s64           l_start __attribute__((packed));
-                       /* len == 0 means until end of file */
-       __s64           l_len __attribute__((packed));
-       __s32           l_sysid;
-       __u32           l_pid;
-       __s32           l_pad[4];       /* reserve area */
-} compat_xfs_flock64_t;
-
-#define XFS_IOC_ALLOCSP_32     _IOW('X', 10, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP_32      _IOW('X', 11, struct compat_xfs_flock64)
-#define XFS_IOC_ALLOCSP64_32   _IOW('X', 36, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP64_32    _IOW('X', 37, struct compat_xfs_flock64)
-#define XFS_IOC_RESVSP_32      _IOW('X', 40, struct compat_xfs_flock64)
-#define XFS_IOC_UNRESVSP_32    _IOW('X', 41, struct compat_xfs_flock64)
-#define XFS_IOC_RESVSP64_32    _IOW('X', 42, struct compat_xfs_flock64)
-#define XFS_IOC_UNRESVSP64_32  _IOW('X', 43, struct compat_xfs_flock64)
-#define XFS_IOC_ZERO_RANGE_32  _IOW('X', 57, struct compat_xfs_flock64)
-
-typedef struct compat_xfs_fsop_geom_v1 {
-       __u32           blocksize;      /* filesystem (data) block size */
-       __u32           rtextsize;      /* realtime extent size         */
-       __u32           agblocks;       /* fsblocks in an AG            */
-       __u32           agcount;        /* number of allocation groups  */
-       __u32           logblocks;      /* fsblocks in the log          */
-       __u32           sectsize;       /* (data) sector size, bytes    */
-       __u32           inodesize;      /* inode size in bytes          */
-       __u32           imaxpct;        /* max allowed inode space(%)   */
-       __u64           datablocks;     /* fsblocks in data subvolume   */
-       __u64           rtblocks;       /* fsblocks in realtime subvol  */
-       __u64           rtextents;      /* rt extents in realtime subvol*/
-       __u64           logstart;       /* starting fsblock of the log  */
-       unsigned char   uuid[16];       /* unique id of the filesystem  */
-       __u32           sunit;          /* stripe unit, fsblocks        */
-       __u32           swidth;         /* stripe width, fsblocks       */
-       __s32           version;        /* structure version            */
-       __u32           flags;          /* superblock version flags     */
-       __u32           logsectsize;    /* log sector size, bytes       */
-       __u32           rtsectsize;     /* realtime sector size, bytes  */
-       __u32           dirblocksize;   /* directory block size, bytes  */
-} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
-
-#define XFS_IOC_FSGEOMETRY_V1_32  \
-       _IOR('X', 100, struct compat_xfs_fsop_geom_v1)
-
-typedef struct compat_xfs_inogrp {
-       __u64           xi_startino;    /* starting inode number        */
-       __s32           xi_alloccount;  /* # bits set in allocmask      */
-       __u64           xi_allocmask;   /* mask of allocated inodes     */
-} __attribute__((packed)) compat_xfs_inogrp_t;
-
-/* These growfs input structures have padding on the end, so must translate */
-typedef struct compat_xfs_growfs_data {
-       __u64           newblocks;      /* new data subvol size, fsblocks */
-       __u32           imaxpct;        /* new inode space percentage limit */
-} __attribute__((packed)) compat_xfs_growfs_data_t;
-
-typedef struct compat_xfs_growfs_rt {
-       __u64           newblocks;      /* new realtime size, fsblocks */
-       __u32           extsize;        /* new realtime extent size, fsblocks */
-} __attribute__((packed)) compat_xfs_growfs_rt_t;
-
-#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data)
-#define XFS_IOC_FSGROWFSRT_32   _IOW('X', 112, struct compat_xfs_growfs_rt)
-
-#endif /* BROKEN_X86_ALIGNMENT */
-
-#endif /* __XFS_IOCTL32_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c

deleted file mode 100644 (file)

index b9c172b..0000000
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ /dev/null
@@ -1,1210 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_acl.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_rw.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_vnodeops.h"
-#include "xfs_inode_item.h"
-#include "xfs_trace.h"
-
-#include <linux/capability.h>
-#include <linux/xattr.h>
-#include <linux/namei.h>
-#include <linux/posix_acl.h>
-#include <linux/security.h>
-#include <linux/fiemap.h>
-#include <linux/slab.h>
-
-/*
- * Bring the timestamps in the XFS inode uptodate.
- *
- * Used before writing the inode to disk.
- */
-void
-xfs_synchronize_times(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = VFS_I(ip);
-
-       ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
-       ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
-       ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
-       ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
-       ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
-       ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
-}
-
-/*
- * If the linux inode is valid, mark it dirty.
- * Used when committing a dirty inode into a transaction so that
- * the inode will get written back by the linux code
- */
-void
-xfs_mark_inode_dirty_sync(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = VFS_I(ip);
-
-       if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
-               mark_inode_dirty_sync(inode);
-}
-
-void
-xfs_mark_inode_dirty(
-       xfs_inode_t     *ip)
-{
-       struct inode    *inode = VFS_I(ip);
-
-       if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
-               mark_inode_dirty(inode);
-}
-
-/*
- * Hook in SELinux.  This is not quite correct yet, what we really need
- * here (as we do for default ACLs) is a mechanism by which creation of
- * these attrs can be journalled at inode creation time (along with the
- * inode, of course, such that log replay can't cause these to be lost).
- */
-STATIC int
-xfs_init_security(
-       struct inode    *inode,
-       struct inode    *dir,
-       const struct qstr *qstr)
-{
-       struct xfs_inode *ip = XFS_I(inode);
-       size_t          length;
-       void            *value;
-       unsigned char   *name;
-       int             error;
-
-       error = security_inode_init_security(inode, dir, qstr, (char **)&name,
-                                            &value, &length);
-       if (error) {
-               if (error == -EOPNOTSUPP)
-                       return 0;
-               return -error;
-       }
-
-       error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
-
-       kfree(name);
-       kfree(value);
-       return error;
-}
-
-static void
-xfs_dentry_to_name(
-       struct xfs_name *namep,
-       struct dentry   *dentry)
-{
-       namep->name = dentry->d_name.name;
-       namep->len = dentry->d_name.len;
-}
-
-STATIC void
-xfs_cleanup_inode(
-       struct inode    *dir,
-       struct inode    *inode,
-       struct dentry   *dentry)
-{
-       struct xfs_name teardown;
-
-       /* Oh, the horror.
-        * If we can't add the ACL or we fail in
-        * xfs_init_security we must back out.
-        * ENOSPC can hit here, among other things.
-        */
-       xfs_dentry_to_name(&teardown, dentry);
-
-       xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
-       iput(inode);
-}
-
-STATIC int
-xfs_vn_mknod(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       int             mode,
-       dev_t           rdev)
-{
-       struct inode    *inode;
-       struct xfs_inode *ip = NULL;
-       struct posix_acl *default_acl = NULL;
-       struct xfs_name name;
-       int             error;
-
-       /*
-        * Irix uses Missed'em'V split, but doesn't want to see
-        * the upper 5 bits of (14bit) major.
-        */
-       if (S_ISCHR(mode) || S_ISBLK(mode)) {
-               if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
-                       return -EINVAL;
-               rdev = sysv_encode_dev(rdev);
-       } else {
-               rdev = 0;
-       }
-
-       if (IS_POSIXACL(dir)) {
-               default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
-               if (IS_ERR(default_acl))
-                       return PTR_ERR(default_acl);
-
-               if (!default_acl)
-                       mode &= ~current_umask();
-       }
-
-       xfs_dentry_to_name(&name, dentry);
-       error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
-       if (unlikely(error))
-               goto out_free_acl;
-
-       inode = VFS_I(ip);
-
-       error = xfs_init_security(inode, dir, &dentry->d_name);
-       if (unlikely(error))
-               goto out_cleanup_inode;
-
-       if (default_acl) {
-               error = -xfs_inherit_acl(inode, default_acl);
-               default_acl = NULL;
-               if (unlikely(error))
-                       goto out_cleanup_inode;
-       }
-
-
-       d_instantiate(dentry, inode);
-       return -error;
-
- out_cleanup_inode:
-       xfs_cleanup_inode(dir, inode, dentry);
- out_free_acl:
-       posix_acl_release(default_acl);
-       return -error;
-}
-
-STATIC int
-xfs_vn_create(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       int             mode,
-       struct nameidata *nd)
-{
-       return xfs_vn_mknod(dir, dentry, mode, 0);
-}
-
-STATIC int
-xfs_vn_mkdir(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       int             mode)
-{
-       return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
-}
-
-STATIC struct dentry *
-xfs_vn_lookup(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       struct nameidata *nd)
-{
-       struct xfs_inode *cip;
-       struct xfs_name name;
-       int             error;
-
-       if (dentry->d_name.len >= MAXNAMELEN)
-               return ERR_PTR(-ENAMETOOLONG);
-
-       xfs_dentry_to_name(&name, dentry);
-       error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
-       if (unlikely(error)) {
-               if (unlikely(error != ENOENT))
-                       return ERR_PTR(-error);
-               d_add(dentry, NULL);
-               return NULL;
-       }
-
-       return d_splice_alias(VFS_I(cip), dentry);
-}
-
-STATIC struct dentry *
-xfs_vn_ci_lookup(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       struct nameidata *nd)
-{
-       struct xfs_inode *ip;
-       struct xfs_name xname;
-       struct xfs_name ci_name;
-       struct qstr     dname;
-       int             error;
-
-       if (dentry->d_name.len >= MAXNAMELEN)
-               return ERR_PTR(-ENAMETOOLONG);
-
-       xfs_dentry_to_name(&xname, dentry);
-       error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
-       if (unlikely(error)) {
-               if (unlikely(error != ENOENT))
-                       return ERR_PTR(-error);
-               /*
-                * call d_add(dentry, NULL) here when d_drop_negative_children
-                * is called in xfs_vn_mknod (ie. allow negative dentries
-                * with CI filesystems).
-                */
-               return NULL;
-       }
-
-       /* if exact match, just splice and exit */
-       if (!ci_name.name)
-               return d_splice_alias(VFS_I(ip), dentry);
-
-       /* else case-insensitive match... */
-       dname.name = ci_name.name;
-       dname.len = ci_name.len;
-       dentry = d_add_ci(dentry, VFS_I(ip), &dname);
-       kmem_free(ci_name.name);
-       return dentry;
-}
-
-STATIC int
-xfs_vn_link(
-       struct dentry   *old_dentry,
-       struct inode    *dir,
-       struct dentry   *dentry)
-{
-       struct inode    *inode = old_dentry->d_inode;
-       struct xfs_name name;
-       int             error;
-
-       xfs_dentry_to_name(&name, dentry);
-
-       error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
-       if (unlikely(error))
-               return -error;
-
-       ihold(inode);
-       d_instantiate(dentry, inode);
-       return 0;
-}
-
-STATIC int
-xfs_vn_unlink(
-       struct inode    *dir,
-       struct dentry   *dentry)
-{
-       struct xfs_name name;
-       int             error;
-
-       xfs_dentry_to_name(&name, dentry);
-
-       error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
-       if (error)
-               return error;
-
-       /*
-        * With unlink, the VFS makes the dentry "negative": no inode,
-        * but still hashed. This is incompatible with case-insensitive
-        * mode, so invalidate (unhash) the dentry in CI-mode.
-        */
-       if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
-               d_invalidate(dentry);
-       return 0;
-}
-
-STATIC int
-xfs_vn_symlink(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       const char      *symname)
-{
-       struct inode    *inode;
-       struct xfs_inode *cip = NULL;
-       struct xfs_name name;
-       int             error;
-       mode_t          mode;
-
-       mode = S_IFLNK |
-               (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
-       xfs_dentry_to_name(&name, dentry);
-
-       error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
-       if (unlikely(error))
-               goto out;
-
-       inode = VFS_I(cip);
-
-       error = xfs_init_security(inode, dir, &dentry->d_name);
-       if (unlikely(error))
-               goto out_cleanup_inode;
-
-       d_instantiate(dentry, inode);
-       return 0;
-
- out_cleanup_inode:
-       xfs_cleanup_inode(dir, inode, dentry);
- out:
-       return -error;
-}
-
-STATIC int
-xfs_vn_rename(
-       struct inode    *odir,
-       struct dentry   *odentry,
-       struct inode    *ndir,
-       struct dentry   *ndentry)
-{
-       struct inode    *new_inode = ndentry->d_inode;
-       struct xfs_name oname;
-       struct xfs_name nname;
-
-       xfs_dentry_to_name(&oname, odentry);
-       xfs_dentry_to_name(&nname, ndentry);
-
-       return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
-                          XFS_I(ndir), &nname, new_inode ?
-                                               XFS_I(new_inode) : NULL);
-}
-
-/*
- * careful here - this function can get called recursively, so
- * we need to be very careful about how much stack we use.
- * uio is kmalloced for this reason...
- */
-STATIC void *
-xfs_vn_follow_link(
-       struct dentry           *dentry,
-       struct nameidata        *nd)
-{
-       char                    *link;
-       int                     error = -ENOMEM;
-
-       link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
-       if (!link)
-               goto out_err;
-
-       error = -xfs_readlink(XFS_I(dentry->d_inode), link);
-       if (unlikely(error))
-               goto out_kfree;
-
-       nd_set_link(nd, link);
-       return NULL;
-
- out_kfree:
-       kfree(link);
- out_err:
-       nd_set_link(nd, ERR_PTR(error));
-       return NULL;
-}
-
-STATIC void
-xfs_vn_put_link(
-       struct dentry   *dentry,
-       struct nameidata *nd,
-       void            *p)
-{
-       char            *s = nd_get_link(nd);
-
-       if (!IS_ERR(s))
-               kfree(s);
-}
-
-STATIC int
-xfs_vn_getattr(
-       struct vfsmount         *mnt,
-       struct dentry           *dentry,
-       struct kstat            *stat)
-{
-       struct inode            *inode = dentry->d_inode;
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-
-       trace_xfs_getattr(ip);
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return XFS_ERROR(EIO);
-
-       stat->size = XFS_ISIZE(ip);
-       stat->dev = inode->i_sb->s_dev;
-       stat->mode = ip->i_d.di_mode;
-       stat->nlink = ip->i_d.di_nlink;
-       stat->uid = ip->i_d.di_uid;
-       stat->gid = ip->i_d.di_gid;
-       stat->ino = ip->i_ino;
-       stat->atime = inode->i_atime;
-       stat->mtime = inode->i_mtime;
-       stat->ctime = inode->i_ctime;
-       stat->blocks =
-               XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
-
-
-       switch (inode->i_mode & S_IFMT) {
-       case S_IFBLK:
-       case S_IFCHR:
-               stat->blksize = BLKDEV_IOSIZE;
-               stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
-                                  sysv_minor(ip->i_df.if_u2.if_rdev));
-               break;
-       default:
-               if (XFS_IS_REALTIME_INODE(ip)) {
-                       /*
-                        * If the file blocks are being allocated from a
-                        * realtime volume, then return the inode's realtime
-                        * extent size or the realtime volume's extent size.
-                        */
-                       stat->blksize =
-                               xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
-               } else
-                       stat->blksize = xfs_preferred_iosize(mp);
-               stat->rdev = 0;
-               break;
-       }
-
-       return 0;
-}
-
-int
-xfs_setattr_nonsize(
-       struct xfs_inode        *ip,
-       struct iattr            *iattr,
-       int                     flags)
-{
-       xfs_mount_t             *mp = ip->i_mount;
-       struct inode            *inode = VFS_I(ip);
-       int                     mask = iattr->ia_valid;
-       xfs_trans_t             *tp;
-       int                     error;
-       uid_t                   uid = 0, iuid = 0;
-       gid_t                   gid = 0, igid = 0;
-       struct xfs_dquot        *udqp = NULL, *gdqp = NULL;
-       struct xfs_dquot        *olddquot1 = NULL, *olddquot2 = NULL;
-
-       trace_xfs_setattr(ip);
-
-       if (mp->m_flags & XFS_MOUNT_RDONLY)
-               return XFS_ERROR(EROFS);
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return XFS_ERROR(EIO);
-
-       error = -inode_change_ok(inode, iattr);
-       if (error)
-               return XFS_ERROR(error);
-
-       ASSERT((mask & ATTR_SIZE) == 0);
-
-       /*
-        * If disk quotas is on, we make sure that the dquots do exist on disk,
-        * before we start any other transactions. Trying to do this later
-        * is messy. We don't care to take a readlock to look at the ids
-        * in inode here, because we can't hold it across the trans_reserve.
-        * If the IDs do change before we take the ilock, we're covered
-        * because the i_*dquot fields will get updated anyway.
-        */
-       if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
-               uint    qflags = 0;
-
-               if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
-                       uid = iattr->ia_uid;
-                       qflags |= XFS_QMOPT_UQUOTA;
-               } else {
-                       uid = ip->i_d.di_uid;
-               }
-               if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
-                       gid = iattr->ia_gid;
-                       qflags |= XFS_QMOPT_GQUOTA;
-               }  else {
-                       gid = ip->i_d.di_gid;
-               }
-
-               /*
-                * We take a reference when we initialize udqp and gdqp,
-                * so it is important that we never blindly double trip on
-                * the same variable. See xfs_create() for an example.
-                */
-               ASSERT(udqp == NULL);
-               ASSERT(gdqp == NULL);
-               error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
-                                        qflags, &udqp, &gdqp);
-               if (error)
-                       return error;
-       }
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-       error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
-       if (error)
-               goto out_dqrele;
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-       /*
-        * Change file ownership.  Must be the owner or privileged.
-        */
-       if (mask & (ATTR_UID|ATTR_GID)) {
-               /*
-                * These IDs could have changed since we last looked at them.
-                * But, we're assured that if the ownership did change
-                * while we didn't have the inode locked, inode's dquot(s)
-                * would have changed also.
-                */
-               iuid = ip->i_d.di_uid;
-               igid = ip->i_d.di_gid;
-               gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
-               uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
-
-               /*
-                * Do a quota reservation only if uid/gid is actually
-                * going to change.
-                */
-               if (XFS_IS_QUOTA_RUNNING(mp) &&
-                   ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
-                    (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
-                       ASSERT(tp);
-                       error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
-                                               capable(CAP_FOWNER) ?
-                                               XFS_QMOPT_FORCE_RES : 0);
-                       if (error)      /* out of quota */
-                               goto out_trans_cancel;
-               }
-       }
-
-       xfs_trans_ijoin(tp, ip);
-
-       /*
-        * Change file ownership.  Must be the owner or privileged.
-        */
-       if (mask & (ATTR_UID|ATTR_GID)) {
-               /*
-                * CAP_FSETID overrides the following restrictions:
-                *
-                * The set-user-ID and set-group-ID bits of a file will be
-                * cleared upon successful return from chown()
-                */
-               if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
-                   !capable(CAP_FSETID))
-                       ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
-
-               /*
-                * Change the ownerships and register quota modifications
-                * in the transaction.
-                */
-               if (iuid != uid) {
-                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
-                               ASSERT(mask & ATTR_UID);
-                               ASSERT(udqp);
-                               olddquot1 = xfs_qm_vop_chown(tp, ip,
-                                                       &ip->i_udquot, udqp);
-                       }
-                       ip->i_d.di_uid = uid;
-                       inode->i_uid = uid;
-               }
-               if (igid != gid) {
-                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
-                               ASSERT(!XFS_IS_PQUOTA_ON(mp));
-                               ASSERT(mask & ATTR_GID);
-                               ASSERT(gdqp);
-                               olddquot2 = xfs_qm_vop_chown(tp, ip,
-                                                       &ip->i_gdquot, gdqp);
-                       }
-                       ip->i_d.di_gid = gid;
-                       inode->i_gid = gid;
-               }
-       }
-
-       /*
-        * Change file access modes.
-        */
-       if (mask & ATTR_MODE) {
-               umode_t mode = iattr->ia_mode;
-
-               if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
-                       mode &= ~S_ISGID;
-
-               ip->i_d.di_mode &= S_IFMT;
-               ip->i_d.di_mode |= mode & ~S_IFMT;
-
-               inode->i_mode &= S_IFMT;
-               inode->i_mode |= mode & ~S_IFMT;
-       }
-
-       /*
-        * Change file access or modified times.
-        */
-       if (mask & ATTR_ATIME) {
-               inode->i_atime = iattr->ia_atime;
-               ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
-               ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
-               ip->i_update_core = 1;
-       }
-       if (mask & ATTR_CTIME) {
-               inode->i_ctime = iattr->ia_ctime;
-               ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
-               ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
-               ip->i_update_core = 1;
-       }
-       if (mask & ATTR_MTIME) {
-               inode->i_mtime = iattr->ia_mtime;
-               ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
-               ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
-               ip->i_update_core = 1;
-       }
-
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-       XFS_STATS_INC(xs_ig_attrchg);
-
-       if (mp->m_flags & XFS_MOUNT_WSYNC)
-               xfs_trans_set_sync(tp);
-       error = xfs_trans_commit(tp, 0);
-
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       /*
-        * Release any dquot(s) the inode had kept before chown.
-        */
-       xfs_qm_dqrele(olddquot1);
-       xfs_qm_dqrele(olddquot2);
-       xfs_qm_dqrele(udqp);
-       xfs_qm_dqrele(gdqp);
-
-       if (error)
-               return XFS_ERROR(error);
-
-       /*
-        * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
-        *           update.  We could avoid this with linked transactions
-        *           and passing down the transaction pointer all the way
-        *           to attr_set.  No previous user of the generic
-        *           Posix ACL code seems to care about this issue either.
-        */
-       if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
-               error = -xfs_acl_chmod(inode);
-               if (error)
-                       return XFS_ERROR(error);
-       }
-
-       return 0;
-
-out_trans_cancel:
-       xfs_trans_cancel(tp, 0);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-out_dqrele:
-       xfs_qm_dqrele(udqp);
-       xfs_qm_dqrele(gdqp);
-       return error;
-}
-
-/*
- * Truncate file.  Must have write permission and not be a directory.
- */
-int
-xfs_setattr_size(
-       struct xfs_inode        *ip,
-       struct iattr            *iattr,
-       int                     flags)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct inode            *inode = VFS_I(ip);
-       int                     mask = iattr->ia_valid;
-       struct xfs_trans        *tp;
-       int                     error;
-       uint                    lock_flags;
-       uint                    commit_flags = 0;
-
-       trace_xfs_setattr(ip);
-
-       if (mp->m_flags & XFS_MOUNT_RDONLY)
-               return XFS_ERROR(EROFS);
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return XFS_ERROR(EIO);
-
-       error = -inode_change_ok(inode, iattr);
-       if (error)
-               return XFS_ERROR(error);
-
-       ASSERT(S_ISREG(ip->i_d.di_mode));
-       ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
-                       ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
-                       ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
-
-       lock_flags = XFS_ILOCK_EXCL;
-       if (!(flags & XFS_ATTR_NOLOCK))
-               lock_flags |= XFS_IOLOCK_EXCL;
-       xfs_ilock(ip, lock_flags);
-
-       /*
-        * Short circuit the truncate case for zero length files.
-        */
-       if (iattr->ia_size == 0 &&
-           ip->i_size == 0 && ip->i_d.di_nextents == 0) {
-               if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
-                       goto out_unlock;
-
-               /*
-                * Use the regular setattr path to update the timestamps.
-                */
-               xfs_iunlock(ip, lock_flags);
-               iattr->ia_valid &= ~ATTR_SIZE;
-               return xfs_setattr_nonsize(ip, iattr, 0);
-       }
-
-       /*
-        * Make sure that the dquots are attached to the inode.
-        */
-       error = xfs_qm_dqattach_locked(ip, 0);
-       if (error)
-               goto out_unlock;
-
-       /*
-        * Now we can make the changes.  Before we join the inode to the
-        * transaction, take care of the part of the truncation that must be
-        * done without the inode lock.  This needs to be done before joining
-        * the inode to the transaction, because the inode cannot be unlocked
-        * once it is a part of the transaction.
-        */
-       if (iattr->ia_size > ip->i_size) {
-               /*
-                * Do the first part of growing a file: zero any data in the
-                * last block that is beyond the old EOF.  We need to do this
-                * before the inode is joined to the transaction to modify
-                * i_size.
-                */
-               error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
-               if (error)
-                       goto out_unlock;
-       }
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       lock_flags &= ~XFS_ILOCK_EXCL;
-
-       /*
-        * We are going to log the inode size change in this transaction so
-        * any previous writes that are beyond the on disk EOF and the new
-        * EOF that have not been written out need to be written here.  If we
-        * do not write the data out, we expose ourselves to the null files
-        * problem.
-        *
-        * Only flush from the on disk size to the smaller of the in memory
-        * file size or the new size as that's the range we really care about
-        * here and prevents waiting for other data not within the range we
-        * care about here.
-        */
-       if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
-               error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size,
-                                       XBF_ASYNC, FI_NONE);
-               if (error)
-                       goto out_unlock;
-       }
-
-       /*
-        * Wait for all I/O to complete.
-        */
-       xfs_ioend_wait(ip);
-
-       error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
-                                    xfs_get_blocks);
-       if (error)
-               goto out_unlock;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
-       error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
-                                XFS_TRANS_PERM_LOG_RES,
-                                XFS_ITRUNCATE_LOG_COUNT);
-       if (error)
-               goto out_trans_cancel;
-
-       truncate_setsize(inode, iattr->ia_size);
-
-       commit_flags = XFS_TRANS_RELEASE_LOG_RES;
-       lock_flags |= XFS_ILOCK_EXCL;
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-       xfs_trans_ijoin(tp, ip);
-
-       /*
-        * Only change the c/mtime if we are changing the size or we are
-        * explicitly asked to change it.  This handles the semantic difference
-        * between truncate() and ftruncate() as implemented in the VFS.
-        *
-        * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
-        * special case where we need to update the times despite not having
-        * these flags set.  For all other operations the VFS set these flags
-        * explicitly if it wants a timestamp update.
-        */
-       if (iattr->ia_size != ip->i_size &&
-           (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
-               iattr->ia_ctime = iattr->ia_mtime =
-                       current_fs_time(inode->i_sb);
-               mask |= ATTR_CTIME | ATTR_MTIME;
-       }
-
-       if (iattr->ia_size > ip->i_size) {
-               ip->i_d.di_size = iattr->ia_size;
-               ip->i_size = iattr->ia_size;
-       } else if (iattr->ia_size <= ip->i_size ||
-                  (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
-               error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
-               if (error)
-                       goto out_trans_abort;
-
-               /*
-                * Truncated "down", so we're removing references to old data
-                * here - if we delay flushing for a long time, we expose
-                * ourselves unduly to the notorious NULL files problem.  So,
-                * we mark this inode and flush it when the file is closed,
-                * and do not wait the usual (long) time for writeout.
-                */
-               xfs_iflags_set(ip, XFS_ITRUNCATED);
-       }
-
-       if (mask & ATTR_CTIME) {
-               inode->i_ctime = iattr->ia_ctime;
-               ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
-               ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
-               ip->i_update_core = 1;
-       }
-       if (mask & ATTR_MTIME) {
-               inode->i_mtime = iattr->ia_mtime;
-               ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
-               ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
-               ip->i_update_core = 1;
-       }
-
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-       XFS_STATS_INC(xs_ig_attrchg);
-
-       if (mp->m_flags & XFS_MOUNT_WSYNC)
-               xfs_trans_set_sync(tp);
-
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-out_unlock:
-       if (lock_flags)
-               xfs_iunlock(ip, lock_flags);
-       return error;
-
-out_trans_abort:
-       commit_flags |= XFS_TRANS_ABORT;
-out_trans_cancel:
-       xfs_trans_cancel(tp, commit_flags);
-       goto out_unlock;
-}
-
-STATIC int
-xfs_vn_setattr(
-       struct dentry   *dentry,
-       struct iattr    *iattr)
-{
-       if (iattr->ia_valid & ATTR_SIZE)
-               return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
-       return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
-}
-
-#define XFS_FIEMAP_FLAGS       (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
-
-/*
- * Call fiemap helper to fill in user data.
- * Returns positive errors to xfs_getbmap.
- */
-STATIC int
-xfs_fiemap_format(
-       void                    **arg,
-       struct getbmapx         *bmv,
-       int                     *full)
-{
-       int                     error;
-       struct fiemap_extent_info *fieinfo = *arg;
-       u32                     fiemap_flags = 0;
-       u64                     logical, physical, length;
-
-       /* Do nothing for a hole */
-       if (bmv->bmv_block == -1LL)
-               return 0;
-
-       logical = BBTOB(bmv->bmv_offset);
-       physical = BBTOB(bmv->bmv_block);
-       length = BBTOB(bmv->bmv_length);
-
-       if (bmv->bmv_oflags & BMV_OF_PREALLOC)
-               fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
-       else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
-               fiemap_flags |= FIEMAP_EXTENT_DELALLOC;
-               physical = 0;   /* no block yet */
-       }
-       if (bmv->bmv_oflags & BMV_OF_LAST)
-               fiemap_flags |= FIEMAP_EXTENT_LAST;
-
-       error = fiemap_fill_next_extent(fieinfo, logical, physical,
-                                       length, fiemap_flags);
-       if (error > 0) {
-               error = 0;
-               *full = 1;      /* user array now full */
-       }
-
-       return -error;
-}
-
-STATIC int
-xfs_vn_fiemap(
-       struct inode            *inode,
-       struct fiemap_extent_info *fieinfo,
-       u64                     start,
-       u64                     length)
-{
-       xfs_inode_t             *ip = XFS_I(inode);
-       struct getbmapx         bm;
-       int                     error;
-
-       error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
-       if (error)
-               return error;
-
-       /* Set up bmap header for xfs internal routine */
-       bm.bmv_offset = BTOBB(start);
-       /* Special case for whole file */
-       if (length == FIEMAP_MAX_OFFSET)
-               bm.bmv_length = -1LL;
-       else
-               bm.bmv_length = BTOBB(length);
-
-       /* We add one because in getbmap world count includes the header */
-       bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
-                                       fieinfo->fi_extents_max + 1;
-       bm.bmv_count = min_t(__s32, bm.bmv_count,
-                            (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
-       bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
-       if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
-               bm.bmv_iflags |= BMV_IF_ATTRFORK;
-       if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
-               bm.bmv_iflags |= BMV_IF_DELALLOC;
-
-       error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
-       if (error)
-               return -error;
-
-       return 0;
-}
-
-static const struct inode_operations xfs_inode_operations = {
-       .get_acl                = xfs_get_acl,
-       .getattr                = xfs_vn_getattr,
-       .setattr                = xfs_vn_setattr,
-       .setxattr               = generic_setxattr,
-       .getxattr               = generic_getxattr,
-       .removexattr            = generic_removexattr,
-       .listxattr              = xfs_vn_listxattr,
-       .fiemap                 = xfs_vn_fiemap,
-};
-
-static const struct inode_operations xfs_dir_inode_operations = {
-       .create                 = xfs_vn_create,
-       .lookup                 = xfs_vn_lookup,
-       .link                   = xfs_vn_link,
-       .unlink                 = xfs_vn_unlink,
-       .symlink                = xfs_vn_symlink,
-       .mkdir                  = xfs_vn_mkdir,
-       /*
-        * Yes, XFS uses the same method for rmdir and unlink.
-        *
-        * There are some subtile differences deeper in the code,
-        * but we use S_ISDIR to check for those.
-        */
-       .rmdir                  = xfs_vn_unlink,
-       .mknod                  = xfs_vn_mknod,
-       .rename                 = xfs_vn_rename,
-       .get_acl                = xfs_get_acl,
-       .getattr                = xfs_vn_getattr,
-       .setattr                = xfs_vn_setattr,
-       .setxattr               = generic_setxattr,
-       .getxattr               = generic_getxattr,
-       .removexattr            = generic_removexattr,
-       .listxattr              = xfs_vn_listxattr,
-};
-
-static const struct inode_operations xfs_dir_ci_inode_operations = {
-       .create                 = xfs_vn_create,
-       .lookup                 = xfs_vn_ci_lookup,
-       .link                   = xfs_vn_link,
-       .unlink                 = xfs_vn_unlink,
-       .symlink                = xfs_vn_symlink,
-       .mkdir                  = xfs_vn_mkdir,
-       /*
-        * Yes, XFS uses the same method for rmdir and unlink.
-        *
-        * There are some subtile differences deeper in the code,
-        * but we use S_ISDIR to check for those.
-        */
-       .rmdir                  = xfs_vn_unlink,
-       .mknod                  = xfs_vn_mknod,
-       .rename                 = xfs_vn_rename,
-       .get_acl                = xfs_get_acl,
-       .getattr                = xfs_vn_getattr,
-       .setattr                = xfs_vn_setattr,
-       .setxattr               = generic_setxattr,
-       .getxattr               = generic_getxattr,
-       .removexattr            = generic_removexattr,
-       .listxattr              = xfs_vn_listxattr,
-};
-
-static const struct inode_operations xfs_symlink_inode_operations = {
-       .readlink               = generic_readlink,
-       .follow_link            = xfs_vn_follow_link,
-       .put_link               = xfs_vn_put_link,
-       .get_acl                = xfs_get_acl,
-       .getattr                = xfs_vn_getattr,
-       .setattr                = xfs_vn_setattr,
-       .setxattr               = generic_setxattr,
-       .getxattr               = generic_getxattr,
-       .removexattr            = generic_removexattr,
-       .listxattr              = xfs_vn_listxattr,
-};
-
-STATIC void
-xfs_diflags_to_iflags(
-       struct inode            *inode,
-       struct xfs_inode        *ip)
-{
-       if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
-               inode->i_flags |= S_IMMUTABLE;
-       else
-               inode->i_flags &= ~S_IMMUTABLE;
-       if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
-               inode->i_flags |= S_APPEND;
-       else
-               inode->i_flags &= ~S_APPEND;
-       if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
-               inode->i_flags |= S_SYNC;
-       else
-               inode->i_flags &= ~S_SYNC;
-       if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
-               inode->i_flags |= S_NOATIME;
-       else
-               inode->i_flags &= ~S_NOATIME;
-}
-
-/*
- * Initialize the Linux inode, set up the operation vectors and
- * unlock the inode.
- *
- * When reading existing inodes from disk this is called directly
- * from xfs_iget, when creating a new inode it is called from
- * xfs_ialloc after setting up the inode.
- *
- * We are always called with an uninitialised linux inode here.
- * We need to initialise the necessary fields and take a reference
- * on it.
- */
-void
-xfs_setup_inode(
-       struct xfs_inode        *ip)
-{
-       struct inode            *inode = &ip->i_vnode;
-
-       inode->i_ino = ip->i_ino;
-       inode->i_state = I_NEW;
-
-       inode_sb_list_add(inode);
-       /* make the inode look hashed for the writeback code */
-       hlist_add_fake(&inode->i_hash);
-
-       inode->i_mode   = ip->i_d.di_mode;
-       inode->i_nlink  = ip->i_d.di_nlink;
-       inode->i_uid    = ip->i_d.di_uid;
-       inode->i_gid    = ip->i_d.di_gid;
-
-       switch (inode->i_mode & S_IFMT) {
-       case S_IFBLK:
-       case S_IFCHR:
-               inode->i_rdev =
-                       MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
-                             sysv_minor(ip->i_df.if_u2.if_rdev));
-               break;
-       default:
-               inode->i_rdev = 0;
-               break;
-       }
-
-       inode->i_generation = ip->i_d.di_gen;
-       i_size_write(inode, ip->i_d.di_size);
-       inode->i_atime.tv_sec   = ip->i_d.di_atime.t_sec;
-       inode->i_atime.tv_nsec  = ip->i_d.di_atime.t_nsec;
-       inode->i_mtime.tv_sec   = ip->i_d.di_mtime.t_sec;
-       inode->i_mtime.tv_nsec  = ip->i_d.di_mtime.t_nsec;
-       inode->i_ctime.tv_sec   = ip->i_d.di_ctime.t_sec;
-       inode->i_ctime.tv_nsec  = ip->i_d.di_ctime.t_nsec;
-       xfs_diflags_to_iflags(inode, ip);
-
-       switch (inode->i_mode & S_IFMT) {
-       case S_IFREG:
-               inode->i_op = &xfs_inode_operations;
-               inode->i_fop = &xfs_file_operations;
-               inode->i_mapping->a_ops = &xfs_address_space_operations;
-               break;
-       case S_IFDIR:
-               if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
-                       inode->i_op = &xfs_dir_ci_inode_operations;
-               else
-                       inode->i_op = &xfs_dir_inode_operations;
-               inode->i_fop = &xfs_dir_file_operations;
-               break;
-       case S_IFLNK:
-               inode->i_op = &xfs_symlink_inode_operations;
-               if (!(ip->i_df.if_flags & XFS_IFINLINE))
-                       inode->i_mapping->a_ops = &xfs_address_space_operations;
-               break;
-       default:
-               inode->i_op = &xfs_inode_operations;
-               init_special_inode(inode, inode->i_mode, inode->i_rdev);
-               break;
-       }
-
-       /*
-        * If there is no attribute fork no ACL can exist on this inode,
-        * and it can't have any file capabilities attached to it either.
-        */
-       if (!XFS_IFORK_Q(ip)) {
-               inode_has_no_xattr(inode);
-               cache_no_acl(inode);
-       }
-
-       xfs_iflags_clear(ip, XFS_INEW);
-       barrier();
-
-       unlock_new_inode(inode);
-}
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h

deleted file mode 100644 (file)

index ef41c92..0000000
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_IOPS_H__
-#define __XFS_IOPS_H__
-
-struct xfs_inode;
-
-extern const struct file_operations xfs_file_operations;
-extern const struct file_operations xfs_dir_file_operations;
-
-extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
-
-extern void xfs_setup_inode(struct xfs_inode *);
-
-#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h

deleted file mode 100644 (file)

index d42f814..0000000
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ /dev/null
@@ -1,310 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_LINUX__
-#define __XFS_LINUX__
-
-#include <linux/types.h>
-
-/*
- * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
- * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
- */
-#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
-# define XFS_BIG_BLKNOS        1
-# define XFS_BIG_INUMS 1
-#else
-# define XFS_BIG_BLKNOS        0
-# define XFS_BIG_INUMS 0
-#endif
-
-#include <xfs_types.h>
-
-#include <kmem.h>
-#include <mrlock.h>
-#include <time.h>
-
-#include <support/uuid.h>
-
-#include <linux/semaphore.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/blkdev.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/file.h>
-#include <linux/swap.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/bitops.h>
-#include <linux/major.h>
-#include <linux/pagemap.h>
-#include <linux/vfs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/proc_fs.h>
-#include <linux/sort.h>
-#include <linux/cpu.h>
-#include <linux/notifier.h>
-#include <linux/delay.h>
-#include <linux/log2.h>
-#include <linux/spinlock.h>
-#include <linux/random.h>
-#include <linux/ctype.h>
-#include <linux/writeback.h>
-#include <linux/capability.h>
-#include <linux/list_sort.h>
-
-#include <asm/page.h>
-#include <asm/div64.h>
-#include <asm/param.h>
-#include <asm/uaccess.h>
-#include <asm/byteorder.h>
-#include <asm/unaligned.h>
-
-#include <xfs_vnode.h>
-#include <xfs_stats.h>
-#include <xfs_sysctl.h>
-#include <xfs_iops.h>
-#include <xfs_aops.h>
-#include <xfs_super.h>
-#include <xfs_buf.h>
-#include <xfs_message.h>
-
-#ifdef __BIG_ENDIAN
-#define XFS_NATIVE_HOST 1
-#else
-#undef XFS_NATIVE_HOST
-#endif
-
-/*
- * Feature macros (disable/enable)
- */
-#ifdef CONFIG_SMP
-#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
-#else
-#undef  HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
-#endif
-
-#define irix_sgid_inherit      xfs_params.sgid_inherit.val
-#define irix_symlink_mode      xfs_params.symlink_mode.val
-#define xfs_panic_mask         xfs_params.panic_mask.val
-#define xfs_error_level                xfs_params.error_level.val
-#define xfs_syncd_centisecs    xfs_params.syncd_timer.val
-#define xfs_stats_clear                xfs_params.stats_clear.val
-#define xfs_inherit_sync       xfs_params.inherit_sync.val
-#define xfs_inherit_nodump     xfs_params.inherit_nodump.val
-#define xfs_inherit_noatime    xfs_params.inherit_noatim.val
-#define xfs_buf_timer_centisecs        xfs_params.xfs_buf_timer.val
-#define xfs_buf_age_centisecs  xfs_params.xfs_buf_age.val
-#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val
-#define xfs_rotorstep          xfs_params.rotorstep.val
-#define xfs_inherit_nodefrag   xfs_params.inherit_nodfrg.val
-#define xfs_fstrm_centisecs    xfs_params.fstrm_timer.val
-
-#define current_cpu()          (raw_smp_processor_id())
-#define current_pid()          (current->pid)
-#define current_test_flags(f)  (current->flags & (f))
-#define current_set_flags_nested(sp, f)                \
-               (*(sp) = current->flags, current->flags |= (f))
-#define current_clear_flags_nested(sp, f)      \
-               (*(sp) = current->flags, current->flags &= ~(f))
-#define current_restore_flags_nested(sp, f)    \
-               (current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
-
-#define spinlock_destroy(lock)
-
-#define NBBY           8               /* number of bits per byte */
-
-/*
- * Size of block device i/o is parameterized here.
- * Currently the system supports page-sized i/o.
- */
-#define        BLKDEV_IOSHIFT          PAGE_CACHE_SHIFT
-#define        BLKDEV_IOSIZE           (1<<BLKDEV_IOSHIFT)
-/* number of BB's per block device block */
-#define        BLKDEV_BB               BTOBB(BLKDEV_IOSIZE)
-
-#define ENOATTR                ENODATA         /* Attribute not found */
-#define EWRONGFS       EINVAL          /* Mount with wrong filesystem type */
-#define EFSCORRUPTED   EUCLEAN         /* Filesystem is corrupted */
-
-#define SYNCHRONIZE()  barrier()
-#define __return_address __builtin_return_address(0)
-
-#define XFS_PROJID_DEFAULT     0
-#define MAXPATHLEN     1024
-
-#define MIN(a,b)       (min(a,b))
-#define MAX(a,b)       (max(a,b))
-#define howmany(x, y)  (((x)+((y)-1))/(y))
-
-/*
- * Various platform dependent calls that don't fit anywhere else
- */
-#define xfs_sort(a,n,s,fn)     sort(a,n,s,fn,NULL)
-#define xfs_stack_trace()      dump_stack()
-
-
-/* Move the kernel do_div definition off to one side */
-
-#if defined __i386__
-/* For ia32 we need to pull some tricks to get past various versions
- * of the compiler which do not like us using do_div in the middle
- * of large functions.
- */
-static inline __u32 xfs_do_div(void *a, __u32 b, int n)
-{
-       __u32   mod;
-
-       switch (n) {
-               case 4:
-                       mod = *(__u32 *)a % b;
-                       *(__u32 *)a = *(__u32 *)a / b;
-                       return mod;
-               case 8:
-                       {
-                       unsigned long __upper, __low, __high, __mod;
-                       __u64   c = *(__u64 *)a;
-                       __upper = __high = c >> 32;
-                       __low = c;
-                       if (__high) {
-                               __upper = __high % (b);
-                               __high = __high / (b);
-                       }
-                       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
-                       asm("":"=A" (c):"a" (__low),"d" (__high));
-                       *(__u64 *)a = c;
-                       return __mod;
-                       }
-       }
-
-       /* NOTREACHED */
-       return 0;
-}
-
-/* Side effect free 64 bit mod operation */
-static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
-{
-       switch (n) {
-               case 4:
-                       return *(__u32 *)a % b;
-               case 8:
-                       {
-                       unsigned long __upper, __low, __high, __mod;
-                       __u64   c = *(__u64 *)a;
-                       __upper = __high = c >> 32;
-                       __low = c;
-                       if (__high) {
-                               __upper = __high % (b);
-                               __high = __high / (b);
-                       }
-                       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
-                       asm("":"=A" (c):"a" (__low),"d" (__high));
-                       return __mod;
-                       }
-       }
-
-       /* NOTREACHED */
-       return 0;
-}
-#else
-static inline __u32 xfs_do_div(void *a, __u32 b, int n)
-{
-       __u32   mod;
-
-       switch (n) {
-               case 4:
-                       mod = *(__u32 *)a % b;
-                       *(__u32 *)a = *(__u32 *)a / b;
-                       return mod;
-               case 8:
-                       mod = do_div(*(__u64 *)a, b);
-                       return mod;
-       }
-
-       /* NOTREACHED */
-       return 0;
-}
-
-/* Side effect free 64 bit mod operation */
-static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
-{
-       switch (n) {
-               case 4:
-                       return *(__u32 *)a % b;
-               case 8:
-                       {
-                       __u64   c = *(__u64 *)a;
-                       return do_div(c, b);
-                       }
-       }
-
-       /* NOTREACHED */
-       return 0;
-}
-#endif
-
-#undef do_div
-#define do_div(a, b)   xfs_do_div(&(a), (b), sizeof(a))
-#define do_mod(a, b)   xfs_do_mod(&(a), (b), sizeof(a))
-
-static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
-{
-       x += y - 1;
-       do_div(x, y);
-       return(x * y);
-}
-
-static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
-{
-       x += y - 1;
-       do_div(x, y);
-       return x;
-}
-
-/* ARM old ABI has some weird alignment/padding */
-#if defined(__arm__) && !defined(__ARM_EABI__)
-#define __arch_pack __attribute__((packed))
-#else
-#define __arch_pack
-#endif
-
-#define ASSERT_ALWAYS(expr)    \
-       (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
-
-#ifndef DEBUG
-#define ASSERT(expr)   ((void)0)
-
-#ifndef STATIC
-# define STATIC static noinline
-#endif
-
-#else /* DEBUG */
-
-#define ASSERT(expr)   \
-       (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
-
-#ifndef STATIC
-# define STATIC noinline
-#endif
-
-#endif /* DEBUG */
-
-#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c

deleted file mode 100644 (file)

index bd672de..0000000
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2011 Red Hat, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-
-/*
- * XFS logging functions
- */
-static void
-__xfs_printk(
-       const char              *level,
-       const struct xfs_mount  *mp,
-       struct va_format        *vaf)
-{
-       if (mp && mp->m_fsname) {
-               printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
-               return;
-       }
-       printk("%sXFS: %pV\n", level, vaf);
-}
-
-#define define_xfs_printk_level(func, kern_level)              \
-void func(const struct xfs_mount *mp, const char *fmt, ...)    \
-{                                                              \
-       struct va_format        vaf;                            \
-       va_list                 args;                           \
-                                                               \
-       va_start(args, fmt);                                    \
-                                                               \
-       vaf.fmt = fmt;                                          \
-       vaf.va = &args;                                         \
-                                                               \
-       __xfs_printk(kern_level, mp, &vaf);                     \
-       va_end(args);                                           \
-}                                                              \
-
-define_xfs_printk_level(xfs_emerg, KERN_EMERG);
-define_xfs_printk_level(xfs_alert, KERN_ALERT);
-define_xfs_printk_level(xfs_crit, KERN_CRIT);
-define_xfs_printk_level(xfs_err, KERN_ERR);
-define_xfs_printk_level(xfs_warn, KERN_WARNING);
-define_xfs_printk_level(xfs_notice, KERN_NOTICE);
-define_xfs_printk_level(xfs_info, KERN_INFO);
-#ifdef DEBUG
-define_xfs_printk_level(xfs_debug, KERN_DEBUG);
-#endif
-
-void
-xfs_alert_tag(
-       const struct xfs_mount  *mp,
-       int                     panic_tag,
-       const char              *fmt, ...)
-{
-       struct va_format        vaf;
-       va_list                 args;
-       int                     do_panic = 0;
-
-       if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
-               xfs_alert(mp, "Transforming an alert into a BUG.");
-               do_panic = 1;
-       }
-
-       va_start(args, fmt);
-
-       vaf.fmt = fmt;
-       vaf.va = &args;
-
-       __xfs_printk(KERN_ALERT, mp, &vaf);
-       va_end(args);
-
-       BUG_ON(do_panic);
-}
-
-void
-assfail(char *expr, char *file, int line)
-{
-       xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
-               expr, file, line);
-       BUG();
-}
-
-void
-xfs_hex_dump(void *p, int length)
-{
-       print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
-}
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h

deleted file mode 100644 (file)

index 7fb7ea0..0000000
--- a/fs/xfs/linux-2.6/xfs_message.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef __XFS_MESSAGE_H
-#define __XFS_MESSAGE_H 1
-
-struct xfs_mount;
-
-extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_alert_tag(const struct xfs_mount *mp, int tag,
-                        const char *fmt, ...)
-        __attribute__ ((format (printf, 3, 4)));
-extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-
-#ifdef DEBUG
-extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
-        __attribute__ ((format (printf, 2, 3)));
-#else
-static inline void
-__attribute__ ((format (printf, 2, 3)))
-xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
-{
-}
-#endif
-
-extern void assfail(char *expr, char *f, int l);
-
-extern void xfs_hex_dump(void *p, int length);
-
-#endif /* __XFS_MESSAGE_H */
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c

deleted file mode 100644 (file)

index 29b9d64..0000000
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2008, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_sb.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_trans.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "quota/xfs_qm.h"
-#include <linux/quota.h>
-
-
-STATIC int
-xfs_quota_type(int type)
-{
-       switch (type) {
-       case USRQUOTA:
-               return XFS_DQ_USER;
-       case GRPQUOTA:
-               return XFS_DQ_GROUP;
-       default:
-               return XFS_DQ_PROJ;
-       }
-}
-
-STATIC int
-xfs_fs_get_xstate(
-       struct super_block      *sb,
-       struct fs_quota_stat    *fqs)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       if (!XFS_IS_QUOTA_RUNNING(mp))
-               return -ENOSYS;
-       return -xfs_qm_scall_getqstat(mp, fqs);
-}
-
-STATIC int
-xfs_fs_set_xstate(
-       struct super_block      *sb,
-       unsigned int            uflags,
-       int                     op)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-       unsigned int            flags = 0;
-
-       if (sb->s_flags & MS_RDONLY)
-               return -EROFS;
-       if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
-               return -ENOSYS;
-
-       if (uflags & FS_QUOTA_UDQ_ACCT)
-               flags |= XFS_UQUOTA_ACCT;
-       if (uflags & FS_QUOTA_PDQ_ACCT)
-               flags |= XFS_PQUOTA_ACCT;
-       if (uflags & FS_QUOTA_GDQ_ACCT)
-               flags |= XFS_GQUOTA_ACCT;
-       if (uflags & FS_QUOTA_UDQ_ENFD)
-               flags |= XFS_UQUOTA_ENFD;
-       if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD))
-               flags |= XFS_OQUOTA_ENFD;
-
-       switch (op) {
-       case Q_XQUOTAON:
-               return -xfs_qm_scall_quotaon(mp, flags);
-       case Q_XQUOTAOFF:
-               if (!XFS_IS_QUOTA_ON(mp))
-                       return -EINVAL;
-               return -xfs_qm_scall_quotaoff(mp, flags);
-       case Q_XQUOTARM:
-               if (XFS_IS_QUOTA_ON(mp))
-                       return -EINVAL;
-               return -xfs_qm_scall_trunc_qfiles(mp, flags);
-       }
-
-       return -EINVAL;
-}
-
-STATIC int
-xfs_fs_get_dqblk(
-       struct super_block      *sb,
-       int                     type,
-       qid_t                   id,
-       struct fs_disk_quota    *fdq)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       if (!XFS_IS_QUOTA_RUNNING(mp))
-               return -ENOSYS;
-       if (!XFS_IS_QUOTA_ON(mp))
-               return -ESRCH;
-
-       return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq);
-}
-
-STATIC int
-xfs_fs_set_dqblk(
-       struct super_block      *sb,
-       int                     type,
-       qid_t                   id,
-       struct fs_disk_quota    *fdq)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       if (sb->s_flags & MS_RDONLY)
-               return -EROFS;
-       if (!XFS_IS_QUOTA_RUNNING(mp))
-               return -ENOSYS;
-       if (!XFS_IS_QUOTA_ON(mp))
-               return -ESRCH;
-
-       return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
-}
-
-const struct quotactl_ops xfs_quotactl_operations = {
-       .get_xstate             = xfs_fs_get_xstate,
-       .set_xstate             = xfs_fs_set_xstate,
-       .get_dqblk              = xfs_fs_get_dqblk,
-       .set_dqblk              = xfs_fs_set_dqblk,
-};
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c

deleted file mode 100644 (file)

index 76fdc58..0000000
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include <linux/proc_fs.h>
-
-DEFINE_PER_CPU(struct xfsstats, xfsstats);
-
-static int xfs_stat_proc_show(struct seq_file *m, void *v)
-{
-       int             c, i, j, val;
-       __uint64_t      xs_xstrat_bytes = 0;
-       __uint64_t      xs_write_bytes = 0;
-       __uint64_t      xs_read_bytes = 0;
-
-       static const struct xstats_entry {
-               char    *desc;
-               int     endpoint;
-       } xstats[] = {
-               { "extent_alloc",       XFSSTAT_END_EXTENT_ALLOC        },
-               { "abt",                XFSSTAT_END_ALLOC_BTREE         },
-               { "blk_map",            XFSSTAT_END_BLOCK_MAPPING       },
-               { "bmbt",               XFSSTAT_END_BLOCK_MAP_BTREE     },
-               { "dir",                XFSSTAT_END_DIRECTORY_OPS       },
-               { "trans",              XFSSTAT_END_TRANSACTIONS        },
-               { "ig",                 XFSSTAT_END_INODE_OPS           },
-               { "log",                XFSSTAT_END_LOG_OPS             },
-               { "push_ail",           XFSSTAT_END_TAIL_PUSHING        },
-               { "xstrat",             XFSSTAT_END_WRITE_CONVERT       },
-               { "rw",                 XFSSTAT_END_READ_WRITE_OPS      },
-               { "attr",               XFSSTAT_END_ATTRIBUTE_OPS       },
-               { "icluster",           XFSSTAT_END_INODE_CLUSTER       },
-               { "vnodes",             XFSSTAT_END_VNODE_OPS           },
-               { "buf",                XFSSTAT_END_BUF                 },
-               { "abtb2",              XFSSTAT_END_ABTB_V2             },
-               { "abtc2",              XFSSTAT_END_ABTC_V2             },
-               { "bmbt2",              XFSSTAT_END_BMBT_V2             },
-               { "ibt2",               XFSSTAT_END_IBT_V2              },
-       };
-
-       /* Loop over all stats groups */
-       for (i=j = 0; i < ARRAY_SIZE(xstats); i++) {
-               seq_printf(m, "%s", xstats[i].desc);
-               /* inner loop does each group */
-               while (j < xstats[i].endpoint) {
-                       val = 0;
-                       /* sum over all cpus */
-                       for_each_possible_cpu(c)
-                               val += *(((__u32*)&per_cpu(xfsstats, c) + j));
-                       seq_printf(m, " %u", val);
-                       j++;
-               }
-               seq_putc(m, '\n');
-       }
-       /* extra precision counters */
-       for_each_possible_cpu(i) {
-               xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
-               xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
-               xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
-       }
-
-       seq_printf(m, "xpc %Lu %Lu %Lu\n",
-                       xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
-       seq_printf(m, "debug %u\n",
-#if defined(DEBUG)
-               1);
-#else
-               0);
-#endif
-       return 0;
-}
-
-static int xfs_stat_proc_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, xfs_stat_proc_show, NULL);
-}
-
-static const struct file_operations xfs_stat_proc_fops = {
-       .owner          = THIS_MODULE,
-       .open           = xfs_stat_proc_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-int
-xfs_init_procfs(void)
-{
-       if (!proc_mkdir("fs/xfs", NULL))
-               goto out;
-
-       if (!proc_create("fs/xfs/stat", 0, NULL,
-                        &xfs_stat_proc_fops))
-               goto out_remove_entry;
-       return 0;
-
- out_remove_entry:
-       remove_proc_entry("fs/xfs", NULL);
- out:
-       return -ENOMEM;
-}
-
-void
-xfs_cleanup_procfs(void)
-{
-       remove_proc_entry("fs/xfs/stat", NULL);
-       remove_proc_entry("fs/xfs", NULL);
-}
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h

deleted file mode 100644 (file)

index 736854b..0000000
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_STATS_H__
-#define __XFS_STATS_H__
-
-
-#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
-
-#include <linux/percpu.h>
-
-/*
- * XFS global statistics
- */
-struct xfsstats {
-# define XFSSTAT_END_EXTENT_ALLOC      4
-       __uint32_t              xs_allocx;
-       __uint32_t              xs_allocb;
-       __uint32_t              xs_freex;
-       __uint32_t              xs_freeb;
-# define XFSSTAT_END_ALLOC_BTREE       (XFSSTAT_END_EXTENT_ALLOC+4)
-       __uint32_t              xs_abt_lookup;
-       __uint32_t              xs_abt_compare;
-       __uint32_t              xs_abt_insrec;
-       __uint32_t              xs_abt_delrec;
-# define XFSSTAT_END_BLOCK_MAPPING     (XFSSTAT_END_ALLOC_BTREE+7)
-       __uint32_t              xs_blk_mapr;
-       __uint32_t              xs_blk_mapw;
-       __uint32_t              xs_blk_unmap;
-       __uint32_t              xs_add_exlist;
-       __uint32_t              xs_del_exlist;
-       __uint32_t              xs_look_exlist;
-       __uint32_t              xs_cmp_exlist;
-# define XFSSTAT_END_BLOCK_MAP_BTREE   (XFSSTAT_END_BLOCK_MAPPING+4)
-       __uint32_t              xs_bmbt_lookup;
-       __uint32_t              xs_bmbt_compare;
-       __uint32_t              xs_bmbt_insrec;
-       __uint32_t              xs_bmbt_delrec;
-# define XFSSTAT_END_DIRECTORY_OPS     (XFSSTAT_END_BLOCK_MAP_BTREE+4)
-       __uint32_t              xs_dir_lookup;
-       __uint32_t              xs_dir_create;
-       __uint32_t              xs_dir_remove;
-       __uint32_t              xs_dir_getdents;
-# define XFSSTAT_END_TRANSACTIONS      (XFSSTAT_END_DIRECTORY_OPS+3)
-       __uint32_t              xs_trans_sync;
-       __uint32_t              xs_trans_async;
-       __uint32_t              xs_trans_empty;
-# define XFSSTAT_END_INODE_OPS         (XFSSTAT_END_TRANSACTIONS+7)
-       __uint32_t              xs_ig_attempts;
-       __uint32_t              xs_ig_found;
-       __uint32_t              xs_ig_frecycle;
-       __uint32_t              xs_ig_missed;
-       __uint32_t              xs_ig_dup;
-       __uint32_t              xs_ig_reclaims;
-       __uint32_t              xs_ig_attrchg;
-# define XFSSTAT_END_LOG_OPS           (XFSSTAT_END_INODE_OPS+5)
-       __uint32_t              xs_log_writes;
-       __uint32_t              xs_log_blocks;
-       __uint32_t              xs_log_noiclogs;
-       __uint32_t              xs_log_force;
-       __uint32_t              xs_log_force_sleep;
-# define XFSSTAT_END_TAIL_PUSHING      (XFSSTAT_END_LOG_OPS+10)
-       __uint32_t              xs_try_logspace;
-       __uint32_t              xs_sleep_logspace;
-       __uint32_t              xs_push_ail;
-       __uint32_t              xs_push_ail_success;
-       __uint32_t              xs_push_ail_pushbuf;
-       __uint32_t              xs_push_ail_pinned;
-       __uint32_t              xs_push_ail_locked;
-       __uint32_t              xs_push_ail_flushing;
-       __uint32_t              xs_push_ail_restarts;
-       __uint32_t              xs_push_ail_flush;
-# define XFSSTAT_END_WRITE_CONVERT     (XFSSTAT_END_TAIL_PUSHING+2)
-       __uint32_t              xs_xstrat_quick;
-       __uint32_t              xs_xstrat_split;
-# define XFSSTAT_END_READ_WRITE_OPS    (XFSSTAT_END_WRITE_CONVERT+2)
-       __uint32_t              xs_write_calls;
-       __uint32_t              xs_read_calls;
-# define XFSSTAT_END_ATTRIBUTE_OPS     (XFSSTAT_END_READ_WRITE_OPS+4)
-       __uint32_t              xs_attr_get;
-       __uint32_t              xs_attr_set;
-       __uint32_t              xs_attr_remove;
-       __uint32_t              xs_attr_list;
-# define XFSSTAT_END_INODE_CLUSTER     (XFSSTAT_END_ATTRIBUTE_OPS+3)
-       __uint32_t              xs_iflush_count;
-       __uint32_t              xs_icluster_flushcnt;
-       __uint32_t              xs_icluster_flushinode;
-# define XFSSTAT_END_VNODE_OPS         (XFSSTAT_END_INODE_CLUSTER+8)
-       __uint32_t              vn_active;      /* # vnodes not on free lists */
-       __uint32_t              vn_alloc;       /* # times vn_alloc called */
-       __uint32_t              vn_get;         /* # times vn_get called */
-       __uint32_t              vn_hold;        /* # times vn_hold called */
-       __uint32_t              vn_rele;        /* # times vn_rele called */
-       __uint32_t              vn_reclaim;     /* # times vn_reclaim called */
-       __uint32_t              vn_remove;      /* # times vn_remove called */
-       __uint32_t              vn_free;        /* # times vn_free called */
-#define XFSSTAT_END_BUF                        (XFSSTAT_END_VNODE_OPS+9)
-       __uint32_t              xb_get;
-       __uint32_t              xb_create;
-       __uint32_t              xb_get_locked;
-       __uint32_t              xb_get_locked_waited;
-       __uint32_t              xb_busy_locked;
-       __uint32_t              xb_miss_locked;
-       __uint32_t              xb_page_retries;
-       __uint32_t              xb_page_found;
-       __uint32_t              xb_get_read;
-/* Version 2 btree counters */
-#define XFSSTAT_END_ABTB_V2            (XFSSTAT_END_BUF+15)
-       __uint32_t              xs_abtb_2_lookup;
-       __uint32_t              xs_abtb_2_compare;
-       __uint32_t              xs_abtb_2_insrec;
-       __uint32_t              xs_abtb_2_delrec;
-       __uint32_t              xs_abtb_2_newroot;
-       __uint32_t              xs_abtb_2_killroot;
-       __uint32_t              xs_abtb_2_increment;
-       __uint32_t              xs_abtb_2_decrement;
-       __uint32_t              xs_abtb_2_lshift;
-       __uint32_t              xs_abtb_2_rshift;
-       __uint32_t              xs_abtb_2_split;
-       __uint32_t              xs_abtb_2_join;
-       __uint32_t              xs_abtb_2_alloc;
-       __uint32_t              xs_abtb_2_free;
-       __uint32_t              xs_abtb_2_moves;
-#define XFSSTAT_END_ABTC_V2            (XFSSTAT_END_ABTB_V2+15)
-       __uint32_t              xs_abtc_2_lookup;
-       __uint32_t              xs_abtc_2_compare;
-       __uint32_t              xs_abtc_2_insrec;
-       __uint32_t              xs_abtc_2_delrec;
-       __uint32_t              xs_abtc_2_newroot;
-       __uint32_t              xs_abtc_2_killroot;
-       __uint32_t              xs_abtc_2_increment;
-       __uint32_t              xs_abtc_2_decrement;
-       __uint32_t              xs_abtc_2_lshift;
-       __uint32_t              xs_abtc_2_rshift;
-       __uint32_t              xs_abtc_2_split;
-       __uint32_t              xs_abtc_2_join;
-       __uint32_t              xs_abtc_2_alloc;
-       __uint32_t              xs_abtc_2_free;
-       __uint32_t              xs_abtc_2_moves;
-#define XFSSTAT_END_BMBT_V2            (XFSSTAT_END_ABTC_V2+15)
-       __uint32_t              xs_bmbt_2_lookup;
-       __uint32_t              xs_bmbt_2_compare;
-       __uint32_t              xs_bmbt_2_insrec;
-       __uint32_t              xs_bmbt_2_delrec;
-       __uint32_t              xs_bmbt_2_newroot;
-       __uint32_t              xs_bmbt_2_killroot;
-       __uint32_t              xs_bmbt_2_increment;
-       __uint32_t              xs_bmbt_2_decrement;
-       __uint32_t              xs_bmbt_2_lshift;
-       __uint32_t              xs_bmbt_2_rshift;
-       __uint32_t              xs_bmbt_2_split;
-       __uint32_t              xs_bmbt_2_join;
-       __uint32_t              xs_bmbt_2_alloc;
-       __uint32_t              xs_bmbt_2_free;
-       __uint32_t              xs_bmbt_2_moves;
-#define XFSSTAT_END_IBT_V2             (XFSSTAT_END_BMBT_V2+15)
-       __uint32_t              xs_ibt_2_lookup;
-       __uint32_t              xs_ibt_2_compare;
-       __uint32_t              xs_ibt_2_insrec;
-       __uint32_t              xs_ibt_2_delrec;
-       __uint32_t              xs_ibt_2_newroot;
-       __uint32_t              xs_ibt_2_killroot;
-       __uint32_t              xs_ibt_2_increment;
-       __uint32_t              xs_ibt_2_decrement;
-       __uint32_t              xs_ibt_2_lshift;
-       __uint32_t              xs_ibt_2_rshift;
-       __uint32_t              xs_ibt_2_split;
-       __uint32_t              xs_ibt_2_join;
-       __uint32_t              xs_ibt_2_alloc;
-       __uint32_t              xs_ibt_2_free;
-       __uint32_t              xs_ibt_2_moves;
-/* Extra precision counters */
-       __uint64_t              xs_xstrat_bytes;
-       __uint64_t              xs_write_bytes;
-       __uint64_t              xs_read_bytes;
-};
-
-DECLARE_PER_CPU(struct xfsstats, xfsstats);
-
-/*
- * We don't disable preempt, not too worried about poking the
- * wrong CPU's stat for now (also aggregated before reporting).
- */
-#define XFS_STATS_INC(v)       (per_cpu(xfsstats, current_cpu()).v++)
-#define XFS_STATS_DEC(v)       (per_cpu(xfsstats, current_cpu()).v--)
-#define XFS_STATS_ADD(v, inc)  (per_cpu(xfsstats, current_cpu()).v += (inc))
-
-extern int xfs_init_procfs(void);
-extern void xfs_cleanup_procfs(void);
-
-
-#else  /* !CONFIG_PROC_FS */
-
-# define XFS_STATS_INC(count)
-# define XFS_STATS_DEC(count)
-# define XFS_STATS_ADD(count, inc)
-
-static inline int xfs_init_procfs(void)
-{
-       return 0;
-}
-
-static inline void xfs_cleanup_procfs(void)
-{
-}
-
-#endif /* !CONFIG_PROC_FS */
-
-#endif /* __XFS_STATS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c

deleted file mode 100644 (file)

index 9a72dda..0000000
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ /dev/null
@@ -1,1773 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include "xfs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_fsops.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_vnodeops.h"
-#include "xfs_log_priv.h"
-#include "xfs_trans_priv.h"
-#include "xfs_filestream.h"
-#include "xfs_da_btree.h"
-#include "xfs_extfree_item.h"
-#include "xfs_mru_cache.h"
-#include "xfs_inode_item.h"
-#include "xfs_sync.h"
-#include "xfs_trace.h"
-
-#include <linux/namei.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/mount.h>
-#include <linux/mempool.h>
-#include <linux/writeback.h>
-#include <linux/kthread.h>
-#include <linux/freezer.h>
-#include <linux/parser.h>
-
-static const struct super_operations xfs_super_operations;
-static kmem_zone_t *xfs_ioend_zone;
-mempool_t *xfs_ioend_pool;
-
-#define MNTOPT_LOGBUFS "logbufs"       /* number of XFS log buffers */
-#define MNTOPT_LOGBSIZE        "logbsize"      /* size of XFS log buffers */
-#define MNTOPT_LOGDEV  "logdev"        /* log device */
-#define MNTOPT_RTDEV   "rtdev"         /* realtime I/O device */
-#define MNTOPT_BIOSIZE "biosize"       /* log2 of preferred buffered io size */
-#define MNTOPT_WSYNC   "wsync"         /* safe-mode nfs compatible mount */
-#define MNTOPT_NOALIGN "noalign"       /* turn off stripe alignment */
-#define MNTOPT_SWALLOC "swalloc"       /* turn on stripe width allocation */
-#define MNTOPT_SUNIT   "sunit"         /* data volume stripe unit */
-#define MNTOPT_SWIDTH  "swidth"        /* data volume stripe width */
-#define MNTOPT_NOUUID  "nouuid"        /* ignore filesystem UUID */
-#define MNTOPT_MTPT    "mtpt"          /* filesystem mount point */
-#define MNTOPT_GRPID   "grpid"         /* group-ID from parent directory */
-#define MNTOPT_NOGRPID "nogrpid"       /* group-ID from current process */
-#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
-#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
-#define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
-#define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
-#define MNTOPT_BARRIER "barrier"       /* use writer barriers for log write and
-                                        * unwritten extent conversion */
-#define MNTOPT_NOBARRIER "nobarrier"   /* .. disable */
-#define MNTOPT_64BITINODE   "inode64"  /* inodes can be allocated anywhere */
-#define MNTOPT_IKEEP   "ikeep"         /* do not free empty inode clusters */
-#define MNTOPT_NOIKEEP "noikeep"       /* free empty inode clusters */
-#define MNTOPT_LARGEIO    "largeio"    /* report large I/O sizes in stat() */
-#define MNTOPT_NOLARGEIO   "nolargeio" /* do not report large I/O sizes
-                                        * in stat(). */
-#define MNTOPT_ATTR2   "attr2"         /* do use attr2 attribute format */
-#define MNTOPT_NOATTR2 "noattr2"       /* do not use attr2 attribute format */
-#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
-#define MNTOPT_QUOTA   "quota"         /* disk quotas (user) */
-#define MNTOPT_NOQUOTA "noquota"       /* no quotas */
-#define MNTOPT_USRQUOTA        "usrquota"      /* user quota enabled */
-#define MNTOPT_GRPQUOTA        "grpquota"      /* group quota enabled */
-#define MNTOPT_PRJQUOTA        "prjquota"      /* project quota enabled */
-#define MNTOPT_UQUOTA  "uquota"        /* user quota (IRIX variant) */
-#define MNTOPT_GQUOTA  "gquota"        /* group quota (IRIX variant) */
-#define MNTOPT_PQUOTA  "pquota"        /* project quota (IRIX variant) */
-#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
-#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
-#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
-#define MNTOPT_QUOTANOENF  "qnoenforce"        /* same as uqnoenforce */
-#define MNTOPT_DELAYLOG    "delaylog"  /* Delayed logging enabled */
-#define MNTOPT_NODELAYLOG  "nodelaylog"        /* Delayed logging disabled */
-#define MNTOPT_DISCARD    "discard"    /* Discard unused blocks */
-#define MNTOPT_NODISCARD   "nodiscard" /* Do not discard unused blocks */
-
-/*
- * Table driven mount option parser.
- *
- * Currently only used for remount, but it will be used for mount
- * in the future, too.
- */
-enum {
-       Opt_barrier, Opt_nobarrier, Opt_err
-};
-
-static const match_table_t tokens = {
-       {Opt_barrier, "barrier"},
-       {Opt_nobarrier, "nobarrier"},
-       {Opt_err, NULL}
-};
-
-
-STATIC unsigned long
-suffix_strtoul(char *s, char **endp, unsigned int base)
-{
-       int     last, shift_left_factor = 0;
-       char    *value = s;
-
-       last = strlen(value) - 1;
-       if (value[last] == 'K' || value[last] == 'k') {
-               shift_left_factor = 10;
-               value[last] = '\0';
-       }
-       if (value[last] == 'M' || value[last] == 'm') {
-               shift_left_factor = 20;
-               value[last] = '\0';
-       }
-       if (value[last] == 'G' || value[last] == 'g') {
-               shift_left_factor = 30;
-               value[last] = '\0';
-       }
-
-       return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
-}
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock has _not_ yet been read in.
- *
- * Note that this function leaks the various device name allocations on
- * failure.  The caller takes care of them.
- */
-STATIC int
-xfs_parseargs(
-       struct xfs_mount        *mp,
-       char                    *options)
-{
-       struct super_block      *sb = mp->m_super;
-       char                    *this_char, *value, *eov;
-       int                     dsunit = 0;
-       int                     dswidth = 0;
-       int                     iosize = 0;
-       __uint8_t               iosizelog = 0;
-
-       /*
-        * set up the mount name first so all the errors will refer to the
-        * correct device.
-        */
-       mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
-       if (!mp->m_fsname)
-               return ENOMEM;
-       mp->m_fsname_len = strlen(mp->m_fsname) + 1;
-
-       /*
-        * Copy binary VFS mount flags we are interested in.
-        */
-       if (sb->s_flags & MS_RDONLY)
-               mp->m_flags |= XFS_MOUNT_RDONLY;
-       if (sb->s_flags & MS_DIRSYNC)
-               mp->m_flags |= XFS_MOUNT_DIRSYNC;
-       if (sb->s_flags & MS_SYNCHRONOUS)
-               mp->m_flags |= XFS_MOUNT_WSYNC;
-
-       /*
-        * Set some default flags that could be cleared by the mount option
-        * parsing.
-        */
-       mp->m_flags |= XFS_MOUNT_BARRIER;
-       mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-       mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
-       mp->m_flags |= XFS_MOUNT_DELAYLOG;
-
-       /*
-        * These can be overridden by the mount option parsing.
-        */
-       mp->m_logbufs = -1;
-       mp->m_logbsize = -1;
-
-       if (!options)
-               goto done;
-
-       while ((this_char = strsep(&options, ",")) != NULL) {
-               if (!*this_char)
-                       continue;
-               if ((value = strchr(this_char, '=')) != NULL)
-                       *value++ = 0;
-
-               if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       mp->m_logbufs = simple_strtoul(value, &eov, 10);
-               } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       mp->m_logbsize = suffix_strtoul(value, &eov, 10);
-               } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
-                       if (!mp->m_logname)
-                               return ENOMEM;
-               } else if (!strcmp(this_char, MNTOPT_MTPT)) {
-                       xfs_warn(mp, "%s option not allowed on this system",
-                               this_char);
-                       return EINVAL;
-               } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
-                       if (!mp->m_rtname)
-                               return ENOMEM;
-               } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       iosize = simple_strtoul(value, &eov, 10);
-                       iosizelog = ffs(iosize) - 1;
-               } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       iosize = suffix_strtoul(value, &eov, 10);
-                       iosizelog = ffs(iosize) - 1;
-               } else if (!strcmp(this_char, MNTOPT_GRPID) ||
-                          !strcmp(this_char, MNTOPT_BSDGROUPS)) {
-                       mp->m_flags |= XFS_MOUNT_GRPID;
-               } else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
-                          !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
-                       mp->m_flags &= ~XFS_MOUNT_GRPID;
-               } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
-                       mp->m_flags |= XFS_MOUNT_WSYNC;
-               } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
-                       mp->m_flags |= XFS_MOUNT_NORECOVERY;
-               } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
-                       mp->m_flags |= XFS_MOUNT_NOALIGN;
-               } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
-                       mp->m_flags |= XFS_MOUNT_SWALLOC;
-               } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       dsunit = simple_strtoul(value, &eov, 10);
-               } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return EINVAL;
-                       }
-                       dswidth = simple_strtoul(value, &eov, 10);
-               } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
-                       mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
-#if !XFS_BIG_INUMS
-                       xfs_warn(mp, "%s option not allowed on this system",
-                               this_char);
-                       return EINVAL;
-#endif
-               } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
-                       mp->m_flags |= XFS_MOUNT_NOUUID;
-               } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
-                       mp->m_flags |= XFS_MOUNT_BARRIER;
-               } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
-                       mp->m_flags &= ~XFS_MOUNT_BARRIER;
-               } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
-                       mp->m_flags |= XFS_MOUNT_IKEEP;
-               } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
-                       mp->m_flags &= ~XFS_MOUNT_IKEEP;
-               } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
-                       mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
-               } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
-                       mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-               } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
-                       mp->m_flags |= XFS_MOUNT_ATTR2;
-               } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
-                       mp->m_flags &= ~XFS_MOUNT_ATTR2;
-                       mp->m_flags |= XFS_MOUNT_NOATTR2;
-               } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
-                       mp->m_flags |= XFS_MOUNT_FILESTREAMS;
-               } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
-                       mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
-                                         XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
-                                         XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
-                                         XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
-                          !strcmp(this_char, MNTOPT_UQUOTA) ||
-                          !strcmp(this_char, MNTOPT_USRQUOTA)) {
-                       mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
-                                        XFS_UQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
-                          !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
-                       mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
-                       mp->m_qflags &= ~XFS_UQUOTA_ENFD;
-               } else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
-                          !strcmp(this_char, MNTOPT_PRJQUOTA)) {
-                       mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
-                                        XFS_OQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
-                       mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
-                       mp->m_qflags &= ~XFS_OQUOTA_ENFD;
-               } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
-                          !strcmp(this_char, MNTOPT_GRPQUOTA)) {
-                       mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
-                                        XFS_OQUOTA_ENFD);
-               } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
-                       mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
-                       mp->m_qflags &= ~XFS_OQUOTA_ENFD;
-               } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
-                       mp->m_flags |= XFS_MOUNT_DELAYLOG;
-               } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
-                       mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
-               } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
-                       mp->m_flags |= XFS_MOUNT_DISCARD;
-               } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
-                       mp->m_flags &= ~XFS_MOUNT_DISCARD;
-               } else if (!strcmp(this_char, "ihashsize")) {
-                       xfs_warn(mp,
-       "ihashsize no longer used, option is deprecated.");
-               } else if (!strcmp(this_char, "osyncisdsync")) {
-                       xfs_warn(mp,
-       "osyncisdsync has no effect, option is deprecated.");
-               } else if (!strcmp(this_char, "osyncisosync")) {
-                       xfs_warn(mp,
-       "osyncisosync has no effect, option is deprecated.");
-               } else if (!strcmp(this_char, "irixsgid")) {
-                       xfs_warn(mp,
-       "irixsgid is now a sysctl(2) variable, option is deprecated.");
-               } else {
-                       xfs_warn(mp, "unknown mount option [%s].", this_char);
-                       return EINVAL;
-               }
-       }
-
-       /*
-        * no recovery flag requires a read-only mount
-        */
-       if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
-           !(mp->m_flags & XFS_MOUNT_RDONLY)) {
-               xfs_warn(mp, "no-recovery mounts must be read-only.");
-               return EINVAL;
-       }
-
-       if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
-               xfs_warn(mp,
-       "sunit and swidth options incompatible with the noalign option");
-               return EINVAL;
-       }
-
-       if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
-           !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
-               xfs_warn(mp,
-       "the discard option is incompatible with the nodelaylog option");
-               return EINVAL;
-       }
-
-#ifndef CONFIG_XFS_QUOTA
-       if (XFS_IS_QUOTA_RUNNING(mp)) {
-               xfs_warn(mp, "quota support not available in this kernel.");
-               return EINVAL;
-       }
-#endif
-
-       if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
-           (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
-               xfs_warn(mp, "cannot mount with both project and group quota");
-               return EINVAL;
-       }
-
-       if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
-               xfs_warn(mp, "sunit and swidth must be specified together");
-               return EINVAL;
-       }
-
-       if (dsunit && (dswidth % dsunit != 0)) {
-               xfs_warn(mp,
-       "stripe width (%d) must be a multiple of the stripe unit (%d)",
-                       dswidth, dsunit);
-               return EINVAL;
-       }
-
-done:
-       if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
-               /*
-                * At this point the superblock has not been read
-                * in, therefore we do not know the block size.
-                * Before the mount call ends we will convert
-                * these to FSBs.
-                */
-               if (dsunit) {
-                       mp->m_dalign = dsunit;
-                       mp->m_flags |= XFS_MOUNT_RETERR;
-               }
-
-               if (dswidth)
-                       mp->m_swidth = dswidth;
-       }
-
-       if (mp->m_logbufs != -1 &&
-           mp->m_logbufs != 0 &&
-           (mp->m_logbufs < XLOG_MIN_ICLOGS ||
-            mp->m_logbufs > XLOG_MAX_ICLOGS)) {
-               xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
-                       mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
-               return XFS_ERROR(EINVAL);
-       }
-       if (mp->m_logbsize != -1 &&
-           mp->m_logbsize !=  0 &&
-           (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
-            mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
-            !is_power_of_2(mp->m_logbsize))) {
-               xfs_warn(mp,
-                       "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
-                       mp->m_logbsize);
-               return XFS_ERROR(EINVAL);
-       }
-
-       if (iosizelog) {
-               if (iosizelog > XFS_MAX_IO_LOG ||
-                   iosizelog < XFS_MIN_IO_LOG) {
-                       xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
-                               iosizelog, XFS_MIN_IO_LOG,
-                               XFS_MAX_IO_LOG);
-                       return XFS_ERROR(EINVAL);
-               }
-
-               mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
-               mp->m_readio_log = iosizelog;
-               mp->m_writeio_log = iosizelog;
-       }
-
-       return 0;
-}
-
-struct proc_xfs_info {
-       int     flag;
-       char    *str;
-};
-
-STATIC int
-xfs_showargs(
-       struct xfs_mount        *mp,
-       struct seq_file         *m)
-{
-       static struct proc_xfs_info xfs_info_set[] = {
-               /* the few simple ones we can get from the mount struct */
-               { XFS_MOUNT_IKEEP,              "," MNTOPT_IKEEP },
-               { XFS_MOUNT_WSYNC,              "," MNTOPT_WSYNC },
-               { XFS_MOUNT_NOALIGN,            "," MNTOPT_NOALIGN },
-               { XFS_MOUNT_SWALLOC,            "," MNTOPT_SWALLOC },
-               { XFS_MOUNT_NOUUID,             "," MNTOPT_NOUUID },
-               { XFS_MOUNT_NORECOVERY,         "," MNTOPT_NORECOVERY },
-               { XFS_MOUNT_ATTR2,              "," MNTOPT_ATTR2 },
-               { XFS_MOUNT_FILESTREAMS,        "," MNTOPT_FILESTREAM },
-               { XFS_MOUNT_GRPID,              "," MNTOPT_GRPID },
-               { XFS_MOUNT_DELAYLOG,           "," MNTOPT_DELAYLOG },
-               { XFS_MOUNT_DISCARD,            "," MNTOPT_DISCARD },
-               { 0, NULL }
-       };
-       static struct proc_xfs_info xfs_info_unset[] = {
-               /* the few simple ones we can get from the mount struct */
-               { XFS_MOUNT_COMPAT_IOSIZE,      "," MNTOPT_LARGEIO },
-               { XFS_MOUNT_BARRIER,            "," MNTOPT_NOBARRIER },
-               { XFS_MOUNT_SMALL_INUMS,        "," MNTOPT_64BITINODE },
-               { 0, NULL }
-       };
-       struct proc_xfs_info    *xfs_infop;
-
-       for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
-               if (mp->m_flags & xfs_infop->flag)
-                       seq_puts(m, xfs_infop->str);
-       }
-       for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
-               if (!(mp->m_flags & xfs_infop->flag))
-                       seq_puts(m, xfs_infop->str);
-       }
-
-       if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
-               seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
-                               (int)(1 << mp->m_writeio_log) >> 10);
-
-       if (mp->m_logbufs > 0)
-               seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
-       if (mp->m_logbsize > 0)
-               seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
-
-       if (mp->m_logname)
-               seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
-       if (mp->m_rtname)
-               seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
-
-       if (mp->m_dalign > 0)
-               seq_printf(m, "," MNTOPT_SUNIT "=%d",
-                               (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
-       if (mp->m_swidth > 0)
-               seq_printf(m, "," MNTOPT_SWIDTH "=%d",
-                               (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
-
-       if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
-               seq_puts(m, "," MNTOPT_USRQUOTA);
-       else if (mp->m_qflags & XFS_UQUOTA_ACCT)
-               seq_puts(m, "," MNTOPT_UQUOTANOENF);
-
-       /* Either project or group quotas can be active, not both */
-
-       if (mp->m_qflags & XFS_PQUOTA_ACCT) {
-               if (mp->m_qflags & XFS_OQUOTA_ENFD)
-                       seq_puts(m, "," MNTOPT_PRJQUOTA);
-               else
-                       seq_puts(m, "," MNTOPT_PQUOTANOENF);
-       } else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
-               if (mp->m_qflags & XFS_OQUOTA_ENFD)
-                       seq_puts(m, "," MNTOPT_GRPQUOTA);
-               else
-                       seq_puts(m, "," MNTOPT_GQUOTANOENF);
-       }
-
-       if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
-               seq_puts(m, "," MNTOPT_NOQUOTA);
-
-       return 0;
-}
-__uint64_t
-xfs_max_file_offset(
-       unsigned int            blockshift)
-{
-       unsigned int            pagefactor = 1;
-       unsigned int            bitshift = BITS_PER_LONG - 1;
-
-       /* Figure out maximum filesize, on Linux this can depend on
-        * the filesystem blocksize (on 32 bit platforms).
-        * __block_write_begin does this in an [unsigned] long...
-        *      page->index << (PAGE_CACHE_SHIFT - bbits)
-        * So, for page sized blocks (4K on 32 bit platforms),
-        * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
-        *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
-        * but for smaller blocksizes it is less (bbits = log2 bsize).
-        * Note1: get_block_t takes a long (implicit cast from above)
-        * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
-        * can optionally convert the [unsigned] long from above into
-        * an [unsigned] long long.
-        */
-
-#if BITS_PER_LONG == 32
-# if defined(CONFIG_LBDAF)
-       ASSERT(sizeof(sector_t) == 8);
-       pagefactor = PAGE_CACHE_SIZE;
-       bitshift = BITS_PER_LONG;
-# else
-       pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
-# endif
-#endif
-
-       return (((__uint64_t)pagefactor) << bitshift) - 1;
-}
-
-STATIC int
-xfs_blkdev_get(
-       xfs_mount_t             *mp,
-       const char              *name,
-       struct block_device     **bdevp)
-{
-       int                     error = 0;
-
-       *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
-                                   mp);
-       if (IS_ERR(*bdevp)) {
-               error = PTR_ERR(*bdevp);
-               xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error);
-       }
-
-       return -error;
-}
-
-STATIC void
-xfs_blkdev_put(
-       struct block_device     *bdev)
-{
-       if (bdev)
-               blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-}
-
-void
-xfs_blkdev_issue_flush(
-       xfs_buftarg_t           *buftarg)
-{
-       blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
-}
-
-STATIC void
-xfs_close_devices(
-       struct xfs_mount        *mp)
-{
-       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
-               struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
-               xfs_free_buftarg(mp, mp->m_logdev_targp);
-               xfs_blkdev_put(logdev);
-       }
-       if (mp->m_rtdev_targp) {
-               struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
-               xfs_free_buftarg(mp, mp->m_rtdev_targp);
-               xfs_blkdev_put(rtdev);
-       }
-       xfs_free_buftarg(mp, mp->m_ddev_targp);
-}
-
-/*
- * The file system configurations are:
- *     (1) device (partition) with data and internal log
- *     (2) logical volume with data and log subvolumes.
- *     (3) logical volume with data, log, and realtime subvolumes.
- *
- * We only have to handle opening the log and realtime volumes here if
- * they are present.  The data subvolume has already been opened by
- * get_sb_bdev() and is stored in sb->s_bdev.
- */
-STATIC int
-xfs_open_devices(
-       struct xfs_mount        *mp)
-{
-       struct block_device     *ddev = mp->m_super->s_bdev;
-       struct block_device     *logdev = NULL, *rtdev = NULL;
-       int                     error;
-
-       /*
-        * Open real time and log devices - order is important.
-        */
-       if (mp->m_logname) {
-               error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
-               if (error)
-                       goto out;
-       }
-
-       if (mp->m_rtname) {
-               error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
-               if (error)
-                       goto out_close_logdev;
-
-               if (rtdev == ddev || rtdev == logdev) {
-                       xfs_warn(mp,
-       "Cannot mount filesystem with identical rtdev and ddev/logdev.");
-                       error = EINVAL;
-                       goto out_close_rtdev;
-               }
-       }
-
-       /*
-        * Setup xfs_mount buffer target pointers
-        */
-       error = ENOMEM;
-       mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
-       if (!mp->m_ddev_targp)
-               goto out_close_rtdev;
-
-       if (rtdev) {
-               mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
-                                                       mp->m_fsname);
-               if (!mp->m_rtdev_targp)
-                       goto out_free_ddev_targ;
-       }
-
-       if (logdev && logdev != ddev) {
-               mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
-                                                       mp->m_fsname);
-               if (!mp->m_logdev_targp)
-                       goto out_free_rtdev_targ;
-       } else {
-               mp->m_logdev_targp = mp->m_ddev_targp;
-       }
-
-       return 0;
-
- out_free_rtdev_targ:
-       if (mp->m_rtdev_targp)
-               xfs_free_buftarg(mp, mp->m_rtdev_targp);
- out_free_ddev_targ:
-       xfs_free_buftarg(mp, mp->m_ddev_targp);
- out_close_rtdev:
-       if (rtdev)
-               xfs_blkdev_put(rtdev);
- out_close_logdev:
-       if (logdev && logdev != ddev)
-               xfs_blkdev_put(logdev);
- out:
-       return error;
-}
-
-/*
- * Setup xfs_mount buffer target pointers based on superblock
- */
-STATIC int
-xfs_setup_devices(
-       struct xfs_mount        *mp)
-{
-       int                     error;
-
-       error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
-                                   mp->m_sb.sb_sectsize);
-       if (error)
-               return error;
-
-       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
-               unsigned int    log_sector_size = BBSIZE;
-
-               if (xfs_sb_version_hassector(&mp->m_sb))
-                       log_sector_size = mp->m_sb.sb_logsectsize;
-               error = xfs_setsize_buftarg(mp->m_logdev_targp,
-                                           mp->m_sb.sb_blocksize,
-                                           log_sector_size);
-               if (error)
-                       return error;
-       }
-       if (mp->m_rtdev_targp) {
-               error = xfs_setsize_buftarg(mp->m_rtdev_targp,
-                                           mp->m_sb.sb_blocksize,
-                                           mp->m_sb.sb_sectsize);
-               if (error)
-                       return error;
-       }
-
-       return 0;
-}
-
-/* Catch misguided souls that try to use this interface on XFS */
-STATIC struct inode *
-xfs_fs_alloc_inode(
-       struct super_block      *sb)
-{
-       BUG();
-       return NULL;
-}
-
-/*
- * Now that the generic code is guaranteed not to be accessing
- * the linux inode, we can reclaim the inode.
- */
-STATIC void
-xfs_fs_destroy_inode(
-       struct inode            *inode)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-
-       trace_xfs_destroy_inode(ip);
-
-       XFS_STATS_INC(vn_reclaim);
-
-       /* bad inode, get out here ASAP */
-       if (is_bad_inode(inode))
-               goto out_reclaim;
-
-       xfs_ioend_wait(ip);
-
-       ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
-
-       /*
-        * We should never get here with one of the reclaim flags already set.
-        */
-       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
-       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
-
-       /*
-        * We always use background reclaim here because even if the
-        * inode is clean, it still may be under IO and hence we have
-        * to take the flush lock. The background reclaim path handles
-        * this more efficiently than we can here, so simply let background
-        * reclaim tear down all inodes.
-        */
-out_reclaim:
-       xfs_inode_set_reclaim_tag(ip);
-}
-
-/*
- * Slab object creation initialisation for the XFS inode.
- * This covers only the idempotent fields in the XFS inode;
- * all other fields need to be initialised on allocation
- * from the slab. This avoids the need to repeatedly initialise
- * fields in the xfs inode that left in the initialise state
- * when freeing the inode.
- */
-STATIC void
-xfs_fs_inode_init_once(
-       void                    *inode)
-{
-       struct xfs_inode        *ip = inode;
-
-       memset(ip, 0, sizeof(struct xfs_inode));
-
-       /* vfs inode */
-       inode_init_once(VFS_I(ip));
-
-       /* xfs inode */
-       atomic_set(&ip->i_iocount, 0);
-       atomic_set(&ip->i_pincount, 0);
-       spin_lock_init(&ip->i_flags_lock);
-       init_waitqueue_head(&ip->i_ipin_wait);
-       /*
-        * Because we want to use a counting completion, complete
-        * the flush completion once to allow a single access to
-        * the flush completion without blocking.
-        */
-       init_completion(&ip->i_flush);
-       complete(&ip->i_flush);
-
-       mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
-                    "xfsino", ip->i_ino);
-}
-
-/*
- * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
- * we catch unlogged VFS level updates to the inode.
- *
- * We need the barrier() to maintain correct ordering between unlogged
- * updates and the transaction commit code that clears the i_update_core
- * field. This requires all updates to be completed before marking the
- * inode dirty.
- */
-STATIC void
-xfs_fs_dirty_inode(
-       struct inode    *inode,
-       int             flags)
-{
-       barrier();
-       XFS_I(inode)->i_update_core = 1;
-}
-
-STATIC int
-xfs_log_inode(
-       struct xfs_inode        *ip)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_trans        *tp;
-       int                     error;
-
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-       tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-       error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
-
-       if (error) {
-               xfs_trans_cancel(tp, 0);
-               /* we need to return with the lock hold shared */
-               xfs_ilock(ip, XFS_ILOCK_SHARED);
-               return error;
-       }
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-       /*
-        * Note - it's possible that we might have pushed ourselves out of the
-        * way during trans_reserve which would flush the inode.  But there's
-        * no guarantee that the inode buffer has actually gone out yet (it's
-        * delwri).  Plus the buffer could be pinned anyway if it's part of
-        * an inode in another recent transaction.  So we play it safe and
-        * fire off the transaction anyway.
-        */
-       xfs_trans_ijoin(tp, ip);
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-       error = xfs_trans_commit(tp, 0);
-       xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
-
-       return error;
-}
-
-STATIC int
-xfs_fs_write_inode(
-       struct inode            *inode,
-       struct writeback_control *wbc)
-{
-       struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
-       int                     error = EAGAIN;
-
-       trace_xfs_write_inode(ip);
-
-       if (XFS_FORCED_SHUTDOWN(mp))
-               return XFS_ERROR(EIO);
-
-       if (wbc->sync_mode == WB_SYNC_ALL) {
-               /*
-                * Make sure the inode has made it it into the log.  Instead
-                * of forcing it all the way to stable storage using a
-                * synchronous transaction we let the log force inside the
-                * ->sync_fs call do that for thus, which reduces the number
-                * of synchronous log foces dramatically.
-                */
-               xfs_ioend_wait(ip);
-               xfs_ilock(ip, XFS_ILOCK_SHARED);
-               if (ip->i_update_core) {
-                       error = xfs_log_inode(ip);
-                       if (error)
-                               goto out_unlock;
-               }
-       } else {
-               /*
-                * We make this non-blocking if the inode is contended, return
-                * EAGAIN to indicate to the caller that they did not succeed.
-                * This prevents the flush path from blocking on inodes inside
-                * another operation right now, they get caught later by
-                * xfs_sync.
-                */
-               if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
-                       goto out;
-
-               if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
-                       goto out_unlock;
-
-               /*
-                * Now we have the flush lock and the inode is not pinned, we
-                * can check if the inode is really clean as we know that
-                * there are no pending transaction completions, it is not
-                * waiting on the delayed write queue and there is no IO in
-                * progress.
-                */
-               if (xfs_inode_clean(ip)) {
-                       xfs_ifunlock(ip);
-                       error = 0;
-                       goto out_unlock;
-               }
-               error = xfs_iflush(ip, SYNC_TRYLOCK);
-       }
-
- out_unlock:
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
- out:
-       /*
-        * if we failed to write out the inode then mark
-        * it dirty again so we'll try again later.
-        */
-       if (error)
-               xfs_mark_inode_dirty_sync(ip);
-       return -error;
-}
-
-STATIC void
-xfs_fs_evict_inode(
-       struct inode            *inode)
-{
-       xfs_inode_t             *ip = XFS_I(inode);
-
-       trace_xfs_evict_inode(ip);
-
-       truncate_inode_pages(&inode->i_data, 0);
-       end_writeback(inode);
-       XFS_STATS_INC(vn_rele);
-       XFS_STATS_INC(vn_remove);
-       XFS_STATS_DEC(vn_active);
-
-       /*
-        * The iolock is used by the file system to coordinate reads,
-        * writes, and block truncates.  Up to this point the lock
-        * protected concurrent accesses by users of the inode.  But
-        * from here forward we're doing some final processing of the
-        * inode because we're done with it, and although we reuse the
-        * iolock for protection it is really a distinct lock class
-        * (in the lockdep sense) from before.  To keep lockdep happy
-        * (and basically indicate what we are doing), we explicitly
-        * re-init the iolock here.
-        */
-       ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
-       mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
-       lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
-                       &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
-
-       xfs_inactive(ip);
-}
-
-STATIC void
-xfs_free_fsname(
-       struct xfs_mount        *mp)
-{
-       kfree(mp->m_fsname);
-       kfree(mp->m_rtname);
-       kfree(mp->m_logname);
-}
-
-STATIC void
-xfs_fs_put_super(
-       struct super_block      *sb)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       xfs_syncd_stop(mp);
-
-       /*
-        * Blow away any referenced inode in the filestreams cache.
-        * This can and will cause log traffic as inodes go inactive
-        * here.
-        */
-       xfs_filestream_unmount(mp);
-
-       XFS_bflush(mp->m_ddev_targp);
-
-       xfs_unmountfs(mp);
-       xfs_freesb(mp);
-       xfs_icsb_destroy_counters(mp);
-       xfs_close_devices(mp);
-       xfs_free_fsname(mp);
-       kfree(mp);
-}
-
-STATIC int
-xfs_fs_sync_fs(
-       struct super_block      *sb,
-       int                     wait)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-       int                     error;
-
-       /*
-        * Not much we can do for the first async pass.  Writing out the
-        * superblock would be counter-productive as we are going to redirty
-        * when writing out other data and metadata (and writing out a single
-        * block is quite fast anyway).
-        *
-        * Try to asynchronously kick off quota syncing at least.
-        */
-       if (!wait) {
-               xfs_qm_sync(mp, SYNC_TRYLOCK);
-               return 0;
-       }
-
-       error = xfs_quiesce_data(mp);
-       if (error)
-               return -error;
-
-       if (laptop_mode) {
-               /*
-                * The disk must be active because we're syncing.
-                * We schedule xfssyncd now (now that the disk is
-                * active) instead of later (when it might not be).
-                */
-               flush_delayed_work_sync(&mp->m_sync_work);
-       }
-
-       return 0;
-}
-
-STATIC int
-xfs_fs_statfs(
-       struct dentry           *dentry,
-       struct kstatfs          *statp)
-{
-       struct xfs_mount        *mp = XFS_M(dentry->d_sb);
-       xfs_sb_t                *sbp = &mp->m_sb;
-       struct xfs_inode        *ip = XFS_I(dentry->d_inode);
-       __uint64_t              fakeinos, id;
-       xfs_extlen_t            lsize;
-       __int64_t               ffree;
-
-       statp->f_type = XFS_SB_MAGIC;
-       statp->f_namelen = MAXNAMELEN - 1;
-
-       id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
-       statp->f_fsid.val[0] = (u32)id;
-       statp->f_fsid.val[1] = (u32)(id >> 32);
-
-       xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
-
-       spin_lock(&mp->m_sb_lock);
-       statp->f_bsize = sbp->sb_blocksize;
-       lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
-       statp->f_blocks = sbp->sb_dblocks - lsize;
-       statp->f_bfree = statp->f_bavail =
-                               sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
-       fakeinos = statp->f_bfree << sbp->sb_inopblog;
-       statp->f_files =
-           MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
-       if (mp->m_maxicount)
-               statp->f_files = min_t(typeof(statp->f_files),
-                                       statp->f_files,
-                                       mp->m_maxicount);
-
-       /* make sure statp->f_ffree does not underflow */
-       ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
-       statp->f_ffree = max_t(__int64_t, ffree, 0);
-
-       spin_unlock(&mp->m_sb_lock);
-
-       if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
-           ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
-                             (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
-               xfs_qm_statvfs(ip, statp);
-       return 0;
-}
-
-STATIC void
-xfs_save_resvblks(struct xfs_mount *mp)
-{
-       __uint64_t resblks = 0;
-
-       mp->m_resblks_save = mp->m_resblks;
-       xfs_reserve_blocks(mp, &resblks, NULL);
-}
-
-STATIC void
-xfs_restore_resvblks(struct xfs_mount *mp)
-{
-       __uint64_t resblks;
-
-       if (mp->m_resblks_save) {
-               resblks = mp->m_resblks_save;
-               mp->m_resblks_save = 0;
-       } else
-               resblks = xfs_default_resblks(mp);
-
-       xfs_reserve_blocks(mp, &resblks, NULL);
-}
-
-STATIC int
-xfs_fs_remount(
-       struct super_block      *sb,
-       int                     *flags,
-       char                    *options)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-       substring_t             args[MAX_OPT_ARGS];
-       char                    *p;
-       int                     error;
-
-       while ((p = strsep(&options, ",")) != NULL) {
-               int token;
-
-               if (!*p)
-                       continue;
-
-               token = match_token(p, tokens, args);
-               switch (token) {
-               case Opt_barrier:
-                       mp->m_flags |= XFS_MOUNT_BARRIER;
-                       break;
-               case Opt_nobarrier:
-                       mp->m_flags &= ~XFS_MOUNT_BARRIER;
-                       break;
-               default:
-                       /*
-                        * Logically we would return an error here to prevent
-                        * users from believing they might have changed
-                        * mount options using remount which can't be changed.
-                        *
-                        * But unfortunately mount(8) adds all options from
-                        * mtab and fstab to the mount arguments in some cases
-                        * so we can't blindly reject options, but have to
-                        * check for each specified option if it actually
-                        * differs from the currently set option and only
-                        * reject it if that's the case.
-                        *
-                        * Until that is implemented we return success for
-                        * every remount request, and silently ignore all
-                        * options that we can't actually change.
-                        */
-#if 0
-                       xfs_info(mp,
-               "mount option \"%s\" not supported for remount\n", p);
-                       return -EINVAL;
-#else
-                       break;
-#endif
-               }
-       }
-
-       /* ro -> rw */
-       if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
-               mp->m_flags &= ~XFS_MOUNT_RDONLY;
-
-               /*
-                * If this is the first remount to writeable state we
-                * might have some superblock changes to update.
-                */
-               if (mp->m_update_flags) {
-                       error = xfs_mount_log_sb(mp, mp->m_update_flags);
-                       if (error) {
-                               xfs_warn(mp, "failed to write sb changes");
-                               return error;
-                       }
-                       mp->m_update_flags = 0;
-               }
-
-               /*
-                * Fill out the reserve pool if it is empty. Use the stashed
-                * value if it is non-zero, otherwise go with the default.
-                */
-               xfs_restore_resvblks(mp);
-       }
-
-       /* rw -> ro */
-       if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
-               /*
-                * After we have synced the data but before we sync the
-                * metadata, we need to free up the reserve block pool so that
-                * the used block count in the superblock on disk is correct at
-                * the end of the remount. Stash the current reserve pool size
-                * so that if we get remounted rw, we can return it to the same
-                * size.
-                */
-
-               xfs_quiesce_data(mp);
-               xfs_save_resvblks(mp);
-               xfs_quiesce_attr(mp);
-               mp->m_flags |= XFS_MOUNT_RDONLY;
-       }
-
-       return 0;
-}
-
-/*
- * Second stage of a freeze. The data is already frozen so we only
- * need to take care of the metadata. Once that's done write a dummy
- * record to dirty the log in case of a crash while frozen.
- */
-STATIC int
-xfs_fs_freeze(
-       struct super_block      *sb)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       xfs_save_resvblks(mp);
-       xfs_quiesce_attr(mp);
-       return -xfs_fs_log_dummy(mp);
-}
-
-STATIC int
-xfs_fs_unfreeze(
-       struct super_block      *sb)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       xfs_restore_resvblks(mp);
-       return 0;
-}
-
-STATIC int
-xfs_fs_show_options(
-       struct seq_file         *m,
-       struct vfsmount         *mnt)
-{
-       return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
-}
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock _has_ now been read in.
- */
-STATIC int
-xfs_finish_flags(
-       struct xfs_mount        *mp)
-{
-       int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
-
-       /* Fail a mount where the logbuf is smaller than the log stripe */
-       if (xfs_sb_version_haslogv2(&mp->m_sb)) {
-               if (mp->m_logbsize <= 0 &&
-                   mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
-                       mp->m_logbsize = mp->m_sb.sb_logsunit;
-               } else if (mp->m_logbsize > 0 &&
-                          mp->m_logbsize < mp->m_sb.sb_logsunit) {
-                       xfs_warn(mp,
-               "logbuf size must be greater than or equal to log stripe size");
-                       return XFS_ERROR(EINVAL);
-               }
-       } else {
-               /* Fail a mount if the logbuf is larger than 32K */
-               if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
-                       xfs_warn(mp,
-               "logbuf size for version 1 logs must be 16K or 32K");
-                       return XFS_ERROR(EINVAL);
-               }
-       }
-
-       /*
-        * mkfs'ed attr2 will turn on attr2 mount unless explicitly
-        * told by noattr2 to turn it off
-        */
-       if (xfs_sb_version_hasattr2(&mp->m_sb) &&
-           !(mp->m_flags & XFS_MOUNT_NOATTR2))
-               mp->m_flags |= XFS_MOUNT_ATTR2;
-
-       /*
-        * prohibit r/w mounts of read-only filesystems
-        */
-       if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
-               xfs_warn(mp,
-                       "cannot mount a read-only filesystem as read-write");
-               return XFS_ERROR(EROFS);
-       }
-
-       return 0;
-}
-
-STATIC int
-xfs_fs_fill_super(
-       struct super_block      *sb,
-       void                    *data,
-       int                     silent)
-{
-       struct inode            *root;
-       struct xfs_mount        *mp = NULL;
-       int                     flags = 0, error = ENOMEM;
-
-       mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
-       if (!mp)
-               goto out;
-
-       spin_lock_init(&mp->m_sb_lock);
-       mutex_init(&mp->m_growlock);
-       atomic_set(&mp->m_active_trans, 0);
-
-       mp->m_super = sb;
-       sb->s_fs_info = mp;
-
-       error = xfs_parseargs(mp, (char *)data);
-       if (error)
-               goto out_free_fsname;
-
-       sb_min_blocksize(sb, BBSIZE);
-       sb->s_xattr = xfs_xattr_handlers;
-       sb->s_export_op = &xfs_export_operations;
-#ifdef CONFIG_XFS_QUOTA
-       sb->s_qcop = &xfs_quotactl_operations;
-#endif
-       sb->s_op = &xfs_super_operations;
-
-       if (silent)
-               flags |= XFS_MFSI_QUIET;
-
-       error = xfs_open_devices(mp);
-       if (error)
-               goto out_free_fsname;
-
-       error = xfs_icsb_init_counters(mp);
-       if (error)
-               goto out_close_devices;
-
-       error = xfs_readsb(mp, flags);
-       if (error)
-               goto out_destroy_counters;
-
-       error = xfs_finish_flags(mp);
-       if (error)
-               goto out_free_sb;
-
-       error = xfs_setup_devices(mp);
-       if (error)
-               goto out_free_sb;
-
-       error = xfs_filestream_mount(mp);
-       if (error)
-               goto out_free_sb;
-
-       /*
-        * we must configure the block size in the superblock before we run the
-        * full mount process as the mount process can lookup and cache inodes.
-        * For the same reason we must also initialise the syncd and register
-        * the inode cache shrinker so that inodes can be reclaimed during
-        * operations like a quotacheck that iterate all inodes in the
-        * filesystem.
-        */
-       sb->s_magic = XFS_SB_MAGIC;
-       sb->s_blocksize = mp->m_sb.sb_blocksize;
-       sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
-       sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
-       sb->s_time_gran = 1;
-       set_posix_acl_flag(sb);
-
-       error = xfs_mountfs(mp);
-       if (error)
-               goto out_filestream_unmount;
-
-       error = xfs_syncd_init(mp);
-       if (error)
-               goto out_unmount;
-
-       root = igrab(VFS_I(mp->m_rootip));
-       if (!root) {
-               error = ENOENT;
-               goto out_syncd_stop;
-       }
-       if (is_bad_inode(root)) {
-               error = EINVAL;
-               goto out_syncd_stop;
-       }
-       sb->s_root = d_alloc_root(root);
-       if (!sb->s_root) {
-               error = ENOMEM;
-               goto out_iput;
-       }
-
-       return 0;
-
- out_filestream_unmount:
-       xfs_filestream_unmount(mp);
- out_free_sb:
-       xfs_freesb(mp);
- out_destroy_counters:
-       xfs_icsb_destroy_counters(mp);
- out_close_devices:
-       xfs_close_devices(mp);
- out_free_fsname:
-       xfs_free_fsname(mp);
-       kfree(mp);
- out:
-       return -error;
-
- out_iput:
-       iput(root);
- out_syncd_stop:
-       xfs_syncd_stop(mp);
- out_unmount:
-       /*
-        * Blow away any referenced inode in the filestreams cache.
-        * This can and will cause log traffic as inodes go inactive
-        * here.
-        */
-       xfs_filestream_unmount(mp);
-
-       XFS_bflush(mp->m_ddev_targp);
-
-       xfs_unmountfs(mp);
-       goto out_free_sb;
-}
-
-STATIC struct dentry *
-xfs_fs_mount(
-       struct file_system_type *fs_type,
-       int                     flags,
-       const char              *dev_name,
-       void                    *data)
-{
-       return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
-}
-
-static int
-xfs_fs_nr_cached_objects(
-       struct super_block      *sb)
-{
-       return xfs_reclaim_inodes_count(XFS_M(sb));
-}
-
-static void
-xfs_fs_free_cached_objects(
-       struct super_block      *sb,
-       int                     nr_to_scan)
-{
-       xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
-}
-
-static const struct super_operations xfs_super_operations = {
-       .alloc_inode            = xfs_fs_alloc_inode,
-       .destroy_inode          = xfs_fs_destroy_inode,
-       .dirty_inode            = xfs_fs_dirty_inode,
-       .write_inode            = xfs_fs_write_inode,
-       .evict_inode            = xfs_fs_evict_inode,
-       .put_super              = xfs_fs_put_super,
-       .sync_fs                = xfs_fs_sync_fs,
-       .freeze_fs              = xfs_fs_freeze,
-       .unfreeze_fs            = xfs_fs_unfreeze,
-       .statfs                 = xfs_fs_statfs,
-       .remount_fs             = xfs_fs_remount,
-       .show_options           = xfs_fs_show_options,
-       .nr_cached_objects      = xfs_fs_nr_cached_objects,
-       .free_cached_objects    = xfs_fs_free_cached_objects,
-};
-
-static struct file_system_type xfs_fs_type = {
-       .owner                  = THIS_MODULE,
-       .name                   = "xfs",
-       .mount                  = xfs_fs_mount,
-       .kill_sb                = kill_block_super,
-       .fs_flags               = FS_REQUIRES_DEV,
-};
-
-STATIC int __init
-xfs_init_zones(void)
-{
-
-       xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
-       if (!xfs_ioend_zone)
-               goto out;
-
-       xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
-                                                 xfs_ioend_zone);
-       if (!xfs_ioend_pool)
-               goto out_destroy_ioend_zone;
-
-       xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
-                                               "xfs_log_ticket");
-       if (!xfs_log_ticket_zone)
-               goto out_destroy_ioend_pool;
-
-       xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
-                                               "xfs_bmap_free_item");
-       if (!xfs_bmap_free_item_zone)
-               goto out_destroy_log_ticket_zone;
-
-       xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
-                                               "xfs_btree_cur");
-       if (!xfs_btree_cur_zone)
-               goto out_destroy_bmap_free_item_zone;
-
-       xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
-                                               "xfs_da_state");
-       if (!xfs_da_state_zone)
-               goto out_destroy_btree_cur_zone;
-
-       xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
-       if (!xfs_dabuf_zone)
-               goto out_destroy_da_state_zone;
-
-       xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
-       if (!xfs_ifork_zone)
-               goto out_destroy_dabuf_zone;
-
-       xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
-       if (!xfs_trans_zone)
-               goto out_destroy_ifork_zone;
-
-       xfs_log_item_desc_zone =
-               kmem_zone_init(sizeof(struct xfs_log_item_desc),
-                              "xfs_log_item_desc");
-       if (!xfs_log_item_desc_zone)
-               goto out_destroy_trans_zone;
-
-       /*
-        * The size of the zone allocated buf log item is the maximum
-        * size possible under XFS.  This wastes a little bit of memory,
-        * but it is much faster.
-        */
-       xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
-                               (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
-                                 NBWORD) * sizeof(int))), "xfs_buf_item");
-       if (!xfs_buf_item_zone)
-               goto out_destroy_log_item_desc_zone;
-
-       xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
-                       ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
-                                sizeof(xfs_extent_t))), "xfs_efd_item");
-       if (!xfs_efd_zone)
-               goto out_destroy_buf_item_zone;
-
-       xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
-                       ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
-                               sizeof(xfs_extent_t))), "xfs_efi_item");
-       if (!xfs_efi_zone)
-               goto out_destroy_efd_zone;
-
-       xfs_inode_zone =
-               kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
-                       KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
-                       xfs_fs_inode_init_once);
-       if (!xfs_inode_zone)
-               goto out_destroy_efi_zone;
-
-       xfs_ili_zone =
-               kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
-                                       KM_ZONE_SPREAD, NULL);
-       if (!xfs_ili_zone)
-               goto out_destroy_inode_zone;
-
-       return 0;
-
- out_destroy_inode_zone:
-       kmem_zone_destroy(xfs_inode_zone);
- out_destroy_efi_zone:
-       kmem_zone_destroy(xfs_efi_zone);
- out_destroy_efd_zone:
-       kmem_zone_destroy(xfs_efd_zone);
- out_destroy_buf_item_zone:
-       kmem_zone_destroy(xfs_buf_item_zone);
- out_destroy_log_item_desc_zone:
-       kmem_zone_destroy(xfs_log_item_desc_zone);
- out_destroy_trans_zone:
-       kmem_zone_destroy(xfs_trans_zone);
- out_destroy_ifork_zone:
-       kmem_zone_destroy(xfs_ifork_zone);
- out_destroy_dabuf_zone:
-       kmem_zone_destroy(xfs_dabuf_zone);
- out_destroy_da_state_zone:
-       kmem_zone_destroy(xfs_da_state_zone);
- out_destroy_btree_cur_zone:
-       kmem_zone_destroy(xfs_btree_cur_zone);
- out_destroy_bmap_free_item_zone:
-       kmem_zone_destroy(xfs_bmap_free_item_zone);
- out_destroy_log_ticket_zone:
-       kmem_zone_destroy(xfs_log_ticket_zone);
- out_destroy_ioend_pool:
-       mempool_destroy(xfs_ioend_pool);
- out_destroy_ioend_zone:
-       kmem_zone_destroy(xfs_ioend_zone);
- out:
-       return -ENOMEM;
-}
-
-STATIC void
-xfs_destroy_zones(void)
-{
-       kmem_zone_destroy(xfs_ili_zone);
-       kmem_zone_destroy(xfs_inode_zone);
-       kmem_zone_destroy(xfs_efi_zone);
-       kmem_zone_destroy(xfs_efd_zone);
-       kmem_zone_destroy(xfs_buf_item_zone);
-       kmem_zone_destroy(xfs_log_item_desc_zone);
-       kmem_zone_destroy(xfs_trans_zone);
-       kmem_zone_destroy(xfs_ifork_zone);
-       kmem_zone_destroy(xfs_dabuf_zone);
-       kmem_zone_destroy(xfs_da_state_zone);
-       kmem_zone_destroy(xfs_btree_cur_zone);
-       kmem_zone_destroy(xfs_bmap_free_item_zone);
-       kmem_zone_destroy(xfs_log_ticket_zone);
-       mempool_destroy(xfs_ioend_pool);
-       kmem_zone_destroy(xfs_ioend_zone);
-
-}
-
-STATIC int __init
-xfs_init_workqueues(void)
-{
-       /*
-        * max_active is set to 8 to give enough concurency to allow
-        * multiple work operations on each CPU to run. This allows multiple
-        * filesystems to be running sync work concurrently, and scales with
-        * the number of CPUs in the system.
-        */
-       xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
-       if (!xfs_syncd_wq)
-               goto out;
-
-       xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
-       if (!xfs_ail_wq)
-               goto out_destroy_syncd;
-
-       return 0;
-
-out_destroy_syncd:
-       destroy_workqueue(xfs_syncd_wq);
-out:
-       return -ENOMEM;
-}
-
-STATIC void
-xfs_destroy_workqueues(void)
-{
-       destroy_workqueue(xfs_ail_wq);
-       destroy_workqueue(xfs_syncd_wq);
-}
-
-STATIC int __init
-init_xfs_fs(void)
-{
-       int                     error;
-
-       printk(KERN_INFO XFS_VERSION_STRING " with "
-                        XFS_BUILD_OPTIONS " enabled\n");
-
-       xfs_ioend_init();
-       xfs_dir_startup();
-
-       error = xfs_init_zones();
-       if (error)
-               goto out;
-
-       error = xfs_init_workqueues();
-       if (error)
-               goto out_destroy_zones;
-
-       error = xfs_mru_cache_init();
-       if (error)
-               goto out_destroy_wq;
-
-       error = xfs_filestream_init();
-       if (error)
-               goto out_mru_cache_uninit;
-
-       error = xfs_buf_init();
-       if (error)
-               goto out_filestream_uninit;
-
-       error = xfs_init_procfs();
-       if (error)
-               goto out_buf_terminate;
-
-       error = xfs_sysctl_register();
-       if (error)
-               goto out_cleanup_procfs;
-
-       vfs_initquota();
-
-       error = register_filesystem(&xfs_fs_type);
-       if (error)
-               goto out_sysctl_unregister;
-       return 0;
-
- out_sysctl_unregister:
-       xfs_sysctl_unregister();
- out_cleanup_procfs:
-       xfs_cleanup_procfs();
- out_buf_terminate:
-       xfs_buf_terminate();
- out_filestream_uninit:
-       xfs_filestream_uninit();
- out_mru_cache_uninit:
-       xfs_mru_cache_uninit();
- out_destroy_wq:
-       xfs_destroy_workqueues();
- out_destroy_zones:
-       xfs_destroy_zones();
- out:
-       return error;
-}
-
-STATIC void __exit
-exit_xfs_fs(void)
-{
-       vfs_exitquota();
-       unregister_filesystem(&xfs_fs_type);
-       xfs_sysctl_unregister();
-       xfs_cleanup_procfs();
-       xfs_buf_terminate();
-       xfs_filestream_uninit();
-       xfs_mru_cache_uninit();
-       xfs_destroy_workqueues();
-       xfs_destroy_zones();
-}
-
-module_init(init_xfs_fs);
-module_exit(exit_xfs_fs);
-
-MODULE_AUTHOR("Silicon Graphics, Inc.");
-MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
-MODULE_LICENSE("GPL");
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h

deleted file mode 100644 (file)

index 50a3266..0000000
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPER_H__
-#define __XFS_SUPER_H__
-
-#include <linux/exportfs.h>
-
-#ifdef CONFIG_XFS_QUOTA
-extern void xfs_qm_init(void);
-extern void xfs_qm_exit(void);
-# define vfs_initquota()       xfs_qm_init()
-# define vfs_exitquota()       xfs_qm_exit()
-#else
-# define vfs_initquota()       do { } while (0)
-# define vfs_exitquota()       do { } while (0)
-#endif
-
-#ifdef CONFIG_XFS_POSIX_ACL
-# define XFS_ACL_STRING                "ACLs, "
-# define set_posix_acl_flag(sb)        ((sb)->s_flags |= MS_POSIXACL)
-#else
-# define XFS_ACL_STRING
-# define set_posix_acl_flag(sb)        do { } while (0)
-#endif
-
-#define XFS_SECURITY_STRING    "security attributes, "
-
-#ifdef CONFIG_XFS_RT
-# define XFS_REALTIME_STRING   "realtime, "
-#else
-# define XFS_REALTIME_STRING
-#endif
-
-#if XFS_BIG_BLKNOS
-# if XFS_BIG_INUMS
-#  define XFS_BIGFS_STRING     "large block/inode numbers, "
-# else
-#  define XFS_BIGFS_STRING     "large block numbers, "
-# endif
-#else
-# define XFS_BIGFS_STRING
-#endif
-
-#ifdef DEBUG
-# define XFS_DBG_STRING                "debug"
-#else
-# define XFS_DBG_STRING                "no debug"
-#endif
-
-#define XFS_VERSION_STRING     "SGI XFS"
-#define XFS_BUILD_OPTIONS      XFS_ACL_STRING \
-                               XFS_SECURITY_STRING \
-                               XFS_REALTIME_STRING \
-                               XFS_BIGFS_STRING \
-                               XFS_DBG_STRING /* DBG must be last */
-
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_buftarg;
-struct block_device;
-
-extern __uint64_t xfs_max_file_offset(unsigned int);
-
-extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
-
-extern const struct export_operations xfs_export_operations;
-extern const struct xattr_handler *xfs_xattr_handlers[];
-extern const struct quotactl_ops xfs_quotactl_operations;
-
-#define XFS_M(sb)              ((struct xfs_mount *)((sb)->s_fs_info))
-
-#endif /* __XFS_SUPER_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c

deleted file mode 100644 (file)

index e4c938a..0000000
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ /dev/null
@@ -1,1065 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_dinode.h"
-#include "xfs_error.h"
-#include "xfs_filestream.h"
-#include "xfs_vnodeops.h"
-#include "xfs_inode_item.h"
-#include "xfs_quota.h"
-#include "xfs_trace.h"
-#include "xfs_fsops.h"
-
-#include <linux/kthread.h>
-#include <linux/freezer.h>
-
-struct workqueue_struct        *xfs_syncd_wq;  /* sync workqueue */
-
-/*
- * The inode lookup is done in batches to keep the amount of lock traffic and
- * radix tree lookups to a minimum. The batch size is a trade off between
- * lookup reduction and stack usage. This is in the reclaim path, so we can't
- * be too greedy.
- */
-#define XFS_LOOKUP_BATCH       32
-
-STATIC int
-xfs_inode_ag_walk_grab(
-       struct xfs_inode        *ip)
-{
-       struct inode            *inode = VFS_I(ip);
-
-       ASSERT(rcu_read_lock_held());
-
-       /*
-        * check for stale RCU freed inode
-        *
-        * If the inode has been reallocated, it doesn't matter if it's not in
-        * the AG we are walking - we are walking for writeback, so if it
-        * passes all the "valid inode" checks and is dirty, then we'll write
-        * it back anyway.  If it has been reallocated and still being
-        * initialised, the XFS_INEW check below will catch it.
-        */
-       spin_lock(&ip->i_flags_lock);
-       if (!ip->i_ino)
-               goto out_unlock_noent;
-
-       /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
-       if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
-               goto out_unlock_noent;
-       spin_unlock(&ip->i_flags_lock);
-
-       /* nothing to sync during shutdown */
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return EFSCORRUPTED;
-
-       /* If we can't grab the inode, it must on it's way to reclaim. */
-       if (!igrab(inode))
-               return ENOENT;
-
-       if (is_bad_inode(inode)) {
-               IRELE(ip);
-               return ENOENT;
-       }
-
-       /* inode is valid */
-       return 0;
-
-out_unlock_noent:
-       spin_unlock(&ip->i_flags_lock);
-       return ENOENT;
-}
-
-STATIC int
-xfs_inode_ag_walk(
-       struct xfs_mount        *mp,
-       struct xfs_perag        *pag,
-       int                     (*execute)(struct xfs_inode *ip,
-                                          struct xfs_perag *pag, int flags),
-       int                     flags)
-{
-       uint32_t                first_index;
-       int                     last_error = 0;
-       int                     skipped;
-       int                     done;
-       int                     nr_found;
-
-restart:
-       done = 0;
-       skipped = 0;
-       first_index = 0;
-       nr_found = 0;
-       do {
-               struct xfs_inode *batch[XFS_LOOKUP_BATCH];
-               int             error = 0;
-               int             i;
-
-               rcu_read_lock();
-               nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
-                                       (void **)batch, first_index,
-                                       XFS_LOOKUP_BATCH);
-               if (!nr_found) {
-                       rcu_read_unlock();
-                       break;
-               }
-
-               /*
-                * Grab the inodes before we drop the lock. if we found
-                * nothing, nr == 0 and the loop will be skipped.
-                */
-               for (i = 0; i < nr_found; i++) {
-                       struct xfs_inode *ip = batch[i];
-
-                       if (done || xfs_inode_ag_walk_grab(ip))
-                               batch[i] = NULL;
-
-                       /*
-                        * Update the index for the next lookup. Catch
-                        * overflows into the next AG range which can occur if
-                        * we have inodes in the last block of the AG and we
-                        * are currently pointing to the last inode.
-                        *
-                        * Because we may see inodes that are from the wrong AG
-                        * due to RCU freeing and reallocation, only update the
-                        * index if it lies in this AG. It was a race that lead
-                        * us to see this inode, so another lookup from the
-                        * same index will not find it again.
-                        */
-                       if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
-                               continue;
-                       first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-                       if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-                               done = 1;
-               }
-
-               /* unlock now we've grabbed the inodes. */
-               rcu_read_unlock();
-
-               for (i = 0; i < nr_found; i++) {
-                       if (!batch[i])
-                               continue;
-                       error = execute(batch[i], pag, flags);
-                       IRELE(batch[i]);
-                       if (error == EAGAIN) {
-                               skipped++;
-                               continue;
-                       }
-                       if (error && last_error != EFSCORRUPTED)
-                               last_error = error;
-               }
-
-               /* bail out if the filesystem is corrupted.  */
-               if (error == EFSCORRUPTED)
-                       break;
-
-               cond_resched();
-
-       } while (nr_found && !done);
-
-       if (skipped) {
-               delay(1);
-               goto restart;
-       }
-       return last_error;
-}
-
-int
-xfs_inode_ag_iterator(
-       struct xfs_mount        *mp,
-       int                     (*execute)(struct xfs_inode *ip,
-                                          struct xfs_perag *pag, int flags),
-       int                     flags)
-{
-       struct xfs_perag        *pag;
-       int                     error = 0;
-       int                     last_error = 0;
-       xfs_agnumber_t          ag;
-
-       ag = 0;
-       while ((pag = xfs_perag_get(mp, ag))) {
-               ag = pag->pag_agno + 1;
-               error = xfs_inode_ag_walk(mp, pag, execute, flags);
-               xfs_perag_put(pag);
-               if (error) {
-                       last_error = error;
-                       if (error == EFSCORRUPTED)
-                               break;
-               }
-       }
-       return XFS_ERROR(last_error);
-}
-
-STATIC int
-xfs_sync_inode_data(
-       struct xfs_inode        *ip,
-       struct xfs_perag        *pag,
-       int                     flags)
-{
-       struct inode            *inode = VFS_I(ip);
-       struct address_space *mapping = inode->i_mapping;
-       int                     error = 0;
-
-       if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
-               goto out_wait;
-
-       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
-               if (flags & SYNC_TRYLOCK)
-                       goto out_wait;
-               xfs_ilock(ip, XFS_IOLOCK_SHARED);
-       }
-
-       error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
-                               0 : XBF_ASYNC, FI_NONE);
-       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
- out_wait:
-       if (flags & SYNC_WAIT)
-               xfs_ioend_wait(ip);
-       return error;
-}
-
-STATIC int
-xfs_sync_inode_attr(
-       struct xfs_inode        *ip,
-       struct xfs_perag        *pag,
-       int                     flags)
-{
-       int                     error = 0;
-
-       xfs_ilock(ip, XFS_ILOCK_SHARED);
-       if (xfs_inode_clean(ip))
-               goto out_unlock;
-       if (!xfs_iflock_nowait(ip)) {
-               if (!(flags & SYNC_WAIT))
-                       goto out_unlock;
-               xfs_iflock(ip);
-       }
-
-       if (xfs_inode_clean(ip)) {
-               xfs_ifunlock(ip);
-               goto out_unlock;
-       }
-
-       error = xfs_iflush(ip, flags);
-
-       /*
-        * We don't want to try again on non-blocking flushes that can't run
-        * again immediately. If an inode really must be written, then that's
-        * what the SYNC_WAIT flag is for.
-        */
-       if (error == EAGAIN) {
-               ASSERT(!(flags & SYNC_WAIT));
-               error = 0;
-       }
-
- out_unlock:
-       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-       return error;
-}
-
-/*
- * Write out pagecache data for the whole filesystem.
- */
-STATIC int
-xfs_sync_data(
-       struct xfs_mount        *mp,
-       int                     flags)
-{
-       int                     error;
-
-       ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
-
-       error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
-       if (error)
-               return XFS_ERROR(error);
-
-       xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
-       return 0;
-}
-
-/*
- * Write out inode metadata (attributes) for the whole filesystem.
- */
-STATIC int
-xfs_sync_attr(
-       struct xfs_mount        *mp,
-       int                     flags)
-{
-       ASSERT((flags & ~SYNC_WAIT) == 0);
-
-       return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
-}
-
-STATIC int
-xfs_sync_fsdata(
-       struct xfs_mount        *mp)
-{
-       struct xfs_buf          *bp;
-
-       /*
-        * If the buffer is pinned then push on the log so we won't get stuck
-        * waiting in the write for someone, maybe ourselves, to flush the log.
-        *
-        * Even though we just pushed the log above, we did not have the
-        * superblock buffer locked at that point so it can become pinned in
-        * between there and here.
-        */
-       bp = xfs_getsb(mp, 0);
-       if (XFS_BUF_ISPINNED(bp))
-               xfs_log_force(mp, 0);
-
-       return xfs_bwrite(mp, bp);
-}
-
-/*
- * When remounting a filesystem read-only or freezing the filesystem, we have
- * two phases to execute. This first phase is syncing the data before we
- * quiesce the filesystem, and the second is flushing all the inodes out after
- * we've waited for all the transactions created by the first phase to
- * complete. The second phase ensures that the inodes are written to their
- * location on disk rather than just existing in transactions in the log. This
- * means after a quiesce there is no log replay required to write the inodes to
- * disk (this is the main difference between a sync and a quiesce).
- */
-/*
- * First stage of freeze - no writers will make progress now we are here,
- * so we flush delwri and delalloc buffers here, then wait for all I/O to
- * complete.  Data is frozen at that point. Metadata is not frozen,
- * transactions can still occur here so don't bother flushing the buftarg
- * because it'll just get dirty again.
- */
-int
-xfs_quiesce_data(
-       struct xfs_mount        *mp)
-{
-       int                     error, error2 = 0;
-
-       xfs_qm_sync(mp, SYNC_TRYLOCK);
-       xfs_qm_sync(mp, SYNC_WAIT);
-
-       /* force out the newly dirtied log buffers */
-       xfs_log_force(mp, XFS_LOG_SYNC);
-
-       /* write superblock and hoover up shutdown errors */
-       error = xfs_sync_fsdata(mp);
-
-       /* make sure all delwri buffers are written out */
-       xfs_flush_buftarg(mp->m_ddev_targp, 1);
-
-       /* mark the log as covered if needed */
-       if (xfs_log_need_covered(mp))
-               error2 = xfs_fs_log_dummy(mp);
-
-       /* flush data-only devices */
-       if (mp->m_rtdev_targp)
-               XFS_bflush(mp->m_rtdev_targp);
-
-       return error ? error : error2;
-}
-
-STATIC void
-xfs_quiesce_fs(
-       struct xfs_mount        *mp)
-{
-       int     count = 0, pincount;
-
-       xfs_reclaim_inodes(mp, 0);
-       xfs_flush_buftarg(mp->m_ddev_targp, 0);
-
-       /*
-        * This loop must run at least twice.  The first instance of the loop
-        * will flush most meta data but that will generate more meta data
-        * (typically directory updates).  Which then must be flushed and
-        * logged before we can write the unmount record. We also so sync
-        * reclaim of inodes to catch any that the above delwri flush skipped.
-        */
-       do {
-               xfs_reclaim_inodes(mp, SYNC_WAIT);
-               xfs_sync_attr(mp, SYNC_WAIT);
-               pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
-               if (!pincount) {
-                       delay(50);
-                       count++;
-               }
-       } while (count < 2);
-}
-
-/*
- * Second stage of a quiesce. The data is already synced, now we have to take
- * care of the metadata. New transactions are already blocked, so we need to
- * wait for any remaining transactions to drain out before proceeding.
- */
-void
-xfs_quiesce_attr(
-       struct xfs_mount        *mp)
-{
-       int     error = 0;
-
-       /* wait for all modifications to complete */
-       while (atomic_read(&mp->m_active_trans) > 0)
-               delay(100);
-
-       /* flush inodes and push all remaining buffers out to disk */
-       xfs_quiesce_fs(mp);
-
-       /*
-        * Just warn here till VFS can correctly support
-        * read-only remount without racing.
-        */
-       WARN_ON(atomic_read(&mp->m_active_trans) != 0);
-
-       /* Push the superblock and write an unmount record */
-       error = xfs_log_sbcount(mp);
-       if (error)
-               xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
-                               "Frozen image may not be consistent.");
-       xfs_log_unmount_write(mp);
-       xfs_unmountfs_writesb(mp);
-}
-
-static void
-xfs_syncd_queue_sync(
-       struct xfs_mount        *mp)
-{
-       queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
-                               msecs_to_jiffies(xfs_syncd_centisecs * 10));
-}
-
-/*
- * Every sync period we need to unpin all items, reclaim inodes and sync
- * disk quotas.  We might need to cover the log to indicate that the
- * filesystem is idle and not frozen.
- */
-STATIC void
-xfs_sync_worker(
-       struct work_struct *work)
-{
-       struct xfs_mount *mp = container_of(to_delayed_work(work),
-                                       struct xfs_mount, m_sync_work);
-       int             error;
-
-       if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
-               /* dgc: errors ignored here */
-               if (mp->m_super->s_frozen == SB_UNFROZEN &&
-                   xfs_log_need_covered(mp))
-                       error = xfs_fs_log_dummy(mp);
-               else
-                       xfs_log_force(mp, 0);
-               error = xfs_qm_sync(mp, SYNC_TRYLOCK);
-
-               /* start pushing all the metadata that is currently dirty */
-               xfs_ail_push_all(mp->m_ail);
-       }
-
-       /* queue us up again */
-       xfs_syncd_queue_sync(mp);
-}
-
-/*
- * Queue a new inode reclaim pass if there are reclaimable inodes and there
- * isn't a reclaim pass already in progress. By default it runs every 5s based
- * on the xfs syncd work default of 30s. Perhaps this should have it's own
- * tunable, but that can be done if this method proves to be ineffective or too
- * aggressive.
- */
-static void
-xfs_syncd_queue_reclaim(
-       struct xfs_mount        *mp)
-{
-
-       /*
-        * We can have inodes enter reclaim after we've shut down the syncd
-        * workqueue during unmount, so don't allow reclaim work to be queued
-        * during unmount.
-        */
-       if (!(mp->m_super->s_flags & MS_ACTIVE))
-               return;
-
-       rcu_read_lock();
-       if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
-               queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
-                       msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
-       }
-       rcu_read_unlock();
-}
-
-/*
- * This is a fast pass over the inode cache to try to get reclaim moving on as
- * many inodes as possible in a short period of time. It kicks itself every few
- * seconds, as well as being kicked by the inode cache shrinker when memory
- * goes low. It scans as quickly as possible avoiding locked inodes or those
- * already being flushed, and once done schedules a future pass.
- */
-STATIC void
-xfs_reclaim_worker(
-       struct work_struct *work)
-{
-       struct xfs_mount *mp = container_of(to_delayed_work(work),
-                                       struct xfs_mount, m_reclaim_work);
-
-       xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
-       xfs_syncd_queue_reclaim(mp);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations.  At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room.
- *
- * Queue a new data flush if there isn't one already in progress and
- * wait for completion of the flush. This means that we only ever have one
- * inode flush in progress no matter how many ENOSPC events are occurring and
- * so will prevent the system from bogging down due to every concurrent
- * ENOSPC event scanning all the active inodes in the system for writeback.
- */
-void
-xfs_flush_inodes(
-       struct xfs_inode        *ip)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-
-       queue_work(xfs_syncd_wq, &mp->m_flush_work);
-       flush_work_sync(&mp->m_flush_work);
-}
-
-STATIC void
-xfs_flush_worker(
-       struct work_struct *work)
-{
-       struct xfs_mount *mp = container_of(work,
-                                       struct xfs_mount, m_flush_work);
-
-       xfs_sync_data(mp, SYNC_TRYLOCK);
-       xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
-}
-
-int
-xfs_syncd_init(
-       struct xfs_mount        *mp)
-{
-       INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
-       INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
-       INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
-
-       xfs_syncd_queue_sync(mp);
-       xfs_syncd_queue_reclaim(mp);
-
-       return 0;
-}
-
-void
-xfs_syncd_stop(
-       struct xfs_mount        *mp)
-{
-       cancel_delayed_work_sync(&mp->m_sync_work);
-       cancel_delayed_work_sync(&mp->m_reclaim_work);
-       cancel_work_sync(&mp->m_flush_work);
-}
-
-void
-__xfs_inode_set_reclaim_tag(
-       struct xfs_perag        *pag,
-       struct xfs_inode        *ip)
-{
-       radix_tree_tag_set(&pag->pag_ici_root,
-                          XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
-                          XFS_ICI_RECLAIM_TAG);
-
-       if (!pag->pag_ici_reclaimable) {
-               /* propagate the reclaim tag up into the perag radix tree */
-               spin_lock(&ip->i_mount->m_perag_lock);
-               radix_tree_tag_set(&ip->i_mount->m_perag_tree,
-                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-                               XFS_ICI_RECLAIM_TAG);
-               spin_unlock(&ip->i_mount->m_perag_lock);
-
-               /* schedule periodic background inode reclaim */
-               xfs_syncd_queue_reclaim(ip->i_mount);
-
-               trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
-                                                       -1, _RET_IP_);
-       }
-       pag->pag_ici_reclaimable++;
-}
-
-/*
- * We set the inode flag atomically with the radix tree tag.
- * Once we get tag lookups on the radix tree, this inode flag
- * can go away.
- */
-void
-xfs_inode_set_reclaim_tag(
-       xfs_inode_t     *ip)
-{
-       struct xfs_mount *mp = ip->i_mount;
-       struct xfs_perag *pag;
-
-       pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-       spin_lock(&pag->pag_ici_lock);
-       spin_lock(&ip->i_flags_lock);
-       __xfs_inode_set_reclaim_tag(pag, ip);
-       __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
-       spin_unlock(&ip->i_flags_lock);
-       spin_unlock(&pag->pag_ici_lock);
-       xfs_perag_put(pag);
-}
-
-STATIC void
-__xfs_inode_clear_reclaim(
-       xfs_perag_t     *pag,
-       xfs_inode_t     *ip)
-{
-       pag->pag_ici_reclaimable--;
-       if (!pag->pag_ici_reclaimable) {
-               /* clear the reclaim tag from the perag radix tree */
-               spin_lock(&ip->i_mount->m_perag_lock);
-               radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
-                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-                               XFS_ICI_RECLAIM_TAG);
-               spin_unlock(&ip->i_mount->m_perag_lock);
-               trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
-                                                       -1, _RET_IP_);
-       }
-}
-
-void
-__xfs_inode_clear_reclaim_tag(
-       xfs_mount_t     *mp,
-       xfs_perag_t     *pag,
-       xfs_inode_t     *ip)
-{
-       radix_tree_tag_clear(&pag->pag_ici_root,
-                       XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
-       __xfs_inode_clear_reclaim(pag, ip);
-}
-
-/*
- * Grab the inode for reclaim exclusively.
- * Return 0 if we grabbed it, non-zero otherwise.
- */
-STATIC int
-xfs_reclaim_inode_grab(
-       struct xfs_inode        *ip,
-       int                     flags)
-{
-       ASSERT(rcu_read_lock_held());
-
-       /* quick check for stale RCU freed inode */
-       if (!ip->i_ino)
-               return 1;
-
-       /*
-        * do some unlocked checks first to avoid unnecessary lock traffic.
-        * The first is a flush lock check, the second is a already in reclaim
-        * check. Only do these checks if we are not going to block on locks.
-        */
-       if ((flags & SYNC_TRYLOCK) &&
-           (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
-               return 1;
-       }
-
-       /*
-        * The radix tree lock here protects a thread in xfs_iget from racing
-        * with us starting reclaim on the inode.  Once we have the
-        * XFS_IRECLAIM flag set it will not touch us.
-        *
-        * Due to RCU lookup, we may find inodes that have been freed and only
-        * have XFS_IRECLAIM set.  Indeed, we may see reallocated inodes that
-        * aren't candidates for reclaim at all, so we must check the
-        * XFS_IRECLAIMABLE is set first before proceeding to reclaim.
-        */
-       spin_lock(&ip->i_flags_lock);
-       if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
-           __xfs_iflags_test(ip, XFS_IRECLAIM)) {
-               /* not a reclaim candidate. */
-               spin_unlock(&ip->i_flags_lock);
-               return 1;
-       }
-       __xfs_iflags_set(ip, XFS_IRECLAIM);
-       spin_unlock(&ip->i_flags_lock);
-       return 0;
-}
-
-/*
- * Inodes in different states need to be treated differently, and the return
- * value of xfs_iflush is not sufficient to get this right. The following table
- * lists the inode states and the reclaim actions necessary for non-blocking
- * reclaim:
- *
- *
- *     inode state          iflush ret         required action
- *      ---------------      ----------         ---------------
- *     bad                     -               reclaim
- *     shutdown                EIO             unpin and reclaim
- *     clean, unpinned         0               reclaim
- *     stale, unpinned         0               reclaim
- *     clean, pinned(*)        0               requeue
- *     stale, pinned           EAGAIN          requeue
- *     dirty, delwri ok        0               requeue
- *     dirty, delwri blocked   EAGAIN          requeue
- *     dirty, sync flush       0               reclaim
- *
- * (*) dgc: I don't think the clean, pinned state is possible but it gets
- * handled anyway given the order of checks implemented.
- *
- * As can be seen from the table, the return value of xfs_iflush() is not
- * sufficient to correctly decide the reclaim action here. The checks in
- * xfs_iflush() might look like duplicates, but they are not.
- *
- * Also, because we get the flush lock first, we know that any inode that has
- * been flushed delwri has had the flush completed by the time we check that
- * the inode is clean. The clean inode check needs to be done before flushing
- * the inode delwri otherwise we would loop forever requeuing clean inodes as
- * we cannot tell apart a successful delwri flush and a clean inode from the
- * return value of xfs_iflush().
- *
- * Note that because the inode is flushed delayed write by background
- * writeback, the flush lock may already be held here and waiting on it can
- * result in very long latencies. Hence for sync reclaims, where we wait on the
- * flush lock, the caller should push out delayed write inodes first before
- * trying to reclaim them to minimise the amount of time spent waiting. For
- * background relaim, we just requeue the inode for the next pass.
- *
- * Hence the order of actions after gaining the locks should be:
- *     bad             => reclaim
- *     shutdown        => unpin and reclaim
- *     pinned, delwri  => requeue
- *     pinned, sync    => unpin
- *     stale           => reclaim
- *     clean           => reclaim
- *     dirty, delwri   => flush and requeue
- *     dirty, sync     => flush, wait and reclaim
- */
-STATIC int
-xfs_reclaim_inode(
-       struct xfs_inode        *ip,
-       struct xfs_perag        *pag,
-       int                     sync_mode)
-{
-       int     error;
-
-restart:
-       error = 0;
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       if (!xfs_iflock_nowait(ip)) {
-               if (!(sync_mode & SYNC_WAIT))
-                       goto out;
-               xfs_iflock(ip);
-       }
-
-       if (is_bad_inode(VFS_I(ip)))
-               goto reclaim;
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-               xfs_iunpin_wait(ip);
-               goto reclaim;
-       }
-       if (xfs_ipincount(ip)) {
-               if (!(sync_mode & SYNC_WAIT)) {
-                       xfs_ifunlock(ip);
-                       goto out;
-               }
-               xfs_iunpin_wait(ip);
-       }
-       if (xfs_iflags_test(ip, XFS_ISTALE))
-               goto reclaim;
-       if (xfs_inode_clean(ip))
-               goto reclaim;
-
-       /*
-        * Now we have an inode that needs flushing.
-        *
-        * We do a nonblocking flush here even if we are doing a SYNC_WAIT
-        * reclaim as we can deadlock with inode cluster removal.
-        * xfs_ifree_cluster() can lock the inode buffer before it locks the
-        * ip->i_lock, and we are doing the exact opposite here. As a result,
-        * doing a blocking xfs_itobp() to get the cluster buffer will result
-        * in an ABBA deadlock with xfs_ifree_cluster().
-        *
-        * As xfs_ifree_cluser() must gather all inodes that are active in the
-        * cache to mark them stale, if we hit this case we don't actually want
-        * to do IO here - we want the inode marked stale so we can simply
-        * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
-        * just unlock the inode, back off and try again. Hopefully the next
-        * pass through will see the stale flag set on the inode.
-        */
-       error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
-       if (sync_mode & SYNC_WAIT) {
-               if (error == EAGAIN) {
-                       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-                       /* backoff longer than in xfs_ifree_cluster */
-                       delay(2);
-                       goto restart;
-               }
-               xfs_iflock(ip);
-               goto reclaim;
-       }
-
-       /*
-        * When we have to flush an inode but don't have SYNC_WAIT set, we
-        * flush the inode out using a delwri buffer and wait for the next
-        * call into reclaim to find it in a clean state instead of waiting for
-        * it now. We also don't return errors here - if the error is transient
-        * then the next reclaim pass will flush the inode, and if the error
-        * is permanent then the next sync reclaim will reclaim the inode and
-        * pass on the error.
-        */
-       if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-               xfs_warn(ip->i_mount,
-                       "inode 0x%llx background reclaim flush failed with %d",
-                       (long long)ip->i_ino, error);
-       }
-out:
-       xfs_iflags_clear(ip, XFS_IRECLAIM);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       /*
-        * We could return EAGAIN here to make reclaim rescan the inode tree in
-        * a short while. However, this just burns CPU time scanning the tree
-        * waiting for IO to complete and xfssyncd never goes back to the idle
-        * state. Instead, return 0 to let the next scheduled background reclaim
-        * attempt to reclaim the inode again.
-        */
-       return 0;
-
-reclaim:
-       xfs_ifunlock(ip);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       XFS_STATS_INC(xs_ig_reclaims);
-       /*
-        * Remove the inode from the per-AG radix tree.
-        *
-        * Because radix_tree_delete won't complain even if the item was never
-        * added to the tree assert that it's been there before to catch
-        * problems with the inode life time early on.
-        */
-       spin_lock(&pag->pag_ici_lock);
-       if (!radix_tree_delete(&pag->pag_ici_root,
-                               XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
-               ASSERT(0);
-       __xfs_inode_clear_reclaim(pag, ip);
-       spin_unlock(&pag->pag_ici_lock);
-
-       /*
-        * Here we do an (almost) spurious inode lock in order to coordinate
-        * with inode cache radix tree lookups.  This is because the lookup
-        * can reference the inodes in the cache without taking references.
-        *
-        * We make that OK here by ensuring that we wait until the inode is
-        * unlocked after the lookup before we go ahead and free it.  We get
-        * both the ilock and the iolock because the code may need to drop the
-        * ilock one but will still hold the iolock.
-        */
-       xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-       xfs_qm_dqdetach(ip);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-
-       xfs_inode_free(ip);
-       return error;
-
-}
-
-/*
- * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
- * corrupted, we still want to try to reclaim all the inodes. If we don't,
- * then a shut down during filesystem unmount reclaim walk leak all the
- * unreclaimed inodes.
- */
-int
-xfs_reclaim_inodes_ag(
-       struct xfs_mount        *mp,
-       int                     flags,
-       int                     *nr_to_scan)
-{
-       struct xfs_perag        *pag;
-       int                     error = 0;
-       int                     last_error = 0;
-       xfs_agnumber_t          ag;
-       int                     trylock = flags & SYNC_TRYLOCK;
-       int                     skipped;
-
-restart:
-       ag = 0;
-       skipped = 0;
-       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
-               unsigned long   first_index = 0;
-               int             done = 0;
-               int             nr_found = 0;
-
-               ag = pag->pag_agno + 1;
-
-               if (trylock) {
-                       if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
-                               skipped++;
-                               xfs_perag_put(pag);
-                               continue;
-                       }
-                       first_index = pag->pag_ici_reclaim_cursor;
-               } else
-                       mutex_lock(&pag->pag_ici_reclaim_lock);
-
-               do {
-                       struct xfs_inode *batch[XFS_LOOKUP_BATCH];
-                       int     i;
-
-                       rcu_read_lock();
-                       nr_found = radix_tree_gang_lookup_tag(
-                                       &pag->pag_ici_root,
-                                       (void **)batch, first_index,
-                                       XFS_LOOKUP_BATCH,
-                                       XFS_ICI_RECLAIM_TAG);
-                       if (!nr_found) {
-                               done = 1;
-                               rcu_read_unlock();
-                               break;
-                       }
-
-                       /*
-                        * Grab the inodes before we drop the lock. if we found
-                        * nothing, nr == 0 and the loop will be skipped.
-                        */
-                       for (i = 0; i < nr_found; i++) {
-                               struct xfs_inode *ip = batch[i];
-
-                               if (done || xfs_reclaim_inode_grab(ip, flags))
-                                       batch[i] = NULL;
-
-                               /*
-                                * Update the index for the next lookup. Catch
-                                * overflows into the next AG range which can
-                                * occur if we have inodes in the last block of
-                                * the AG and we are currently pointing to the
-                                * last inode.
-                                *
-                                * Because we may see inodes that are from the
-                                * wrong AG due to RCU freeing and
-                                * reallocation, only update the index if it
-                                * lies in this AG. It was a race that lead us
-                                * to see this inode, so another lookup from
-                                * the same index will not find it again.
-                                */
-                               if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
-                                                               pag->pag_agno)
-                                       continue;
-                               first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-                               if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-                                       done = 1;
-                       }
-
-                       /* unlock now we've grabbed the inodes. */
-                       rcu_read_unlock();
-
-                       for (i = 0; i < nr_found; i++) {
-                               if (!batch[i])
-                                       continue;
-                               error = xfs_reclaim_inode(batch[i], pag, flags);
-                               if (error && last_error != EFSCORRUPTED)
-                                       last_error = error;
-                       }
-
-                       *nr_to_scan -= XFS_LOOKUP_BATCH;
-
-                       cond_resched();
-
-               } while (nr_found && !done && *nr_to_scan > 0);
-
-               if (trylock && !done)
-                       pag->pag_ici_reclaim_cursor = first_index;
-               else
-                       pag->pag_ici_reclaim_cursor = 0;
-               mutex_unlock(&pag->pag_ici_reclaim_lock);
-               xfs_perag_put(pag);
-       }
-
-       /*
-        * if we skipped any AG, and we still have scan count remaining, do
-        * another pass this time using blocking reclaim semantics (i.e
-        * waiting on the reclaim locks and ignoring the reclaim cursors). This
-        * ensure that when we get more reclaimers than AGs we block rather
-        * than spin trying to execute reclaim.
-        */
-       if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
-               trylock = 0;
-               goto restart;
-       }
-       return XFS_ERROR(last_error);
-}
-
-int
-xfs_reclaim_inodes(
-       xfs_mount_t     *mp,
-       int             mode)
-{
-       int             nr_to_scan = INT_MAX;
-
-       return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
-}
-
-/*
- * Scan a certain number of inodes for reclaim.
- *
- * When called we make sure that there is a background (fast) inode reclaim in
- * progress, while we will throttle the speed of reclaim via doing synchronous
- * reclaim of inodes. That means if we come across dirty inodes, we wait for
- * them to be cleaned, which we hope will not be very long due to the
- * background walker having already kicked the IO off on those dirty inodes.
- */
-void
-xfs_reclaim_inodes_nr(
-       struct xfs_mount        *mp,
-       int                     nr_to_scan)
-{
-       /* kick background reclaimer and push the AIL */
-       xfs_syncd_queue_reclaim(mp);
-       xfs_ail_push_all(mp->m_ail);
-
-       xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
-}
-
-/*
- * Return the number of reclaimable inodes in the filesystem for
- * the shrinker to determine how much to reclaim.
- */
-int
-xfs_reclaim_inodes_count(
-       struct xfs_mount        *mp)
-{
-       struct xfs_perag        *pag;
-       xfs_agnumber_t          ag = 0;
-       int                     reclaimable = 0;
-
-       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
-               ag = pag->pag_agno + 1;
-               reclaimable += pag->pag_ici_reclaimable;
-               xfs_perag_put(pag);
-       }
-       return reclaimable;
-}
-
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h

deleted file mode 100644 (file)

index 941202e..0000000
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef XFS_SYNC_H
-#define XFS_SYNC_H 1
-
-struct xfs_mount;
-struct xfs_perag;
-
-#define SYNC_WAIT              0x0001  /* wait for i/o to complete */
-#define SYNC_TRYLOCK           0x0002  /* only try to lock inodes */
-
-extern struct workqueue_struct *xfs_syncd_wq;  /* sync workqueue */
-
-int xfs_syncd_init(struct xfs_mount *mp);
-void xfs_syncd_stop(struct xfs_mount *mp);
-
-int xfs_quiesce_data(struct xfs_mount *mp);
-void xfs_quiesce_attr(struct xfs_mount *mp);
-
-void xfs_flush_inodes(struct xfs_inode *ip);
-
-int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
-int xfs_reclaim_inodes_count(struct xfs_mount *mp);
-void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
-
-void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
-void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
-void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
-                               struct xfs_inode *ip);
-
-int xfs_sync_inode_grab(struct xfs_inode *ip);
-int xfs_inode_ag_iterator(struct xfs_mount *mp,
-       int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
-       int flags);
-
-#endif
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c

deleted file mode 100644 (file)

index ee2d2ad..0000000
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (c) 2001-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include "xfs_error.h"
-
-static struct ctl_table_header *xfs_table_header;
-
-#ifdef CONFIG_PROC_FS
-STATIC int
-xfs_stats_clear_proc_handler(
-       ctl_table       *ctl,
-       int             write,
-       void            __user *buffer,
-       size_t          *lenp,
-       loff_t          *ppos)
-{
-       int             c, ret, *valp = ctl->data;
-       __uint32_t      vn_active;
-
-       ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
-
-       if (!ret && write && *valp) {
-               xfs_notice(NULL, "Clearing xfsstats");
-               for_each_possible_cpu(c) {
-                       preempt_disable();
-                       /* save vn_active, it's a universal truth! */
-                       vn_active = per_cpu(xfsstats, c).vn_active;
-                       memset(&per_cpu(xfsstats, c), 0,
-                              sizeof(struct xfsstats));
-                       per_cpu(xfsstats, c).vn_active = vn_active;
-                       preempt_enable();
-               }
-               xfs_stats_clear = 0;
-       }
-
-       return ret;
-}
-
-STATIC int
-xfs_panic_mask_proc_handler(
-       ctl_table       *ctl,
-       int             write,
-       void            __user *buffer,
-       size_t          *lenp,
-       loff_t          *ppos)
-{
-       int             ret, *valp = ctl->data;
-
-       ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
-       if (!ret && write) {
-               xfs_panic_mask = *valp;
-#ifdef DEBUG
-               xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES);
-#endif
-       }
-       return ret;
-}
-#endif /* CONFIG_PROC_FS */
-
-static ctl_table xfs_table[] = {
-       {
-               .procname       = "irix_sgid_inherit",
-               .data           = &xfs_params.sgid_inherit.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.sgid_inherit.min,
-               .extra2         = &xfs_params.sgid_inherit.max
-       },
-       {
-               .procname       = "irix_symlink_mode",
-               .data           = &xfs_params.symlink_mode.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.symlink_mode.min,
-               .extra2         = &xfs_params.symlink_mode.max
-       },
-       {
-               .procname       = "panic_mask",
-               .data           = &xfs_params.panic_mask.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = xfs_panic_mask_proc_handler,
-               .extra1         = &xfs_params.panic_mask.min,
-               .extra2         = &xfs_params.panic_mask.max
-       },
-
-       {
-               .procname       = "error_level",
-               .data           = &xfs_params.error_level.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.error_level.min,
-               .extra2         = &xfs_params.error_level.max
-       },
-       {
-               .procname       = "xfssyncd_centisecs",
-               .data           = &xfs_params.syncd_timer.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.syncd_timer.min,
-               .extra2         = &xfs_params.syncd_timer.max
-       },
-       {
-               .procname       = "inherit_sync",
-               .data           = &xfs_params.inherit_sync.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.inherit_sync.min,
-               .extra2         = &xfs_params.inherit_sync.max
-       },
-       {
-               .procname       = "inherit_nodump",
-               .data           = &xfs_params.inherit_nodump.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.inherit_nodump.min,
-               .extra2         = &xfs_params.inherit_nodump.max
-       },
-       {
-               .procname       = "inherit_noatime",
-               .data           = &xfs_params.inherit_noatim.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.inherit_noatim.min,
-               .extra2         = &xfs_params.inherit_noatim.max
-       },
-       {
-               .procname       = "xfsbufd_centisecs",
-               .data           = &xfs_params.xfs_buf_timer.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.xfs_buf_timer.min,
-               .extra2         = &xfs_params.xfs_buf_timer.max
-       },
-       {
-               .procname       = "age_buffer_centisecs",
-               .data           = &xfs_params.xfs_buf_age.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.xfs_buf_age.min,
-               .extra2         = &xfs_params.xfs_buf_age.max
-       },
-       {
-               .procname       = "inherit_nosymlinks",
-               .data           = &xfs_params.inherit_nosym.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.inherit_nosym.min,
-               .extra2         = &xfs_params.inherit_nosym.max
-       },
-       {
-               .procname       = "rotorstep",
-               .data           = &xfs_params.rotorstep.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.rotorstep.min,
-               .extra2         = &xfs_params.rotorstep.max
-       },
-       {
-               .procname       = "inherit_nodefrag",
-               .data           = &xfs_params.inherit_nodfrg.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.inherit_nodfrg.min,
-               .extra2         = &xfs_params.inherit_nodfrg.max
-       },
-       {
-               .procname       = "filestream_centisecs",
-               .data           = &xfs_params.fstrm_timer.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xfs_params.fstrm_timer.min,
-               .extra2         = &xfs_params.fstrm_timer.max,
-       },
-       /* please keep this the last entry */
-#ifdef CONFIG_PROC_FS
-       {
-               .procname       = "stats_clear",
-               .data           = &xfs_params.stats_clear.val,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = xfs_stats_clear_proc_handler,
-               .extra1         = &xfs_params.stats_clear.min,
-               .extra2         = &xfs_params.stats_clear.max
-       },
-#endif /* CONFIG_PROC_FS */
-
-       {}
-};
-
-static ctl_table xfs_dir_table[] = {
-       {
-               .procname       = "xfs",
-               .mode           = 0555,
-               .child          = xfs_table
-       },
-       {}
-};
-
-static ctl_table xfs_root_table[] = {
-       {
-               .procname       = "fs",
-               .mode           = 0555,
-               .child          = xfs_dir_table
-       },
-       {}
-};
-
-int
-xfs_sysctl_register(void)
-{
-       xfs_table_header = register_sysctl_table(xfs_root_table);
-       if (!xfs_table_header)
-               return -ENOMEM;
-       return 0;
-}
-
-void
-xfs_sysctl_unregister(void)
-{
-       unregister_sysctl_table(xfs_table_header);
-}
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h

deleted file mode 100644 (file)

index b9937d4..0000000
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2001-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SYSCTL_H__
-#define __XFS_SYSCTL_H__
-
-#include <linux/sysctl.h>
-
-/*
- * Tunable xfs parameters
- */
-
-typedef struct xfs_sysctl_val {
-       int min;
-       int val;
-       int max;
-} xfs_sysctl_val_t;
-
-typedef struct xfs_param {
-       xfs_sysctl_val_t sgid_inherit;  /* Inherit S_ISGID if process' GID is
-                                        * not a member of parent dir GID. */
-       xfs_sysctl_val_t symlink_mode;  /* Link creat mode affected by umask */
-       xfs_sysctl_val_t panic_mask;    /* bitmask to cause panic on errors. */
-       xfs_sysctl_val_t error_level;   /* Degree of reporting for problems  */
-       xfs_sysctl_val_t syncd_timer;   /* Interval between xfssyncd wakeups */
-       xfs_sysctl_val_t stats_clear;   /* Reset all XFS statistics to zero. */
-       xfs_sysctl_val_t inherit_sync;  /* Inherit the "sync" inode flag. */
-       xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */
-       xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */
-       xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */
-       xfs_sysctl_val_t xfs_buf_age;   /* Metadata buffer age before flush. */
-       xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
-       xfs_sysctl_val_t rotorstep;     /* inode32 AG rotoring control knob */
-       xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
-       xfs_sysctl_val_t fstrm_timer;   /* Filestream dir-AG assoc'n timeout. */
-} xfs_param_t;
-
-/*
- * xfs_error_level:
- *
- * How much error reporting will be done when internal problems are
- * encountered.  These problems normally return an EFSCORRUPTED to their
- * caller, with no other information reported.
- *
- * 0   No error reports
- * 1   Report EFSCORRUPTED errors that will cause a filesystem shutdown
- * 5   Report all EFSCORRUPTED errors (all of the above errors, plus any
- *     additional errors that are known to not cause shutdowns)
- *
- * xfs_panic_mask bit 0x8 turns the error reports into panics
- */
-
-enum {
-       /* XFS_REFCACHE_SIZE = 1 */
-       /* XFS_REFCACHE_PURGE = 2 */
-       /* XFS_RESTRICT_CHOWN = 3 */
-       XFS_SGID_INHERIT = 4,
-       XFS_SYMLINK_MODE = 5,
-       XFS_PANIC_MASK = 6,
-       XFS_ERRLEVEL = 7,
-       XFS_SYNCD_TIMER = 8,
-       /* XFS_PROBE_DMAPI = 9 */
-       /* XFS_PROBE_IOOPS = 10 */
-       /* XFS_PROBE_QUOTA = 11 */
-       XFS_STATS_CLEAR = 12,
-       XFS_INHERIT_SYNC = 13,
-       XFS_INHERIT_NODUMP = 14,
-       XFS_INHERIT_NOATIME = 15,
-       XFS_BUF_TIMER = 16,
-       XFS_BUF_AGE = 17,
-       /* XFS_IO_BYPASS = 18 */
-       XFS_INHERIT_NOSYM = 19,
-       XFS_ROTORSTEP = 20,
-       XFS_INHERIT_NODFRG = 21,
-       XFS_FILESTREAM_TIMER = 22,
-};
-
-extern xfs_param_t     xfs_params;
-
-#ifdef CONFIG_SYSCTL
-extern int xfs_sysctl_register(void);
-extern void xfs_sysctl_unregister(void);
-#else
-# define xfs_sysctl_register()         (0)
-# define xfs_sysctl_unregister()       do { } while (0)
-#endif /* CONFIG_SYSCTL */
-
-#endif /* __XFS_SYSCTL_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c

deleted file mode 100644 (file)

index 88d25d4..0000000
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2009, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_mount.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
-#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_log_priv.h"
-#include "xfs_buf_item.h"
-#include "xfs_quota.h"
-#include "xfs_iomap.h"
-#include "xfs_aops.h"
-#include "quota/xfs_dquot_item.h"
-#include "quota/xfs_dquot.h"
-#include "xfs_log_recover.h"
-#include "xfs_inode_item.h"
-
-/*
- * We include this last to have the helpers above available for the trace
- * event implementations.
- */
-#define CREATE_TRACE_POINTS
-#include "xfs_trace.h"
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h

deleted file mode 100644 (file)

index 690fc7a..0000000
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ /dev/null
@@ -1,1746 +0,0 @@
-/*
- * Copyright (c) 2009, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM xfs
-
-#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_XFS_H
-
-#include <linux/tracepoint.h>
-
-struct xfs_agf;
-struct xfs_alloc_arg;
-struct xfs_attr_list_context;
-struct xfs_buf_log_item;
-struct xfs_da_args;
-struct xfs_da_node_entry;
-struct xfs_dquot;
-struct xlog_ticket;
-struct log;
-struct xlog_recover;
-struct xlog_recover_item;
-struct xfs_buf_log_format;
-struct xfs_inode_log_format;
-
-DECLARE_EVENT_CLASS(xfs_attr_list_class,
-       TP_PROTO(struct xfs_attr_list_context *ctx),
-       TP_ARGS(ctx),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(u32, hashval)
-               __field(u32, blkno)
-               __field(u32, offset)
-               __field(void *, alist)
-               __field(int, bufsize)
-               __field(int, count)
-               __field(int, firstu)
-               __field(int, dupcnt)
-               __field(int, flags)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
-               __entry->ino = ctx->dp->i_ino;
-               __entry->hashval = ctx->cursor->hashval;
-               __entry->blkno = ctx->cursor->blkno;
-               __entry->offset = ctx->cursor->offset;
-               __entry->alist = ctx->alist;
-               __entry->bufsize = ctx->bufsize;
-               __entry->count = ctx->count;
-               __entry->firstu = ctx->firstu;
-               __entry->flags = ctx->flags;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
-                 "alist 0x%p size %u count %u firstu %u flags %d %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                  __entry->ino,
-                  __entry->hashval,
-                  __entry->blkno,
-                  __entry->offset,
-                  __entry->dupcnt,
-                  __entry->alist,
-                  __entry->bufsize,
-                  __entry->count,
-                  __entry->firstu,
-                  __entry->flags,
-                  __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
-       )
-)
-
-#define DEFINE_ATTR_LIST_EVENT(name) \
-DEFINE_EVENT(xfs_attr_list_class, name, \
-       TP_PROTO(struct xfs_attr_list_context *ctx), \
-       TP_ARGS(ctx))
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
-
-DECLARE_EVENT_CLASS(xfs_perag_class,
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
-                unsigned long caller_ip),
-       TP_ARGS(mp, agno, refcount, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(int, refcount)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = agno;
-               __entry->refcount = refcount;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d agno %u refcount %d caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->refcount,
-                 (char *)__entry->caller_ip)
-);
-
-#define DEFINE_PERAG_REF_EVENT(name)   \
-DEFINE_EVENT(xfs_perag_class, name,    \
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,       \
-                unsigned long caller_ip),                                      \
-       TP_ARGS(mp, agno, refcount, caller_ip))
-DEFINE_PERAG_REF_EVENT(xfs_perag_get);
-DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
-DEFINE_PERAG_REF_EVENT(xfs_perag_put);
-DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
-DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
-
-TRACE_EVENT(xfs_attr_list_node_descend,
-       TP_PROTO(struct xfs_attr_list_context *ctx,
-                struct xfs_da_node_entry *btree),
-       TP_ARGS(ctx, btree),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(u32, hashval)
-               __field(u32, blkno)
-               __field(u32, offset)
-               __field(void *, alist)
-               __field(int, bufsize)
-               __field(int, count)
-               __field(int, firstu)
-               __field(int, dupcnt)
-               __field(int, flags)
-               __field(u32, bt_hashval)
-               __field(u32, bt_before)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
-               __entry->ino = ctx->dp->i_ino;
-               __entry->hashval = ctx->cursor->hashval;
-               __entry->blkno = ctx->cursor->blkno;
-               __entry->offset = ctx->cursor->offset;
-               __entry->alist = ctx->alist;
-               __entry->bufsize = ctx->bufsize;
-               __entry->count = ctx->count;
-               __entry->firstu = ctx->firstu;
-               __entry->flags = ctx->flags;
-               __entry->bt_hashval = be32_to_cpu(btree->hashval);
-               __entry->bt_before = be32_to_cpu(btree->before);
-       ),
-       TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
-                 "alist 0x%p size %u count %u firstu %u flags %d %s "
-                 "node hashval %u, node before %u",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                  __entry->ino,
-                  __entry->hashval,
-                  __entry->blkno,
-                  __entry->offset,
-                  __entry->dupcnt,
-                  __entry->alist,
-                  __entry->bufsize,
-                  __entry->count,
-                  __entry->firstu,
-                  __entry->flags,
-                  __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS),
-                  __entry->bt_hashval,
-                  __entry->bt_before)
-);
-
-TRACE_EVENT(xfs_iext_insert,
-       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx,
-                struct xfs_bmbt_irec *r, int state, unsigned long caller_ip),
-       TP_ARGS(ip, idx, r, state, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_extnum_t, idx)
-               __field(xfs_fileoff_t, startoff)
-               __field(xfs_fsblock_t, startblock)
-               __field(xfs_filblks_t, blockcount)
-               __field(xfs_exntst_t, state)
-               __field(int, bmap_state)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->idx = idx;
-               __entry->startoff = r->br_startoff;
-               __entry->startblock = r->br_startblock;
-               __entry->blockcount = r->br_blockcount;
-               __entry->state = r->br_state;
-               __entry->bmap_state = state;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
-                 "offset %lld block %lld count %lld flag %d caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
-                 (long)__entry->idx,
-                 __entry->startoff,
-                 (__int64_t)__entry->startblock,
-                 __entry->blockcount,
-                 __entry->state,
-                 (char *)__entry->caller_ip)
-);
-
-DECLARE_EVENT_CLASS(xfs_bmap_class,
-       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
-                unsigned long caller_ip),
-       TP_ARGS(ip, idx, state, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_extnum_t, idx)
-               __field(xfs_fileoff_t, startoff)
-               __field(xfs_fsblock_t, startblock)
-               __field(xfs_filblks_t, blockcount)
-               __field(xfs_exntst_t, state)
-               __field(int, bmap_state)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               struct xfs_ifork        *ifp = (state & BMAP_ATTRFORK) ?
-                                               ip->i_afp : &ip->i_df;
-               struct xfs_bmbt_irec    r;
-
-               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->idx = idx;
-               __entry->startoff = r.br_startoff;
-               __entry->startblock = r.br_startblock;
-               __entry->blockcount = r.br_blockcount;
-               __entry->state = r.br_state;
-               __entry->bmap_state = state;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
-                 "offset %lld block %lld count %lld flag %d caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
-                 (long)__entry->idx,
-                 __entry->startoff,
-                 (__int64_t)__entry->startblock,
-                 __entry->blockcount,
-                 __entry->state,
-                 (char *)__entry->caller_ip)
-)
-
-#define DEFINE_BMAP_EVENT(name) \
-DEFINE_EVENT(xfs_bmap_class, name, \
-       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
-                unsigned long caller_ip), \
-       TP_ARGS(ip, idx, state, caller_ip))
-DEFINE_BMAP_EVENT(xfs_iext_remove);
-DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
-DEFINE_BMAP_EVENT(xfs_bmap_post_update);
-DEFINE_BMAP_EVENT(xfs_extlist);
-
-DECLARE_EVENT_CLASS(xfs_buf_class,
-       TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
-       TP_ARGS(bp, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_daddr_t, bno)
-               __field(size_t, buffer_length)
-               __field(int, hold)
-               __field(int, pincount)
-               __field(unsigned, lockval)
-               __field(unsigned, flags)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = bp->b_target->bt_dev;
-               __entry->bno = bp->b_bn;
-               __entry->buffer_length = bp->b_buffer_length;
-               __entry->hold = atomic_read(&bp->b_hold);
-               __entry->pincount = atomic_read(&bp->b_pin_count);
-               __entry->lockval = bp->b_sema.count;
-               __entry->flags = bp->b_flags;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d flags %s caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 (unsigned long long)__entry->bno,
-                 __entry->buffer_length,
-                 __entry->hold,
-                 __entry->pincount,
-                 __entry->lockval,
-                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
-                 (void *)__entry->caller_ip)
-)
-
-#define DEFINE_BUF_EVENT(name) \
-DEFINE_EVENT(xfs_buf_class, name, \
-       TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
-       TP_ARGS(bp, caller_ip))
-DEFINE_BUF_EVENT(xfs_buf_init);
-DEFINE_BUF_EVENT(xfs_buf_free);
-DEFINE_BUF_EVENT(xfs_buf_hold);
-DEFINE_BUF_EVENT(xfs_buf_rele);
-DEFINE_BUF_EVENT(xfs_buf_iodone);
-DEFINE_BUF_EVENT(xfs_buf_iorequest);
-DEFINE_BUF_EVENT(xfs_buf_bawrite);
-DEFINE_BUF_EVENT(xfs_buf_bdwrite);
-DEFINE_BUF_EVENT(xfs_buf_lock);
-DEFINE_BUF_EVENT(xfs_buf_lock_done);
-DEFINE_BUF_EVENT(xfs_buf_trylock);
-DEFINE_BUF_EVENT(xfs_buf_unlock);
-DEFINE_BUF_EVENT(xfs_buf_iowait);
-DEFINE_BUF_EVENT(xfs_buf_iowait_done);
-DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
-DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
-DEFINE_BUF_EVENT(xfs_buf_delwri_split);
-DEFINE_BUF_EVENT(xfs_buf_get_uncached);
-DEFINE_BUF_EVENT(xfs_bdstrat_shut);
-DEFINE_BUF_EVENT(xfs_buf_item_relse);
-DEFINE_BUF_EVENT(xfs_buf_item_iodone);
-DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
-DEFINE_BUF_EVENT(xfs_buf_error_relse);
-DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
-DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
-
-/* not really buffer traces, but the buf provides useful information */
-DEFINE_BUF_EVENT(xfs_btree_corrupt);
-DEFINE_BUF_EVENT(xfs_da_btree_corrupt);
-DEFINE_BUF_EVENT(xfs_reset_dqcounts);
-DEFINE_BUF_EVENT(xfs_inode_item_push);
-
-/* pass flags explicitly */
-DECLARE_EVENT_CLASS(xfs_buf_flags_class,
-       TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
-       TP_ARGS(bp, flags, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_daddr_t, bno)
-               __field(size_t, buffer_length)
-               __field(int, hold)
-               __field(int, pincount)
-               __field(unsigned, lockval)
-               __field(unsigned, flags)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = bp->b_target->bt_dev;
-               __entry->bno = bp->b_bn;
-               __entry->buffer_length = bp->b_buffer_length;
-               __entry->flags = flags;
-               __entry->hold = atomic_read(&bp->b_hold);
-               __entry->pincount = atomic_read(&bp->b_pin_count);
-               __entry->lockval = bp->b_sema.count;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d flags %s caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 (unsigned long long)__entry->bno,
-                 __entry->buffer_length,
-                 __entry->hold,
-                 __entry->pincount,
-                 __entry->lockval,
-                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
-                 (void *)__entry->caller_ip)
-)
-
-#define DEFINE_BUF_FLAGS_EVENT(name) \
-DEFINE_EVENT(xfs_buf_flags_class, name, \
-       TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
-       TP_ARGS(bp, flags, caller_ip))
-DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
-DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
-DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
-
-TRACE_EVENT(xfs_buf_ioerror,
-       TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip),
-       TP_ARGS(bp, error, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_daddr_t, bno)
-               __field(size_t, buffer_length)
-               __field(unsigned, flags)
-               __field(int, hold)
-               __field(int, pincount)
-               __field(unsigned, lockval)
-               __field(int, error)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = bp->b_target->bt_dev;
-               __entry->bno = bp->b_bn;
-               __entry->buffer_length = bp->b_buffer_length;
-               __entry->hold = atomic_read(&bp->b_hold);
-               __entry->pincount = atomic_read(&bp->b_pin_count);
-               __entry->lockval = bp->b_sema.count;
-               __entry->error = error;
-               __entry->flags = bp->b_flags;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d error %d flags %s caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 (unsigned long long)__entry->bno,
-                 __entry->buffer_length,
-                 __entry->hold,
-                 __entry->pincount,
-                 __entry->lockval,
-                 __entry->error,
-                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
-                 (void *)__entry->caller_ip)
-);
-
-DECLARE_EVENT_CLASS(xfs_buf_item_class,
-       TP_PROTO(struct xfs_buf_log_item *bip),
-       TP_ARGS(bip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_daddr_t, buf_bno)
-               __field(size_t, buf_len)
-               __field(int, buf_hold)
-               __field(int, buf_pincount)
-               __field(int, buf_lockval)
-               __field(unsigned, buf_flags)
-               __field(unsigned, bli_recur)
-               __field(int, bli_refcount)
-               __field(unsigned, bli_flags)
-               __field(void *, li_desc)
-               __field(unsigned, li_flags)
-       ),
-       TP_fast_assign(
-               __entry->dev = bip->bli_buf->b_target->bt_dev;
-               __entry->bli_flags = bip->bli_flags;
-               __entry->bli_recur = bip->bli_recur;
-               __entry->bli_refcount = atomic_read(&bip->bli_refcount);
-               __entry->buf_bno = bip->bli_buf->b_bn;
-               __entry->buf_len = bip->bli_buf->b_buffer_length;
-               __entry->buf_flags = bip->bli_buf->b_flags;
-               __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
-               __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
-               __entry->buf_lockval = bip->bli_buf->b_sema.count;
-               __entry->li_desc = bip->bli_item.li_desc;
-               __entry->li_flags = bip->bli_item.li_flags;
-       ),
-       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d flags %s recur %d refcount %d bliflags %s "
-                 "lidesc 0x%p liflags %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 (unsigned long long)__entry->buf_bno,
-                 __entry->buf_len,
-                 __entry->buf_hold,
-                 __entry->buf_pincount,
-                 __entry->buf_lockval,
-                 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
-                 __entry->bli_recur,
-                 __entry->bli_refcount,
-                 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
-                 __entry->li_desc,
-                 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
-)
-
-#define DEFINE_BUF_ITEM_EVENT(name) \
-DEFINE_EVENT(xfs_buf_item_class, name, \
-       TP_PROTO(struct xfs_buf_log_item *bip), \
-       TP_ARGS(bip))
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
-
-DECLARE_EVENT_CLASS(xfs_lock_class,
-       TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
-                unsigned long caller_ip),
-       TP_ARGS(ip,  lock_flags, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(int, lock_flags)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->lock_flags = lock_flags;
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
-                 (void *)__entry->caller_ip)
-)
-
-#define DEFINE_LOCK_EVENT(name) \
-DEFINE_EVENT(xfs_lock_class, name, \
-       TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
-                unsigned long caller_ip), \
-       TP_ARGS(ip,  lock_flags, caller_ip))
-DEFINE_LOCK_EVENT(xfs_ilock);
-DEFINE_LOCK_EVENT(xfs_ilock_nowait);
-DEFINE_LOCK_EVENT(xfs_ilock_demote);
-DEFINE_LOCK_EVENT(xfs_iunlock);
-
-DECLARE_EVENT_CLASS(xfs_inode_class,
-       TP_PROTO(struct xfs_inode *ip),
-       TP_ARGS(ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino)
-)
-
-#define DEFINE_INODE_EVENT(name) \
-DEFINE_EVENT(xfs_inode_class, name, \
-       TP_PROTO(struct xfs_inode *ip), \
-       TP_ARGS(ip))
-DEFINE_INODE_EVENT(xfs_iget_skip);
-DEFINE_INODE_EVENT(xfs_iget_reclaim);
-DEFINE_INODE_EVENT(xfs_iget_reclaim_fail);
-DEFINE_INODE_EVENT(xfs_iget_hit);
-DEFINE_INODE_EVENT(xfs_iget_miss);
-
-DEFINE_INODE_EVENT(xfs_getattr);
-DEFINE_INODE_EVENT(xfs_setattr);
-DEFINE_INODE_EVENT(xfs_readlink);
-DEFINE_INODE_EVENT(xfs_alloc_file_space);
-DEFINE_INODE_EVENT(xfs_free_file_space);
-DEFINE_INODE_EVENT(xfs_readdir);
-#ifdef CONFIG_XFS_POSIX_ACL
-DEFINE_INODE_EVENT(xfs_get_acl);
-#endif
-DEFINE_INODE_EVENT(xfs_vm_bmap);
-DEFINE_INODE_EVENT(xfs_file_ioctl);
-DEFINE_INODE_EVENT(xfs_file_compat_ioctl);
-DEFINE_INODE_EVENT(xfs_ioctl_setattr);
-DEFINE_INODE_EVENT(xfs_file_fsync);
-DEFINE_INODE_EVENT(xfs_destroy_inode);
-DEFINE_INODE_EVENT(xfs_write_inode);
-DEFINE_INODE_EVENT(xfs_evict_inode);
-
-DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
-DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
-
-DECLARE_EVENT_CLASS(xfs_iref_class,
-       TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
-       TP_ARGS(ip, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(int, count)
-               __field(int, pincount)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->count = atomic_read(&VFS_I(ip)->i_count);
-               __entry->pincount = atomic_read(&ip->i_pincount);
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->count,
-                 __entry->pincount,
-                 (char *)__entry->caller_ip)
-)
-
-#define DEFINE_IREF_EVENT(name) \
-DEFINE_EVENT(xfs_iref_class, name, \
-       TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
-       TP_ARGS(ip, caller_ip))
-DEFINE_IREF_EVENT(xfs_ihold);
-DEFINE_IREF_EVENT(xfs_irele);
-DEFINE_IREF_EVENT(xfs_inode_pin);
-DEFINE_IREF_EVENT(xfs_inode_unpin);
-DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
-
-DECLARE_EVENT_CLASS(xfs_namespace_class,
-       TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),
-       TP_ARGS(dp, name),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, dp_ino)
-               __dynamic_array(char, name, name->len)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(dp)->i_sb->s_dev;
-               __entry->dp_ino = dp->i_ino;
-               memcpy(__get_str(name), name->name, name->len);
-       ),
-       TP_printk("dev %d:%d dp ino 0x%llx name %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->dp_ino,
-                 __get_str(name))
-)
-
-#define DEFINE_NAMESPACE_EVENT(name) \
-DEFINE_EVENT(xfs_namespace_class, name, \
-       TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \
-       TP_ARGS(dp, name))
-DEFINE_NAMESPACE_EVENT(xfs_remove);
-DEFINE_NAMESPACE_EVENT(xfs_link);
-DEFINE_NAMESPACE_EVENT(xfs_lookup);
-DEFINE_NAMESPACE_EVENT(xfs_create);
-DEFINE_NAMESPACE_EVENT(xfs_symlink);
-
-TRACE_EVENT(xfs_rename,
-       TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp,
-                struct xfs_name *src_name, struct xfs_name *target_name),
-       TP_ARGS(src_dp, target_dp, src_name, target_name),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, src_dp_ino)
-               __field(xfs_ino_t, target_dp_ino)
-               __dynamic_array(char, src_name, src_name->len)
-               __dynamic_array(char, target_name, target_name->len)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(src_dp)->i_sb->s_dev;
-               __entry->src_dp_ino = src_dp->i_ino;
-               __entry->target_dp_ino = target_dp->i_ino;
-               memcpy(__get_str(src_name), src_name->name, src_name->len);
-               memcpy(__get_str(target_name), target_name->name, target_name->len);
-       ),
-       TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
-                 " src name %s target name %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->src_dp_ino,
-                 __entry->target_dp_ino,
-                 __get_str(src_name),
-                 __get_str(target_name))
-)
-
-DECLARE_EVENT_CLASS(xfs_dquot_class,
-       TP_PROTO(struct xfs_dquot *dqp),
-       TP_ARGS(dqp),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(u32, id)
-               __field(unsigned, flags)
-               __field(unsigned, nrefs)
-               __field(unsigned long long, res_bcount)
-               __field(unsigned long long, bcount)
-               __field(unsigned long long, icount)
-               __field(unsigned long long, blk_hardlimit)
-               __field(unsigned long long, blk_softlimit)
-               __field(unsigned long long, ino_hardlimit)
-               __field(unsigned long long, ino_softlimit)
-       ), \
-       TP_fast_assign(
-               __entry->dev = dqp->q_mount->m_super->s_dev;
-               __entry->id = be32_to_cpu(dqp->q_core.d_id);
-               __entry->flags = dqp->dq_flags;
-               __entry->nrefs = dqp->q_nrefs;
-               __entry->res_bcount = dqp->q_res_bcount;
-               __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
-               __entry->icount = be64_to_cpu(dqp->q_core.d_icount);
-               __entry->blk_hardlimit =
-                       be64_to_cpu(dqp->q_core.d_blk_hardlimit);
-               __entry->blk_softlimit =
-                       be64_to_cpu(dqp->q_core.d_blk_softlimit);
-               __entry->ino_hardlimit =
-                       be64_to_cpu(dqp->q_core.d_ino_hardlimit);
-               __entry->ino_softlimit =
-                       be64_to_cpu(dqp->q_core.d_ino_softlimit);
-       ),
-       TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
-                 "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
-                 "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->id,
-                 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
-                 __entry->nrefs,
-                 __entry->res_bcount,
-                 __entry->bcount,
-                 __entry->blk_hardlimit,
-                 __entry->blk_softlimit,
-                 __entry->icount,
-                 __entry->ino_hardlimit,
-                 __entry->ino_softlimit)
-)
-
-#define DEFINE_DQUOT_EVENT(name) \
-DEFINE_EVENT(xfs_dquot_class, name, \
-       TP_PROTO(struct xfs_dquot *dqp), \
-       TP_ARGS(dqp))
-DEFINE_DQUOT_EVENT(xfs_dqadjust);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
-DEFINE_DQUOT_EVENT(xfs_dqattach_found);
-DEFINE_DQUOT_EVENT(xfs_dqattach_get);
-DEFINE_DQUOT_EVENT(xfs_dqinit);
-DEFINE_DQUOT_EVENT(xfs_dqreuse);
-DEFINE_DQUOT_EVENT(xfs_dqalloc);
-DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
-DEFINE_DQUOT_EVENT(xfs_dqread);
-DEFINE_DQUOT_EVENT(xfs_dqread_fail);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
-DEFINE_DQUOT_EVENT(xfs_dqget_hit);
-DEFINE_DQUOT_EVENT(xfs_dqget_miss);
-DEFINE_DQUOT_EVENT(xfs_dqput);
-DEFINE_DQUOT_EVENT(xfs_dqput_wait);
-DEFINE_DQUOT_EVENT(xfs_dqput_free);
-DEFINE_DQUOT_EVENT(xfs_dqrele);
-DEFINE_DQUOT_EVENT(xfs_dqflush);
-DEFINE_DQUOT_EVENT(xfs_dqflush_force);
-DEFINE_DQUOT_EVENT(xfs_dqflush_done);
-
-DECLARE_EVENT_CLASS(xfs_loggrant_class,
-       TP_PROTO(struct log *log, struct xlog_ticket *tic),
-       TP_ARGS(log, tic),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(unsigned, trans_type)
-               __field(char, ocnt)
-               __field(char, cnt)
-               __field(int, curr_res)
-               __field(int, unit_res)
-               __field(unsigned int, flags)
-               __field(int, reserveq)
-               __field(int, writeq)
-               __field(int, grant_reserve_cycle)
-               __field(int, grant_reserve_bytes)
-               __field(int, grant_write_cycle)
-               __field(int, grant_write_bytes)
-               __field(int, curr_cycle)
-               __field(int, curr_block)
-               __field(xfs_lsn_t, tail_lsn)
-       ),
-       TP_fast_assign(
-               __entry->dev = log->l_mp->m_super->s_dev;
-               __entry->trans_type = tic->t_trans_type;
-               __entry->ocnt = tic->t_ocnt;
-               __entry->cnt = tic->t_cnt;
-               __entry->curr_res = tic->t_curr_res;
-               __entry->unit_res = tic->t_unit_res;
-               __entry->flags = tic->t_flags;
-               __entry->reserveq = list_empty(&log->l_reserveq);
-               __entry->writeq = list_empty(&log->l_writeq);
-               xlog_crack_grant_head(&log->l_grant_reserve_head,
-                               &__entry->grant_reserve_cycle,
-                               &__entry->grant_reserve_bytes);
-               xlog_crack_grant_head(&log->l_grant_write_head,
-                               &__entry->grant_write_cycle,
-                               &__entry->grant_write_bytes);
-               __entry->curr_cycle = log->l_curr_cycle;
-               __entry->curr_block = log->l_curr_block;
-               __entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
-       ),
-       TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
-                 "t_unit_res %u t_flags %s reserveq %s "
-                 "writeq %s grant_reserve_cycle %d "
-                 "grant_reserve_bytes %d grant_write_cycle %d "
-                 "grant_write_bytes %d curr_cycle %d curr_block %d "
-                 "tail_cycle %d tail_block %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
-                 __entry->ocnt,
-                 __entry->cnt,
-                 __entry->curr_res,
-                 __entry->unit_res,
-                 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
-                 __entry->reserveq ? "empty" : "active",
-                 __entry->writeq ? "empty" : "active",
-                 __entry->grant_reserve_cycle,
-                 __entry->grant_reserve_bytes,
-                 __entry->grant_write_cycle,
-                 __entry->grant_write_bytes,
-                 __entry->curr_cycle,
-                 __entry->curr_block,
-                 CYCLE_LSN(__entry->tail_lsn),
-                 BLOCK_LSN(__entry->tail_lsn)
-       )
-)
-
-#define DEFINE_LOGGRANT_EVENT(name) \
-DEFINE_EVENT(xfs_loggrant_class, name, \
-       TP_PROTO(struct log *log, struct xlog_ticket *tic), \
-       TP_ARGS(log, tic))
-DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
-DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
-DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
-DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
-
-DECLARE_EVENT_CLASS(xfs_file_class,
-       TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
-       TP_ARGS(ip, count, offset, flags),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_fsize_t, size)
-               __field(xfs_fsize_t, new_size)
-               __field(loff_t, offset)
-               __field(size_t, count)
-               __field(int, flags)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->size = ip->i_d.di_size;
-               __entry->new_size = ip->i_new_size;
-               __entry->offset = offset;
-               __entry->count = count;
-               __entry->flags = flags;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
-                 "offset 0x%llx count 0x%zx ioflags %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->new_size,
-                 __entry->offset,
-                 __entry->count,
-                 __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
-)
-
-#define DEFINE_RW_EVENT(name)          \
-DEFINE_EVENT(xfs_file_class, name,     \
-       TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
-       TP_ARGS(ip, count, offset, flags))
-DEFINE_RW_EVENT(xfs_file_read);
-DEFINE_RW_EVENT(xfs_file_buffered_write);
-DEFINE_RW_EVENT(xfs_file_direct_write);
-DEFINE_RW_EVENT(xfs_file_splice_read);
-DEFINE_RW_EVENT(xfs_file_splice_write);
-
-DECLARE_EVENT_CLASS(xfs_page_class,
-       TP_PROTO(struct inode *inode, struct page *page, unsigned long off),
-       TP_ARGS(inode, page, off),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(pgoff_t, pgoff)
-               __field(loff_t, size)
-               __field(unsigned long, offset)
-               __field(int, delalloc)
-               __field(int, unwritten)
-       ),
-       TP_fast_assign(
-               int delalloc = -1, unwritten = -1;
-
-               if (page_has_buffers(page))
-                       xfs_count_page_state(page, &delalloc, &unwritten);
-               __entry->dev = inode->i_sb->s_dev;
-               __entry->ino = XFS_I(inode)->i_ino;
-               __entry->pgoff = page_offset(page);
-               __entry->size = i_size_read(inode);
-               __entry->offset = off;
-               __entry->delalloc = delalloc;
-               __entry->unwritten = unwritten;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
-                 "delalloc %d unwritten %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->pgoff,
-                 __entry->size,
-                 __entry->offset,
-                 __entry->delalloc,
-                 __entry->unwritten)
-)
-
-#define DEFINE_PAGE_EVENT(name)                \
-DEFINE_EVENT(xfs_page_class, name,     \
-       TP_PROTO(struct inode *inode, struct page *page, unsigned long off),    \
-       TP_ARGS(inode, page, off))
-DEFINE_PAGE_EVENT(xfs_writepage);
-DEFINE_PAGE_EVENT(xfs_releasepage);
-DEFINE_PAGE_EVENT(xfs_invalidatepage);
-
-DECLARE_EVENT_CLASS(xfs_imap_class,
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
-                int type, struct xfs_bmbt_irec *irec),
-       TP_ARGS(ip, offset, count, type, irec),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(loff_t, size)
-               __field(loff_t, new_size)
-               __field(loff_t, offset)
-               __field(size_t, count)
-               __field(int, type)
-               __field(xfs_fileoff_t, startoff)
-               __field(xfs_fsblock_t, startblock)
-               __field(xfs_filblks_t, blockcount)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->size = ip->i_d.di_size;
-               __entry->new_size = ip->i_new_size;
-               __entry->offset = offset;
-               __entry->count = count;
-               __entry->type = type;
-               __entry->startoff = irec ? irec->br_startoff : 0;
-               __entry->startblock = irec ? irec->br_startblock : 0;
-               __entry->blockcount = irec ? irec->br_blockcount : 0;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
-                 "offset 0x%llx count %zd type %s "
-                 "startoff 0x%llx startblock %lld blockcount 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->new_size,
-                 __entry->offset,
-                 __entry->count,
-                 __print_symbolic(__entry->type, XFS_IO_TYPES),
-                 __entry->startoff,
-                 (__int64_t)__entry->startblock,
-                 __entry->blockcount)
-)
-
-#define DEFINE_IOMAP_EVENT(name)       \
-DEFINE_EVENT(xfs_imap_class, name,     \
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
-                int type, struct xfs_bmbt_irec *irec),         \
-       TP_ARGS(ip, offset, count, type, irec))
-DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
-DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
-DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
-DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
-
-DECLARE_EVENT_CLASS(xfs_simple_io_class,
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
-       TP_ARGS(ip, offset, count),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(loff_t, isize)
-               __field(loff_t, disize)
-               __field(loff_t, new_size)
-               __field(loff_t, offset)
-               __field(size_t, count)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->isize = ip->i_size;
-               __entry->disize = ip->i_d.di_size;
-               __entry->new_size = ip->i_new_size;
-               __entry->offset = offset;
-               __entry->count = count;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
-                 "offset 0x%llx count %zd",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->isize,
-                 __entry->disize,
-                 __entry->new_size,
-                 __entry->offset,
-                 __entry->count)
-);
-
-#define DEFINE_SIMPLE_IO_EVENT(name)   \
-DEFINE_EVENT(xfs_simple_io_class, name,        \
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),        \
-       TP_ARGS(ip, offset, count))
-DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
-DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
-DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
-DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
-
-DECLARE_EVENT_CLASS(xfs_itrunc_class,
-       TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
-       TP_ARGS(ip, new_size),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_fsize_t, size)
-               __field(xfs_fsize_t, new_size)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->size = ip->i_d.di_size;
-               __entry->new_size = new_size;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->new_size)
-)
-
-#define DEFINE_ITRUNC_EVENT(name) \
-DEFINE_EVENT(xfs_itrunc_class, name, \
-       TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
-       TP_ARGS(ip, new_size))
-DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start);
-DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end);
-
-TRACE_EVENT(xfs_pagecache_inval,
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
-       TP_ARGS(ip, start, finish),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_fsize_t, size)
-               __field(xfs_off_t, start)
-               __field(xfs_off_t, finish)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->size = ip->i_d.di_size;
-               __entry->start = start;
-               __entry->finish = finish;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->start,
-                 __entry->finish)
-);
-
-TRACE_EVENT(xfs_bunmap,
-       TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len,
-                int flags, unsigned long caller_ip),
-       TP_ARGS(ip, bno, len, flags, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(xfs_fsize_t, size)
-               __field(xfs_fileoff_t, bno)
-               __field(xfs_filblks_t, len)
-               __field(unsigned long, caller_ip)
-               __field(int, flags)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->ino = ip->i_ino;
-               __entry->size = ip->i_d.di_size;
-               __entry->bno = bno;
-               __entry->len = len;
-               __entry->caller_ip = caller_ip;
-               __entry->flags = flags;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
-                 "flags %s caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->bno,
-                 __entry->len,
-                 __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS),
-                 (void *)__entry->caller_ip)
-
-);
-
-DECLARE_EVENT_CLASS(xfs_busy_class,
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
-                xfs_agblock_t agbno, xfs_extlen_t len),
-       TP_ARGS(mp, agno, agbno, len),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(xfs_agblock_t, agbno)
-               __field(xfs_extlen_t, len)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = agno;
-               __entry->agbno = agbno;
-               __entry->len = len;
-       ),
-       TP_printk("dev %d:%d agno %u agbno %u len %u",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->agbno,
-                 __entry->len)
-);
-#define DEFINE_BUSY_EVENT(name) \
-DEFINE_EVENT(xfs_busy_class, name, \
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
-                xfs_agblock_t agbno, xfs_extlen_t len), \
-       TP_ARGS(mp, agno, agbno, len))
-DEFINE_BUSY_EVENT(xfs_alloc_busy);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
-
-TRACE_EVENT(xfs_alloc_busy_trim,
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
-                xfs_agblock_t agbno, xfs_extlen_t len,
-                xfs_agblock_t tbno, xfs_extlen_t tlen),
-       TP_ARGS(mp, agno, agbno, len, tbno, tlen),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(xfs_agblock_t, agbno)
-               __field(xfs_extlen_t, len)
-               __field(xfs_agblock_t, tbno)
-               __field(xfs_extlen_t, tlen)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = agno;
-               __entry->agbno = agbno;
-               __entry->len = len;
-               __entry->tbno = tbno;
-               __entry->tlen = tlen;
-       ),
-       TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->agbno,
-                 __entry->len,
-                 __entry->tbno,
-                 __entry->tlen)
-);
-
-TRACE_EVENT(xfs_trans_commit_lsn,
-       TP_PROTO(struct xfs_trans *trans),
-       TP_ARGS(trans),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(struct xfs_trans *, tp)
-               __field(xfs_lsn_t, lsn)
-       ),
-       TP_fast_assign(
-               __entry->dev = trans->t_mountp->m_super->s_dev;
-               __entry->tp = trans;
-               __entry->lsn = trans->t_commit_lsn;
-       ),
-       TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->tp,
-                 __entry->lsn)
-);
-
-TRACE_EVENT(xfs_agf,
-       TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
-                unsigned long caller_ip),
-       TP_ARGS(mp, agf, flags, caller_ip),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(int, flags)
-               __field(__u32, length)
-               __field(__u32, bno_root)
-               __field(__u32, cnt_root)
-               __field(__u32, bno_level)
-               __field(__u32, cnt_level)
-               __field(__u32, flfirst)
-               __field(__u32, fllast)
-               __field(__u32, flcount)
-               __field(__u32, freeblks)
-               __field(__u32, longest)
-               __field(unsigned long, caller_ip)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = be32_to_cpu(agf->agf_seqno),
-               __entry->flags = flags;
-               __entry->length = be32_to_cpu(agf->agf_length),
-               __entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
-               __entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
-               __entry->bno_level =
-                               be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
-               __entry->cnt_level =
-                               be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
-               __entry->flfirst = be32_to_cpu(agf->agf_flfirst),
-               __entry->fllast = be32_to_cpu(agf->agf_fllast),
-               __entry->flcount = be32_to_cpu(agf->agf_flcount),
-               __entry->freeblks = be32_to_cpu(agf->agf_freeblks),
-               __entry->longest = be32_to_cpu(agf->agf_longest);
-               __entry->caller_ip = caller_ip;
-       ),
-       TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
-                 "levels b %u c %u flfirst %u fllast %u flcount %u "
-                 "freeblks %u longest %u caller %pf",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
-                 __entry->length,
-                 __entry->bno_root,
-                 __entry->cnt_root,
-                 __entry->bno_level,
-                 __entry->cnt_level,
-                 __entry->flfirst,
-                 __entry->fllast,
-                 __entry->flcount,
-                 __entry->freeblks,
-                 __entry->longest,
-                 (void *)__entry->caller_ip)
-);
-
-TRACE_EVENT(xfs_free_extent,
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
-                xfs_extlen_t len, bool isfl, int haveleft, int haveright),
-       TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(xfs_agblock_t, agbno)
-               __field(xfs_extlen_t, len)
-               __field(int, isfl)
-               __field(int, haveleft)
-               __field(int, haveright)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = agno;
-               __entry->agbno = agbno;
-               __entry->len = len;
-               __entry->isfl = isfl;
-               __entry->haveleft = haveleft;
-               __entry->haveright = haveright;
-       ),
-       TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->agbno,
-                 __entry->len,
-                 __entry->isfl,
-                 __entry->haveleft ?
-                       (__entry->haveright ? "both" : "left") :
-                       (__entry->haveright ? "right" : "none"))
-
-);
-
-DECLARE_EVENT_CLASS(xfs_alloc_class,
-       TP_PROTO(struct xfs_alloc_arg *args),
-       TP_ARGS(args),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(xfs_agblock_t, agbno)
-               __field(xfs_extlen_t, minlen)
-               __field(xfs_extlen_t, maxlen)
-               __field(xfs_extlen_t, mod)
-               __field(xfs_extlen_t, prod)
-               __field(xfs_extlen_t, minleft)
-               __field(xfs_extlen_t, total)
-               __field(xfs_extlen_t, alignment)
-               __field(xfs_extlen_t, minalignslop)
-               __field(xfs_extlen_t, len)
-               __field(short, type)
-               __field(short, otype)
-               __field(char, wasdel)
-               __field(char, wasfromfl)
-               __field(char, isfl)
-               __field(char, userdata)
-               __field(xfs_fsblock_t, firstblock)
-       ),
-       TP_fast_assign(
-               __entry->dev = args->mp->m_super->s_dev;
-               __entry->agno = args->agno;
-               __entry->agbno = args->agbno;
-               __entry->minlen = args->minlen;
-               __entry->maxlen = args->maxlen;
-               __entry->mod = args->mod;
-               __entry->prod = args->prod;
-               __entry->minleft = args->minleft;
-               __entry->total = args->total;
-               __entry->alignment = args->alignment;
-               __entry->minalignslop = args->minalignslop;
-               __entry->len = args->len;
-               __entry->type = args->type;
-               __entry->otype = args->otype;
-               __entry->wasdel = args->wasdel;
-               __entry->wasfromfl = args->wasfromfl;
-               __entry->isfl = args->isfl;
-               __entry->userdata = args->userdata;
-               __entry->firstblock = args->firstblock;
-       ),
-       TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
-                 "prod %u minleft %u total %u alignment %u minalignslop %u "
-                 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
-                 "userdata %d firstblock 0x%llx",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->agbno,
-                 __entry->minlen,
-                 __entry->maxlen,
-                 __entry->mod,
-                 __entry->prod,
-                 __entry->minleft,
-                 __entry->total,
-                 __entry->alignment,
-                 __entry->minalignslop,
-                 __entry->len,
-                 __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
-                 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
-                 __entry->wasdel,
-                 __entry->wasfromfl,
-                 __entry->isfl,
-                 __entry->userdata,
-                 (unsigned long long)__entry->firstblock)
-)
-
-#define DEFINE_ALLOC_EVENT(name) \
-DEFINE_EVENT(xfs_alloc_class, name, \
-       TP_PROTO(struct xfs_alloc_arg *args), \
-       TP_ARGS(args))
-DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
-DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
-DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
-
-DECLARE_EVENT_CLASS(xfs_dir2_class,
-       TP_PROTO(struct xfs_da_args *args),
-       TP_ARGS(args),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __dynamic_array(char, name, args->namelen)
-               __field(int, namelen)
-               __field(xfs_dahash_t, hashval)
-               __field(xfs_ino_t, inumber)
-               __field(int, op_flags)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
-               __entry->ino = args->dp->i_ino;
-               if (args->namelen)
-                       memcpy(__get_str(name), args->name, args->namelen);
-               __entry->namelen = args->namelen;
-               __entry->hashval = args->hashval;
-               __entry->inumber = args->inumber;
-               __entry->op_flags = args->op_flags;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
-                 "inumber 0x%llx op_flags %s",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->namelen,
-                 __entry->namelen ? __get_str(name) : NULL,
-                 __entry->namelen,
-                 __entry->hashval,
-                 __entry->inumber,
-                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
-)
-
-#define DEFINE_DIR2_EVENT(name) \
-DEFINE_EVENT(xfs_dir2_class, name, \
-       TP_PROTO(struct xfs_da_args *args), \
-       TP_ARGS(args))
-DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
-DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
-DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
-DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
-
-DECLARE_EVENT_CLASS(xfs_dir2_space_class,
-       TP_PROTO(struct xfs_da_args *args, int idx),
-       TP_ARGS(args, idx),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(int, op_flags)
-               __field(int, idx)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
-               __entry->ino = args->dp->i_ino;
-               __entry->op_flags = args->op_flags;
-               __entry->idx = idx;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
-                 __entry->idx)
-)
-
-#define DEFINE_DIR2_SPACE_EVENT(name) \
-DEFINE_EVENT(xfs_dir2_space_class, name, \
-       TP_PROTO(struct xfs_da_args *args, int idx), \
-       TP_ARGS(args, idx))
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
-
-TRACE_EVENT(xfs_dir2_leafn_moveents,
-       TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
-       TP_ARGS(args, src_idx, dst_idx, count),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(int, op_flags)
-               __field(int, src_idx)
-               __field(int, dst_idx)
-               __field(int, count)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
-               __entry->ino = args->dp->i_ino;
-               __entry->op_flags = args->op_flags;
-               __entry->src_idx = src_idx;
-               __entry->dst_idx = dst_idx;
-               __entry->count = count;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx op_flags %s "
-                 "src_idx %d dst_idx %d count %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
-                 __entry->src_idx,
-                 __entry->dst_idx,
-                 __entry->count)
-);
-
-#define XFS_SWAPEXT_INODES \
-       { 0,    "target" }, \
-       { 1,    "temp" }
-
-#define XFS_INODE_FORMAT_STR \
-       { 0,    "invalid" }, \
-       { 1,    "local" }, \
-       { 2,    "extent" }, \
-       { 3,    "btree" }
-
-DECLARE_EVENT_CLASS(xfs_swap_extent_class,
-       TP_PROTO(struct xfs_inode *ip, int which),
-       TP_ARGS(ip, which),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(int, which)
-               __field(xfs_ino_t, ino)
-               __field(int, format)
-               __field(int, nex)
-               __field(int, max_nex)
-               __field(int, broot_size)
-               __field(int, fork_off)
-       ),
-       TP_fast_assign(
-               __entry->dev = VFS_I(ip)->i_sb->s_dev;
-               __entry->which = which;
-               __entry->ino = ip->i_ino;
-               __entry->format = ip->i_d.di_format;
-               __entry->nex = ip->i_d.di_nextents;
-               __entry->max_nex = ip->i_df.if_ext_max;
-               __entry->broot_size = ip->i_df.if_broot_bytes;
-               __entry->fork_off = XFS_IFORK_BOFF(ip);
-       ),
-       TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
-                 "Max in-fork extents %d, broot size %d, fork offset %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
-                 __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
-                 __entry->nex,
-                 __entry->max_nex,
-                 __entry->broot_size,
-                 __entry->fork_off)
-)
-
-#define DEFINE_SWAPEXT_EVENT(name) \
-DEFINE_EVENT(xfs_swap_extent_class, name, \
-       TP_PROTO(struct xfs_inode *ip, int which), \
-       TP_ARGS(ip, which))
-
-DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
-DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
-
-DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
-       TP_PROTO(struct log *log, struct xlog_recover *trans,
-               struct xlog_recover_item *item, int pass),
-       TP_ARGS(log, trans, item, pass),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(unsigned long, item)
-               __field(xlog_tid_t, tid)
-               __field(int, type)
-               __field(int, pass)
-               __field(int, count)
-               __field(int, total)
-       ),
-       TP_fast_assign(
-               __entry->dev = log->l_mp->m_super->s_dev;
-               __entry->item = (unsigned long)item;
-               __entry->tid = trans->r_log_tid;
-               __entry->type = ITEM_TYPE(item);
-               __entry->pass = pass;
-               __entry->count = item->ri_cnt;
-               __entry->total = item->ri_total;
-       ),
-       TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s "
-                 "item region count/total %d/%d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->tid,
-                 __entry->pass,
-                 (void *)__entry->item,
-                 __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
-                 __entry->count,
-                 __entry->total)
-)
-
-#define DEFINE_LOG_RECOVER_ITEM(name) \
-DEFINE_EVENT(xfs_log_recover_item_class, name, \
-       TP_PROTO(struct log *log, struct xlog_recover *trans, \
-               struct xlog_recover_item *item, int pass), \
-       TP_ARGS(log, trans, item, pass))
-
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
-
-DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
-       TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f),
-       TP_ARGS(log, buf_f),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(__int64_t, blkno)
-               __field(unsigned short, len)
-               __field(unsigned short, flags)
-               __field(unsigned short, size)
-               __field(unsigned int, map_size)
-       ),
-       TP_fast_assign(
-               __entry->dev = log->l_mp->m_super->s_dev;
-               __entry->blkno = buf_f->blf_blkno;
-               __entry->len = buf_f->blf_len;
-               __entry->flags = buf_f->blf_flags;
-               __entry->size = buf_f->blf_size;
-               __entry->map_size = buf_f->blf_map_size;
-       ),
-       TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, "
-                       "map_size %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->blkno,
-                 __entry->len,
-                 __entry->flags,
-                 __entry->size,
-                 __entry->map_size)
-)
-
-#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
-DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
-       TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \
-       TP_ARGS(log, buf_f))
-
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
-
-DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
-       TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f),
-       TP_ARGS(log, in_f),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_ino_t, ino)
-               __field(unsigned short, size)
-               __field(int, fields)
-               __field(unsigned short, asize)
-               __field(unsigned short, dsize)
-               __field(__int64_t, blkno)
-               __field(int, len)
-               __field(int, boffset)
-       ),
-       TP_fast_assign(
-               __entry->dev = log->l_mp->m_super->s_dev;
-               __entry->ino = in_f->ilf_ino;
-               __entry->size = in_f->ilf_size;
-               __entry->fields = in_f->ilf_fields;
-               __entry->asize = in_f->ilf_asize;
-               __entry->dsize = in_f->ilf_dsize;
-               __entry->blkno = in_f->ilf_blkno;
-               __entry->len = in_f->ilf_len;
-               __entry->boffset = in_f->ilf_boffset;
-       ),
-       TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
-                       "dsize %d, blkno 0x%llx, len %d, boffset %d",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->ino,
-                 __entry->size,
-                 __entry->fields,
-                 __entry->asize,
-                 __entry->dsize,
-                 __entry->blkno,
-                 __entry->len,
-                 __entry->boffset)
-)
-#define DEFINE_LOG_RECOVER_INO_ITEM(name) \
-DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
-       TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \
-       TP_ARGS(log, in_f))
-
-DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
-DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
-DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
-
-DECLARE_EVENT_CLASS(xfs_discard_class,
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
-                xfs_agblock_t agbno, xfs_extlen_t len),
-       TP_ARGS(mp, agno, agbno, len),
-       TP_STRUCT__entry(
-               __field(dev_t, dev)
-               __field(xfs_agnumber_t, agno)
-               __field(xfs_agblock_t, agbno)
-               __field(xfs_extlen_t, len)
-       ),
-       TP_fast_assign(
-               __entry->dev = mp->m_super->s_dev;
-               __entry->agno = agno;
-               __entry->agbno = agbno;
-               __entry->len = len;
-       ),
-       TP_printk("dev %d:%d agno %u agbno %u len %u\n",
-                 MAJOR(__entry->dev), MINOR(__entry->dev),
-                 __entry->agno,
-                 __entry->agbno,
-                 __entry->len)
-)
-
-#define DEFINE_DISCARD_EVENT(name) \
-DEFINE_EVENT(xfs_discard_class, name, \
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
-                xfs_agblock_t agbno, xfs_extlen_t len), \
-       TP_ARGS(mp, agno, agbno, len))
-DEFINE_DISCARD_EVENT(xfs_discard_extent);
-DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
-DEFINE_DISCARD_EVENT(xfs_discard_exclude);
-DEFINE_DISCARD_EVENT(xfs_discard_busy);
-
-#endif /* _TRACE_XFS_H */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE xfs_trace
-#include <trace/define_trace.h>
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h

deleted file mode 100644 (file)

index 7c220b4..0000000
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_VNODE_H__
-#define __XFS_VNODE_H__
-
-#include "xfs_fs.h"
-
-struct file;
-struct xfs_inode;
-struct xfs_iomap;
-struct attrlist_cursor_kern;
-
-/*
- * Return values for xfs_inactive.  A return value of
- * VN_INACTIVE_NOCACHE implies that the file system behavior
- * has disassociated its state and bhv_desc_t from the vnode.
- */
-#define        VN_INACTIVE_CACHE       0
-#define        VN_INACTIVE_NOCACHE     1
-
-/*
- * Flags for read/write calls - same values as IRIX
- */
-#define IO_ISDIRECT    0x00004         /* bypass page cache */
-#define IO_INVIS       0x00020         /* don't update inode timestamps */
-
-#define XFS_IO_FLAGS \
-       { IO_ISDIRECT,  "DIRECT" }, \
-       { IO_INVIS,     "INVIS"}
-
-/*
- * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
- */
-#define FI_NONE                        0       /* none */
-#define FI_REMAPF              1       /* Do a remapf prior to the operation */
-#define FI_REMAPF_LOCKED       2       /* Do a remapf prior to the operation.
-                                          Prevent VM access to the pages until
-                                          the operation completes. */
-
-/*
- * Some useful predicates.
- */
-#define VN_MAPPED(vp)  mapping_mapped(vp->i_mapping)
-#define VN_CACHED(vp)  (vp->i_mapping->nrpages)
-#define VN_DIRTY(vp)   mapping_tagged(vp->i_mapping, \
-                                       PAGECACHE_TAG_DIRTY)
-
-
-#endif /* __XFS_VNODE_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c

deleted file mode 100644 (file)

index 87d3e03..0000000
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright (C) 2008 Christoph Hellwig.
- * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include "xfs.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_acl.h"
-#include "xfs_vnodeops.h"
-
-#include <linux/posix_acl_xattr.h>
-#include <linux/xattr.h>
-
-
-static int
-xfs_xattr_get(struct dentry *dentry, const char *name,
-               void *value, size_t size, int xflags)
-{
-       struct xfs_inode *ip = XFS_I(dentry->d_inode);
-       int error, asize = size;
-
-       if (strcmp(name, "") == 0)
-               return -EINVAL;
-
-       /* Convert Linux syscall to XFS internal ATTR flags */
-       if (!size) {
-               xflags |= ATTR_KERNOVAL;
-               value = NULL;
-       }
-
-       error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
-       if (error)
-               return error;
-       return asize;
-}
-
-static int
-xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
-               size_t size, int flags, int xflags)
-{
-       struct xfs_inode *ip = XFS_I(dentry->d_inode);
-
-       if (strcmp(name, "") == 0)
-               return -EINVAL;
-
-       /* Convert Linux syscall to XFS internal ATTR flags */
-       if (flags & XATTR_CREATE)
-               xflags |= ATTR_CREATE;
-       if (flags & XATTR_REPLACE)
-               xflags |= ATTR_REPLACE;
-
-       if (!value)
-               return -xfs_attr_remove(ip, (unsigned char *)name, xflags);
-       return -xfs_attr_set(ip, (unsigned char *)name,
-                               (void *)value, size, xflags);
-}
-
-static const struct xattr_handler xfs_xattr_user_handler = {
-       .prefix = XATTR_USER_PREFIX,
-       .flags  = 0, /* no flags implies user namespace */
-       .get    = xfs_xattr_get,
-       .set    = xfs_xattr_set,
-};
-
-static const struct xattr_handler xfs_xattr_trusted_handler = {
-       .prefix = XATTR_TRUSTED_PREFIX,
-       .flags  = ATTR_ROOT,
-       .get    = xfs_xattr_get,
-       .set    = xfs_xattr_set,
-};
-
-static const struct xattr_handler xfs_xattr_security_handler = {
-       .prefix = XATTR_SECURITY_PREFIX,
-       .flags  = ATTR_SECURE,
-       .get    = xfs_xattr_get,
-       .set    = xfs_xattr_set,
-};
-
-const struct xattr_handler *xfs_xattr_handlers[] = {
-       &xfs_xattr_user_handler,
-       &xfs_xattr_trusted_handler,
-       &xfs_xattr_security_handler,
-#ifdef CONFIG_XFS_POSIX_ACL
-       &xfs_xattr_acl_access_handler,
-       &xfs_xattr_acl_default_handler,
-#endif
-       NULL
-};
-
-static unsigned int xfs_xattr_prefix_len(int flags)
-{
-       if (flags & XFS_ATTR_SECURE)
-               return sizeof("security");
-       else if (flags & XFS_ATTR_ROOT)
-               return sizeof("trusted");
-       else
-               return sizeof("user");
-}
-
-static const char *xfs_xattr_prefix(int flags)
-{
-       if (flags & XFS_ATTR_SECURE)
-               return xfs_xattr_security_handler.prefix;
-       else if (flags & XFS_ATTR_ROOT)
-               return xfs_xattr_trusted_handler.prefix;
-       else
-               return xfs_xattr_user_handler.prefix;
-}
-
-static int
-xfs_xattr_put_listent(
-       struct xfs_attr_list_context *context,
-       int             flags,
-       unsigned char   *name,
-       int             namelen,
-       int             valuelen,
-       unsigned char   *value)
-{
-       unsigned int prefix_len = xfs_xattr_prefix_len(flags);
-       char *offset;
-       int arraytop;
-
-       ASSERT(context->count >= 0);
-
-       /*
-        * Only show root namespace entries if we are actually allowed to
-        * see them.
-        */
-       if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
-               return 0;
-
-       arraytop = context->count + prefix_len + namelen + 1;
-       if (arraytop > context->firstu) {
-               context->count = -1;    /* insufficient space */
-               return 1;
-       }
-       offset = (char *)context->alist + context->count;
-       strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
-       offset += prefix_len;
-       strncpy(offset, (char *)name, namelen);                 /* real name */
-       offset += namelen;
-       *offset = '\0';
-       context->count += prefix_len + namelen + 1;
-       return 0;
-}
-
-static int
-xfs_xattr_put_listent_sizes(
-       struct xfs_attr_list_context *context,
-       int             flags,
-       unsigned char   *name,
-       int             namelen,
-       int             valuelen,
-       unsigned char   *value)
-{
-       context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
-       return 0;
-}
-
-static int
-list_one_attr(const char *name, const size_t len, void *data,
-               size_t size, ssize_t *result)
-{
-       char *p = data + *result;
-
-       *result += len;
-       if (!size)
-               return 0;
-       if (*result > size)
-               return -ERANGE;
-
-       strcpy(p, name);
-       return 0;
-}
-
-ssize_t
-xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
-{
-       struct xfs_attr_list_context context;
-       struct attrlist_cursor_kern cursor = { 0 };
-       struct inode            *inode = dentry->d_inode;
-       int                     error;
-
-       /*
-        * First read the regular on-disk attributes.
-        */
-       memset(&context, 0, sizeof(context));
-       context.dp = XFS_I(inode);
-       context.cursor = &cursor;
-       context.resynch = 1;
-       context.alist = data;
-       context.bufsize = size;
-       context.firstu = context.bufsize;
-
-       if (size)
-               context.put_listent = xfs_xattr_put_listent;
-       else
-               context.put_listent = xfs_xattr_put_listent_sizes;
-
-       xfs_attr_list_int(&context);
-       if (context.count < 0)
-               return -ERANGE;
-
-       /*
-        * Then add the two synthetic ACL attributes.
-        */
-       if (posix_acl_access_exists(inode)) {
-               error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
-                               strlen(POSIX_ACL_XATTR_ACCESS) + 1,
-                               data, size, &context.count);
-               if (error)
-                       return error;
-       }
-
-       if (posix_acl_default_exists(inode)) {
-               error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
-                               strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
-                               data, size, &context.count);
-               if (error)
-                       return error;
-       }
-
-       return context.count;
-}
diff --git a/fs/xfs/mrlock.h b/fs/xfs/mrlock.h

new file mode 100644 (file)

index 0000000..ff6a198
--- /dev/null
+++ b/fs/xfs/mrlock.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPPORT_MRLOCK_H__
+#define __XFS_SUPPORT_MRLOCK_H__
+
+#include <linux/rwsem.h>
+
+typedef struct {
+       struct rw_semaphore     mr_lock;
+#ifdef DEBUG
+       int                     mr_writer;
+#endif
+} mrlock_t;
+
+#ifdef DEBUG
+#define mrinit(mrp, name)      \
+       do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
+#else
+#define mrinit(mrp, name)      \
+       do { init_rwsem(&(mrp)->mr_lock); } while (0)
+#endif
+
+#define mrlock_init(mrp, t,n,s)        mrinit(mrp, n)
+#define mrfree(mrp)            do { } while (0)
+
+static inline void mraccess_nested(mrlock_t *mrp, int subclass)
+{
+       down_read_nested(&mrp->mr_lock, subclass);
+}
+
+static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
+{
+       down_write_nested(&mrp->mr_lock, subclass);
+#ifdef DEBUG
+       mrp->mr_writer = 1;
+#endif
+}
+
+static inline int mrtryaccess(mrlock_t *mrp)
+{
+       return down_read_trylock(&mrp->mr_lock);
+}
+
+static inline int mrtryupdate(mrlock_t *mrp)
+{
+       if (!down_write_trylock(&mrp->mr_lock))
+               return 0;
+#ifdef DEBUG
+       mrp->mr_writer = 1;
+#endif
+       return 1;
+}
+
+static inline void mrunlock_excl(mrlock_t *mrp)
+{
+#ifdef DEBUG
+       mrp->mr_writer = 0;
+#endif
+       up_write(&mrp->mr_lock);
+}
+
+static inline void mrunlock_shared(mrlock_t *mrp)
+{
+       up_read(&mrp->mr_lock);
+}
+
+static inline void mrdemote(mrlock_t *mrp)
+{
+#ifdef DEBUG
+       mrp->mr_writer = 0;
+#endif
+       downgrade_write(&mrp->mr_lock);
+}
+
+#endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c

deleted file mode 100644 (file)

index 837f311..0000000
--- a/fs/xfs/quota/xfs_dquot.c
+++ /dev/null
@@ -1,1458 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_space.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-#include "xfs_trace.h"
-
-
-/*
-   LOCK ORDER
-
-   inode lock              (ilock)
-   dquot hash-chain lock    (hashlock)
-   xqm dquot freelist lock  (freelistlock
-   mount's dquot list lock  (mplistlock)
-   user dquot lock - lock ordering among dquots is based on the uid or gid
-   group dquot lock - similar to udquots. Between the two dquots, the udquot
-                     has to be locked first.
-   pin lock - the dquot lock must be held to take this lock.
-   flush lock - ditto.
-*/
-
-#ifdef DEBUG
-xfs_buftarg_t *xfs_dqerror_target;
-int xfs_do_dqerror;
-int xfs_dqreq_num;
-int xfs_dqerror_mod = 33;
-#endif
-
-static struct lock_class_key xfs_dquot_other_class;
-
-/*
- * Allocate and initialize a dquot. We don't always allocate fresh memory;
- * we try to reclaim a free dquot if the number of incore dquots are above
- * a threshold.
- * The only field inside the core that gets initialized at this point
- * is the d_id field. The idea is to fill in the entire q_core
- * when we read in the on disk dquot.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqinit(
-       xfs_mount_t  *mp,
-       xfs_dqid_t   id,
-       uint         type)
-{
-       xfs_dquot_t     *dqp;
-       boolean_t       brandnewdquot;
-
-       brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
-       dqp->dq_flags = type;
-       dqp->q_core.d_id = cpu_to_be32(id);
-       dqp->q_mount = mp;
-
-       /*
-        * No need to re-initialize these if this is a reclaimed dquot.
-        */
-       if (brandnewdquot) {
-               INIT_LIST_HEAD(&dqp->q_freelist);
-               mutex_init(&dqp->q_qlock);
-               init_waitqueue_head(&dqp->q_pinwait);
-
-               /*
-                * Because we want to use a counting completion, complete
-                * the flush completion once to allow a single access to
-                * the flush completion without blocking.
-                */
-               init_completion(&dqp->q_flush);
-               complete(&dqp->q_flush);
-
-               trace_xfs_dqinit(dqp);
-       } else {
-               /*
-                * Only the q_core portion was zeroed in dqreclaim_one().
-                * So, we need to reset others.
-                */
-               dqp->q_nrefs = 0;
-               dqp->q_blkno = 0;
-               INIT_LIST_HEAD(&dqp->q_mplist);
-               INIT_LIST_HEAD(&dqp->q_hashlist);
-               dqp->q_bufoffset = 0;
-               dqp->q_fileoffset = 0;
-               dqp->q_transp = NULL;
-               dqp->q_gdquot = NULL;
-               dqp->q_res_bcount = 0;
-               dqp->q_res_icount = 0;
-               dqp->q_res_rtbcount = 0;
-               atomic_set(&dqp->q_pincount, 0);
-               dqp->q_hash = NULL;
-               ASSERT(list_empty(&dqp->q_freelist));
-
-               trace_xfs_dqreuse(dqp);
-       }
-
-       /*
-        * In either case we need to make sure group quotas have a different
-        * lock class than user quotas, to make sure lockdep knows we can
-        * locks of one of each at the same time.
-        */
-       if (!(type & XFS_DQ_USER))
-               lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
-
-       /*
-        * log item gets initialized later
-        */
-       return (dqp);
-}
-
-/*
- * This is called to free all the memory associated with a dquot
- */
-void
-xfs_qm_dqdestroy(
-       xfs_dquot_t     *dqp)
-{
-       ASSERT(list_empty(&dqp->q_freelist));
-
-       mutex_destroy(&dqp->q_qlock);
-       kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
-
-       atomic_dec(&xfs_Gqm->qm_totaldquots);
-}
-
-/*
- * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
- */
-STATIC void
-xfs_qm_dqinit_core(
-       xfs_dqid_t      id,
-       uint            type,
-       xfs_dqblk_t     *d)
-{
-       /*
-        * Caller has zero'd the entire dquot 'chunk' already.
-        */
-       d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
-       d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
-       d->dd_diskdq.d_id = cpu_to_be32(id);
-       d->dd_diskdq.d_flags = type;
-}
-
-/*
- * If default limits are in force, push them into the dquot now.
- * We overwrite the dquot limits only if they are zero and this
- * is not the root dquot.
- */
-void
-xfs_qm_adjust_dqlimits(
-       xfs_mount_t             *mp,
-       xfs_disk_dquot_t        *d)
-{
-       xfs_quotainfo_t         *q = mp->m_quotainfo;
-
-       ASSERT(d->d_id);
-
-       if (q->qi_bsoftlimit && !d->d_blk_softlimit)
-               d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
-       if (q->qi_bhardlimit && !d->d_blk_hardlimit)
-               d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
-       if (q->qi_isoftlimit && !d->d_ino_softlimit)
-               d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
-       if (q->qi_ihardlimit && !d->d_ino_hardlimit)
-               d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
-       if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
-               d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
-       if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
-               d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
-}
-
-/*
- * Check the limits and timers of a dquot and start or reset timers
- * if necessary.
- * This gets called even when quota enforcement is OFF, which makes our
- * life a little less complicated. (We just don't reject any quota
- * reservations in that case, when enforcement is off).
- * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
- * enforcement's off.
- * In contrast, warnings are a little different in that they don't
- * 'automatically' get started when limits get exceeded.  They do
- * get reset to zero, however, when we find the count to be under
- * the soft limit (they are only ever set non-zero via userspace).
- */
-void
-xfs_qm_adjust_dqtimers(
-       xfs_mount_t             *mp,
-       xfs_disk_dquot_t        *d)
-{
-       ASSERT(d->d_id);
-
-#ifdef DEBUG
-       if (d->d_blk_hardlimit)
-               ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
-                      be64_to_cpu(d->d_blk_hardlimit));
-       if (d->d_ino_hardlimit)
-               ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
-                      be64_to_cpu(d->d_ino_hardlimit));
-       if (d->d_rtb_hardlimit)
-               ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
-                      be64_to_cpu(d->d_rtb_hardlimit));
-#endif
-
-       if (!d->d_btimer) {
-               if ((d->d_blk_softlimit &&
-                    (be64_to_cpu(d->d_bcount) >=
-                     be64_to_cpu(d->d_blk_softlimit))) ||
-                   (d->d_blk_hardlimit &&
-                    (be64_to_cpu(d->d_bcount) >=
-                     be64_to_cpu(d->d_blk_hardlimit)))) {
-                       d->d_btimer = cpu_to_be32(get_seconds() +
-                                       mp->m_quotainfo->qi_btimelimit);
-               } else {
-                       d->d_bwarns = 0;
-               }
-       } else {
-               if ((!d->d_blk_softlimit ||
-                    (be64_to_cpu(d->d_bcount) <
-                     be64_to_cpu(d->d_blk_softlimit))) &&
-                   (!d->d_blk_hardlimit ||
-                   (be64_to_cpu(d->d_bcount) <
-                    be64_to_cpu(d->d_blk_hardlimit)))) {
-                       d->d_btimer = 0;
-               }
-       }
-
-       if (!d->d_itimer) {
-               if ((d->d_ino_softlimit &&
-                    (be64_to_cpu(d->d_icount) >=
-                     be64_to_cpu(d->d_ino_softlimit))) ||
-                   (d->d_ino_hardlimit &&
-                    (be64_to_cpu(d->d_icount) >=
-                     be64_to_cpu(d->d_ino_hardlimit)))) {
-                       d->d_itimer = cpu_to_be32(get_seconds() +
-                                       mp->m_quotainfo->qi_itimelimit);
-               } else {
-                       d->d_iwarns = 0;
-               }
-       } else {
-               if ((!d->d_ino_softlimit ||
-                    (be64_to_cpu(d->d_icount) <
-                     be64_to_cpu(d->d_ino_softlimit)))  &&
-                   (!d->d_ino_hardlimit ||
-                    (be64_to_cpu(d->d_icount) <
-                     be64_to_cpu(d->d_ino_hardlimit)))) {
-                       d->d_itimer = 0;
-               }
-       }
-
-       if (!d->d_rtbtimer) {
-               if ((d->d_rtb_softlimit &&
-                    (be64_to_cpu(d->d_rtbcount) >=
-                     be64_to_cpu(d->d_rtb_softlimit))) ||
-                   (d->d_rtb_hardlimit &&
-                    (be64_to_cpu(d->d_rtbcount) >=
-                     be64_to_cpu(d->d_rtb_hardlimit)))) {
-                       d->d_rtbtimer = cpu_to_be32(get_seconds() +
-                                       mp->m_quotainfo->qi_rtbtimelimit);
-               } else {
-                       d->d_rtbwarns = 0;
-               }
-       } else {
-               if ((!d->d_rtb_softlimit ||
-                    (be64_to_cpu(d->d_rtbcount) <
-                     be64_to_cpu(d->d_rtb_softlimit))) &&
-                   (!d->d_rtb_hardlimit ||
-                    (be64_to_cpu(d->d_rtbcount) <
-                     be64_to_cpu(d->d_rtb_hardlimit)))) {
-                       d->d_rtbtimer = 0;
-               }
-       }
-}
-
-/*
- * initialize a buffer full of dquots and log the whole thing
- */
-STATIC void
-xfs_qm_init_dquot_blk(
-       xfs_trans_t     *tp,
-       xfs_mount_t     *mp,
-       xfs_dqid_t      id,
-       uint            type,
-       xfs_buf_t       *bp)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       xfs_dqblk_t     *d;
-       int             curid, i;
-
-       ASSERT(tp);
-       ASSERT(XFS_BUF_ISBUSY(bp));
-       ASSERT(xfs_buf_islocked(bp));
-
-       d = (xfs_dqblk_t *)XFS_BUF_PTR(bp);
-
-       /*
-        * ID of the first dquot in the block - id's are zero based.
-        */
-       curid = id - (id % q->qi_dqperchunk);
-       ASSERT(curid >= 0);
-       memset(d, 0, BBTOB(q->qi_dqchunklen));
-       for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
-               xfs_qm_dqinit_core(curid, type, d);
-       xfs_trans_dquot_buf(tp, bp,
-                           (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
-                           ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
-                            XFS_BLF_GDQUOT_BUF)));
-       xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
-}
-
-
-
-/*
- * Allocate a block and fill it with dquots.
- * This is called when the bmapi finds a hole.
- */
-STATIC int
-xfs_qm_dqalloc(
-       xfs_trans_t     **tpp,
-       xfs_mount_t     *mp,
-       xfs_dquot_t     *dqp,
-       xfs_inode_t     *quotip,
-       xfs_fileoff_t   offset_fsb,
-       xfs_buf_t       **O_bpp)
-{
-       xfs_fsblock_t   firstblock;
-       xfs_bmap_free_t flist;
-       xfs_bmbt_irec_t map;
-       int             nmaps, error, committed;
-       xfs_buf_t       *bp;
-       xfs_trans_t     *tp = *tpp;
-
-       ASSERT(tp != NULL);
-
-       trace_xfs_dqalloc(dqp);
-
-       /*
-        * Initialize the bmap freelist prior to calling bmapi code.
-        */
-       xfs_bmap_init(&flist, &firstblock);
-       xfs_ilock(quotip, XFS_ILOCK_EXCL);
-       /*
-        * Return if this type of quotas is turned off while we didn't
-        * have an inode lock
-        */
-       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
-               xfs_iunlock(quotip, XFS_ILOCK_EXCL);
-               return (ESRCH);
-       }
-
-       xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL);
-       nmaps = 1;
-       if ((error = xfs_bmapi(tp, quotip,
-                             offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
-                             XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
-                             &firstblock,
-                             XFS_QM_DQALLOC_SPACE_RES(mp),
-                             &map, &nmaps, &flist))) {
-               goto error0;
-       }
-       ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
-       ASSERT(nmaps == 1);
-       ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
-              (map.br_startblock != HOLESTARTBLOCK));
-
-       /*
-        * Keep track of the blkno to save a lookup later
-        */
-       dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-
-       /* now we can just get the buffer (there's nothing to read yet) */
-       bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
-                              dqp->q_blkno,
-                              mp->m_quotainfo->qi_dqchunklen,
-                              0);
-       if (!bp || (error = XFS_BUF_GETERROR(bp)))
-               goto error1;
-       /*
-        * Make a chunk of dquots out of this buffer and log
-        * the entire thing.
-        */
-       xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
-                             dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
-
-       /*
-        * xfs_bmap_finish() may commit the current transaction and
-        * start a second transaction if the freelist is not empty.
-        *
-        * Since we still want to modify this buffer, we need to
-        * ensure that the buffer is not released on commit of
-        * the first transaction and ensure the buffer is added to the
-        * second transaction.
-        *
-        * If there is only one transaction then don't stop the buffer
-        * from being released when it commits later on.
-        */
-
-       xfs_trans_bhold(tp, bp);
-
-       if ((error = xfs_bmap_finish(tpp, &flist, &committed))) {
-               goto error1;
-       }
-
-       if (committed) {
-               tp = *tpp;
-               xfs_trans_bjoin(tp, bp);
-       } else {
-               xfs_trans_bhold_release(tp, bp);
-       }
-
-       *O_bpp = bp;
-       return 0;
-
-      error1:
-       xfs_bmap_cancel(&flist);
-      error0:
-       xfs_iunlock(quotip, XFS_ILOCK_EXCL);
-
-       return (error);
-}
-
-/*
- * Maps a dquot to the buffer containing its on-disk version.
- * This returns a ptr to the buffer containing the on-disk dquot
- * in the bpp param, and a ptr to the on-disk dquot within that buffer
- */
-STATIC int
-xfs_qm_dqtobp(
-       xfs_trans_t             **tpp,
-       xfs_dquot_t             *dqp,
-       xfs_disk_dquot_t        **O_ddpp,
-       xfs_buf_t               **O_bpp,
-       uint                    flags)
-{
-       xfs_bmbt_irec_t map;
-       int             nmaps = 1, error;
-       xfs_buf_t       *bp;
-       xfs_inode_t     *quotip = XFS_DQ_TO_QIP(dqp);
-       xfs_mount_t     *mp = dqp->q_mount;
-       xfs_disk_dquot_t *ddq;
-       xfs_dqid_t      id = be32_to_cpu(dqp->q_core.d_id);
-       xfs_trans_t     *tp = (tpp ? *tpp : NULL);
-
-       dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
-
-       xfs_ilock(quotip, XFS_ILOCK_SHARED);
-       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
-               /*
-                * Return if this type of quotas is turned off while we
-                * didn't have the quota inode lock.
-                */
-               xfs_iunlock(quotip, XFS_ILOCK_SHARED);
-               return ESRCH;
-       }
-
-       /*
-        * Find the block map; no allocations yet
-        */
-       error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
-                         XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
-                         NULL, 0, &map, &nmaps, NULL);
-
-       xfs_iunlock(quotip, XFS_ILOCK_SHARED);
-       if (error)
-               return error;
-
-       ASSERT(nmaps == 1);
-       ASSERT(map.br_blockcount == 1);
-
-       /*
-        * Offset of dquot in the (fixed sized) dquot chunk.
-        */
-       dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
-               sizeof(xfs_dqblk_t);
-
-       ASSERT(map.br_startblock != DELAYSTARTBLOCK);
-       if (map.br_startblock == HOLESTARTBLOCK) {
-               /*
-                * We don't allocate unless we're asked to
-                */
-               if (!(flags & XFS_QMOPT_DQALLOC))
-                       return ENOENT;
-
-               ASSERT(tp);
-               error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
-                                       dqp->q_fileoffset, &bp);
-               if (error)
-                       return error;
-               tp = *tpp;
-       } else {
-               trace_xfs_dqtobp_read(dqp);
-
-               /*
-                * store the blkno etc so that we don't have to do the
-                * mapping all the time
-                */
-               dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-
-               error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
-                                          dqp->q_blkno,
-                                          mp->m_quotainfo->qi_dqchunklen,
-                                          0, &bp);
-               if (error || !bp)
-                       return XFS_ERROR(error);
-       }
-
-       ASSERT(XFS_BUF_ISBUSY(bp));
-       ASSERT(xfs_buf_islocked(bp));
-
-       /*
-        * calculate the location of the dquot inside the buffer.
-        */
-       ddq = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset);
-
-       /*
-        * A simple sanity check in case we got a corrupted dquot...
-        */
-       error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
-                          flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
-                          "dqtobp");
-       if (error) {
-               if (!(flags & XFS_QMOPT_DQREPAIR)) {
-                       xfs_trans_brelse(tp, bp);
-                       return XFS_ERROR(EIO);
-               }
-               XFS_BUF_BUSY(bp); /* We dirtied this */
-       }
-
-       *O_bpp = bp;
-       *O_ddpp = ddq;
-
-       return (0);
-}
-
-
-/*
- * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
- * and release the buffer immediately.
- *
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_dqread(
-       xfs_trans_t     **tpp,
-       xfs_dqid_t      id,
-       xfs_dquot_t     *dqp,   /* dquot to get filled in */
-       uint            flags)
-{
-       xfs_disk_dquot_t *ddqp;
-       xfs_buf_t        *bp;
-       int              error;
-       xfs_trans_t      *tp;
-
-       ASSERT(tpp);
-
-       trace_xfs_dqread(dqp);
-
-       /*
-        * get a pointer to the on-disk dquot and the buffer containing it
-        * dqp already knows its own type (GROUP/USER).
-        */
-       if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
-               return (error);
-       }
-       tp = *tpp;
-
-       /* copy everything from disk dquot to the incore dquot */
-       memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
-       ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
-       xfs_qm_dquot_logitem_init(dqp);
-
-       /*
-        * Reservation counters are defined as reservation plus current usage
-        * to avoid having to add every time.
-        */
-       dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
-       dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
-       dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
-
-       /* Mark the buf so that this will stay incore a little longer */
-       XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
-
-       /*
-        * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
-        * So we need to release with xfs_trans_brelse().
-        * The strategy here is identical to that of inodes; we lock
-        * the dquot in xfs_qm_dqget() before making it accessible to
-        * others. This is because dquots, like inodes, need a good level of
-        * concurrency, and we don't want to take locks on the entire buffers
-        * for dquot accesses.
-        * Note also that the dquot buffer may even be dirty at this point, if
-        * this particular dquot was repaired. We still aren't afraid to
-        * brelse it because we have the changes incore.
-        */
-       ASSERT(XFS_BUF_ISBUSY(bp));
-       ASSERT(xfs_buf_islocked(bp));
-       xfs_trans_brelse(tp, bp);
-
-       return (error);
-}
-
-
-/*
- * allocate an incore dquot from the kernel heap,
- * and fill its core with quota information kept on disk.
- * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
- * if it wasn't already allocated.
- */
-STATIC int
-xfs_qm_idtodq(
-       xfs_mount_t     *mp,
-       xfs_dqid_t      id,      /* gid or uid, depending on type */
-       uint            type,    /* UDQUOT or GDQUOT */
-       uint            flags,   /* DQALLOC, DQREPAIR */
-       xfs_dquot_t     **O_dqpp)/* OUT : incore dquot, not locked */
-{
-       xfs_dquot_t     *dqp;
-       int             error;
-       xfs_trans_t     *tp;
-       int             cancelflags=0;
-
-       dqp = xfs_qm_dqinit(mp, id, type);
-       tp = NULL;
-       if (flags & XFS_QMOPT_DQALLOC) {
-               tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
-               error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
-                               XFS_WRITE_LOG_RES(mp) +
-                               BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
-                               128,
-                               0,
-                               XFS_TRANS_PERM_LOG_RES,
-                               XFS_WRITE_LOG_COUNT);
-               if (error) {
-                       cancelflags = 0;
-                       goto error0;
-               }
-               cancelflags = XFS_TRANS_RELEASE_LOG_RES;
-       }
-
-       /*
-        * Read it from disk; xfs_dqread() takes care of
-        * all the necessary initialization of dquot's fields (locks, etc)
-        */
-       if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
-               /*
-                * This can happen if quotas got turned off (ESRCH),
-                * or if the dquot didn't exist on disk and we ask to
-                * allocate (ENOENT).
-                */
-               trace_xfs_dqread_fail(dqp);
-               cancelflags |= XFS_TRANS_ABORT;
-               goto error0;
-       }
-       if (tp) {
-               if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES)))
-                       goto error1;
-       }
-
-       *O_dqpp = dqp;
-       return (0);
-
- error0:
-       ASSERT(error);
-       if (tp)
-               xfs_trans_cancel(tp, cancelflags);
- error1:
-       xfs_qm_dqdestroy(dqp);
-       *O_dqpp = NULL;
-       return (error);
-}
-
-/*
- * Lookup a dquot in the incore dquot hashtable. We keep two separate
- * hashtables for user and group dquots; and, these are global tables
- * inside the XQM, not per-filesystem tables.
- * The hash chain must be locked by caller, and it is left locked
- * on return. Returning dquot is locked.
- */
-STATIC int
-xfs_qm_dqlookup(
-       xfs_mount_t             *mp,
-       xfs_dqid_t              id,
-       xfs_dqhash_t            *qh,
-       xfs_dquot_t             **O_dqpp)
-{
-       xfs_dquot_t             *dqp;
-       uint                    flist_locked;
-
-       ASSERT(mutex_is_locked(&qh->qh_lock));
-
-       flist_locked = B_FALSE;
-
-       /*
-        * Traverse the hashchain looking for a match
-        */
-       list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
-               /*
-                * We already have the hashlock. We don't need the
-                * dqlock to look at the id field of the dquot, since the
-                * id can't be modified without the hashlock anyway.
-                */
-               if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
-                       trace_xfs_dqlookup_found(dqp);
-
-                       /*
-                        * All in core dquots must be on the dqlist of mp
-                        */
-                       ASSERT(!list_empty(&dqp->q_mplist));
-
-                       xfs_dqlock(dqp);
-                       if (dqp->q_nrefs == 0) {
-                               ASSERT(!list_empty(&dqp->q_freelist));
-                               if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
-                                       trace_xfs_dqlookup_want(dqp);
-
-                                       /*
-                                        * We may have raced with dqreclaim_one()
-                                        * (and lost). So, flag that we don't
-                                        * want the dquot to be reclaimed.
-                                        */
-                                       dqp->dq_flags |= XFS_DQ_WANT;
-                                       xfs_dqunlock(dqp);
-                                       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-                                       xfs_dqlock(dqp);
-                                       dqp->dq_flags &= ~(XFS_DQ_WANT);
-                               }
-                               flist_locked = B_TRUE;
-                       }
-
-                       /*
-                        * id couldn't have changed; we had the hashlock all
-                        * along
-                        */
-                       ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
-
-                       if (flist_locked) {
-                               if (dqp->q_nrefs != 0) {
-                                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-                                       flist_locked = B_FALSE;
-                               } else {
-                                       /* take it off the freelist */
-                                       trace_xfs_dqlookup_freelist(dqp);
-                                       list_del_init(&dqp->q_freelist);
-                                       xfs_Gqm->qm_dqfrlist_cnt--;
-                               }
-                       }
-
-                       XFS_DQHOLD(dqp);
-
-                       if (flist_locked)
-                               mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-                       /*
-                        * move the dquot to the front of the hashchain
-                        */
-                       ASSERT(mutex_is_locked(&qh->qh_lock));
-                       list_move(&dqp->q_hashlist, &qh->qh_list);
-                       trace_xfs_dqlookup_done(dqp);
-                       *O_dqpp = dqp;
-                       return 0;
-               }
-       }
-
-       *O_dqpp = NULL;
-       ASSERT(mutex_is_locked(&qh->qh_lock));
-       return (1);
-}
-
-/*
- * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
- * a locked dquot, doing an allocation (if requested) as needed.
- * When both an inode and an id are given, the inode's id takes precedence.
- * That is, if the id changes while we don't hold the ilock inside this
- * function, the new dquot is returned, not necessarily the one requested
- * in the id argument.
- */
-int
-xfs_qm_dqget(
-       xfs_mount_t     *mp,
-       xfs_inode_t     *ip,      /* locked inode (optional) */
-       xfs_dqid_t      id,       /* uid/projid/gid depending on type */
-       uint            type,     /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
-       uint            flags,    /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
-       xfs_dquot_t     **O_dqpp) /* OUT : locked incore dquot */
-{
-       xfs_dquot_t     *dqp;
-       xfs_dqhash_t    *h;
-       uint            version;
-       int             error;
-
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-       if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
-           (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
-           (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
-               return (ESRCH);
-       }
-       h = XFS_DQ_HASH(mp, id, type);
-
-#ifdef DEBUG
-       if (xfs_do_dqerror) {
-               if ((xfs_dqerror_target == mp->m_ddev_targp) &&
-                   (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
-                       xfs_debug(mp, "Returning error in dqget");
-                       return (EIO);
-               }
-       }
-#endif
-
- again:
-
-#ifdef DEBUG
-       ASSERT(type == XFS_DQ_USER ||
-              type == XFS_DQ_PROJ ||
-              type == XFS_DQ_GROUP);
-       if (ip) {
-               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-               if (type == XFS_DQ_USER)
-                       ASSERT(ip->i_udquot == NULL);
-               else
-                       ASSERT(ip->i_gdquot == NULL);
-       }
-#endif
-       mutex_lock(&h->qh_lock);
-
-       /*
-        * Look in the cache (hashtable).
-        * The chain is kept locked during lookup.
-        */
-       if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
-               XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
-               /*
-                * The dquot was found, moved to the front of the chain,
-                * taken off the freelist if it was on it, and locked
-                * at this point. Just unlock the hashchain and return.
-                */
-               ASSERT(*O_dqpp);
-               ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
-               mutex_unlock(&h->qh_lock);
-               trace_xfs_dqget_hit(*O_dqpp);
-               return (0);     /* success */
-       }
-       XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
-
-       /*
-        * Dquot cache miss. We don't want to keep the inode lock across
-        * a (potential) disk read. Also we don't want to deal with the lock
-        * ordering between quotainode and this inode. OTOH, dropping the inode
-        * lock here means dealing with a chown that can happen before
-        * we re-acquire the lock.
-        */
-       if (ip)
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       /*
-        * Save the hashchain version stamp, and unlock the chain, so that
-        * we don't keep the lock across a disk read
-        */
-       version = h->qh_version;
-       mutex_unlock(&h->qh_lock);
-
-       /*
-        * Allocate the dquot on the kernel heap, and read the ondisk
-        * portion off the disk. Also, do all the necessary initialization
-        * This can return ENOENT if dquot didn't exist on disk and we didn't
-        * ask it to allocate; ESRCH if quotas got turned off suddenly.
-        */
-       if ((error = xfs_qm_idtodq(mp, id, type,
-                                 flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
-                                          XFS_QMOPT_DOWARN),
-                                 &dqp))) {
-               if (ip)
-                       xfs_ilock(ip, XFS_ILOCK_EXCL);
-               return (error);
-       }
-
-       /*
-        * See if this is mount code calling to look at the overall quota limits
-        * which are stored in the id == 0 user or group's dquot.
-        * Since we may not have done a quotacheck by this point, just return
-        * the dquot without attaching it to any hashtables, lists, etc, or even
-        * taking a reference.
-        * The caller must dqdestroy this once done.
-        */
-       if (flags & XFS_QMOPT_DQSUSER) {
-               ASSERT(id == 0);
-               ASSERT(! ip);
-               goto dqret;
-       }
-
-       /*
-        * Dquot lock comes after hashlock in the lock ordering
-        */
-       if (ip) {
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-               /*
-                * A dquot could be attached to this inode by now, since
-                * we had dropped the ilock.
-                */
-               if (type == XFS_DQ_USER) {
-                       if (!XFS_IS_UQUOTA_ON(mp)) {
-                               /* inode stays locked on return */
-                               xfs_qm_dqdestroy(dqp);
-                               return XFS_ERROR(ESRCH);
-                       }
-                       if (ip->i_udquot) {
-                               xfs_qm_dqdestroy(dqp);
-                               dqp = ip->i_udquot;
-                               xfs_dqlock(dqp);
-                               goto dqret;
-                       }
-               } else {
-                       if (!XFS_IS_OQUOTA_ON(mp)) {
-                               /* inode stays locked on return */
-                               xfs_qm_dqdestroy(dqp);
-                               return XFS_ERROR(ESRCH);
-                       }
-                       if (ip->i_gdquot) {
-                               xfs_qm_dqdestroy(dqp);
-                               dqp = ip->i_gdquot;
-                               xfs_dqlock(dqp);
-                               goto dqret;
-                       }
-               }
-       }
-
-       /*
-        * Hashlock comes after ilock in lock order
-        */
-       mutex_lock(&h->qh_lock);
-       if (version != h->qh_version) {
-               xfs_dquot_t *tmpdqp;
-               /*
-                * Now, see if somebody else put the dquot in the
-                * hashtable before us. This can happen because we didn't
-                * keep the hashchain lock. We don't have to worry about
-                * lock order between the two dquots here since dqp isn't
-                * on any findable lists yet.
-                */
-               if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
-                       /*
-                        * Duplicate found. Just throw away the new dquot
-                        * and start over.
-                        */
-                       xfs_qm_dqput(tmpdqp);
-                       mutex_unlock(&h->qh_lock);
-                       xfs_qm_dqdestroy(dqp);
-                       XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
-                       goto again;
-               }
-       }
-
-       /*
-        * Put the dquot at the beginning of the hash-chain and mp's list
-        * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
-        */
-       ASSERT(mutex_is_locked(&h->qh_lock));
-       dqp->q_hash = h;
-       list_add(&dqp->q_hashlist, &h->qh_list);
-       h->qh_version++;
-
-       /*
-        * Attach this dquot to this filesystem's list of all dquots,
-        * kept inside the mount structure in m_quotainfo field
-        */
-       mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
-
-       /*
-        * We return a locked dquot to the caller, with a reference taken
-        */
-       xfs_dqlock(dqp);
-       dqp->q_nrefs = 1;
-
-       list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
-       mp->m_quotainfo->qi_dquots++;
-       mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-       mutex_unlock(&h->qh_lock);
- dqret:
-       ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       trace_xfs_dqget_miss(dqp);
-       *O_dqpp = dqp;
-       return (0);
-}
-
-
-/*
- * Release a reference to the dquot (decrement ref-count)
- * and unlock it. If there is a group quota attached to this
- * dquot, carefully release that too without tripping over
- * deadlocks'n'stuff.
- */
-void
-xfs_qm_dqput(
-       xfs_dquot_t     *dqp)
-{
-       xfs_dquot_t     *gdqp;
-
-       ASSERT(dqp->q_nrefs > 0);
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-       trace_xfs_dqput(dqp);
-
-       if (dqp->q_nrefs != 1) {
-               dqp->q_nrefs--;
-               xfs_dqunlock(dqp);
-               return;
-       }
-
-       /*
-        * drop the dqlock and acquire the freelist and dqlock
-        * in the right order; but try to get it out-of-order first
-        */
-       if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
-               trace_xfs_dqput_wait(dqp);
-               xfs_dqunlock(dqp);
-               mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-               xfs_dqlock(dqp);
-       }
-
-       while (1) {
-               gdqp = NULL;
-
-               /* We can't depend on nrefs being == 1 here */
-               if (--dqp->q_nrefs == 0) {
-                       trace_xfs_dqput_free(dqp);
-
-                       list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
-                       xfs_Gqm->qm_dqfrlist_cnt++;
-
-                       /*
-                        * If we just added a udquot to the freelist, then
-                        * we want to release the gdquot reference that
-                        * it (probably) has. Otherwise it'll keep the
-                        * gdquot from getting reclaimed.
-                        */
-                       if ((gdqp = dqp->q_gdquot)) {
-                               /*
-                                * Avoid a recursive dqput call
-                                */
-                               xfs_dqlock(gdqp);
-                               dqp->q_gdquot = NULL;
-                       }
-               }
-               xfs_dqunlock(dqp);
-
-               /*
-                * If we had a group quota inside the user quota as a hint,
-                * release it now.
-                */
-               if (! gdqp)
-                       break;
-               dqp = gdqp;
-       }
-       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-}
-
-/*
- * Release a dquot. Flush it if dirty, then dqput() it.
- * dquot must not be locked.
- */
-void
-xfs_qm_dqrele(
-       xfs_dquot_t     *dqp)
-{
-       if (!dqp)
-               return;
-
-       trace_xfs_dqrele(dqp);
-
-       xfs_dqlock(dqp);
-       /*
-        * We don't care to flush it if the dquot is dirty here.
-        * That will create stutters that we want to avoid.
-        * Instead we do a delayed write when we try to reclaim
-        * a dirty dquot. Also xfs_sync will take part of the burden...
-        */
-       xfs_qm_dqput(dqp);
-}
-
-/*
- * This is the dquot flushing I/O completion routine.  It is called
- * from interrupt level when the buffer containing the dquot is
- * flushed to disk.  It is responsible for removing the dquot logitem
- * from the AIL if it has not been re-logged, and unlocking the dquot's
- * flush lock. This behavior is very similar to that of inodes..
- */
-STATIC void
-xfs_qm_dqflush_done(
-       struct xfs_buf          *bp,
-       struct xfs_log_item     *lip)
-{
-       xfs_dq_logitem_t        *qip = (struct xfs_dq_logitem *)lip;
-       xfs_dquot_t             *dqp = qip->qli_dquot;
-       struct xfs_ail          *ailp = lip->li_ailp;
-
-       /*
-        * We only want to pull the item from the AIL if its
-        * location in the log has not changed since we started the flush.
-        * Thus, we only bother if the dquot's lsn has
-        * not changed. First we check the lsn outside the lock
-        * since it's cheaper, and then we recheck while
-        * holding the lock before removing the dquot from the AIL.
-        */
-       if ((lip->li_flags & XFS_LI_IN_AIL) &&
-           lip->li_lsn == qip->qli_flush_lsn) {
-
-               /* xfs_trans_ail_delete() drops the AIL lock. */
-               spin_lock(&ailp->xa_lock);
-               if (lip->li_lsn == qip->qli_flush_lsn)
-                       xfs_trans_ail_delete(ailp, lip);
-               else
-                       spin_unlock(&ailp->xa_lock);
-       }
-
-       /*
-        * Release the dq's flush lock since we're done with it.
-        */
-       xfs_dqfunlock(dqp);
-}
-
-/*
- * Write a modified dquot to disk.
- * The dquot must be locked and the flush lock too taken by caller.
- * The flush lock will not be unlocked until the dquot reaches the disk,
- * but the dquot is free to be unlocked and modified by the caller
- * in the interim. Dquot is still locked on return. This behavior is
- * identical to that of inodes.
- */
-int
-xfs_qm_dqflush(
-       xfs_dquot_t             *dqp,
-       uint                    flags)
-{
-       struct xfs_mount        *mp = dqp->q_mount;
-       struct xfs_buf          *bp;
-       struct xfs_disk_dquot   *ddqp;
-       int                     error;
-
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-       ASSERT(!completion_done(&dqp->q_flush));
-
-       trace_xfs_dqflush(dqp);
-
-       /*
-        * If not dirty, or it's pinned and we are not supposed to block, nada.
-        */
-       if (!XFS_DQ_IS_DIRTY(dqp) ||
-           (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
-               xfs_dqfunlock(dqp);
-               return 0;
-       }
-       xfs_qm_dqunpin_wait(dqp);
-
-       /*
-        * This may have been unpinned because the filesystem is shutting
-        * down forcibly. If that's the case we must not write this dquot
-        * to disk, because the log record didn't make it to disk!
-        */
-       if (XFS_FORCED_SHUTDOWN(mp)) {
-               dqp->dq_flags &= ~XFS_DQ_DIRTY;
-               xfs_dqfunlock(dqp);
-               return XFS_ERROR(EIO);
-       }
-
-       /*
-        * Get the buffer containing the on-disk dquot
-        */
-       error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
-                                  mp->m_quotainfo->qi_dqchunklen, 0, &bp);
-       if (error) {
-               ASSERT(error != ENOENT);
-               xfs_dqfunlock(dqp);
-               return error;
-       }
-
-       /*
-        * Calculate the location of the dquot inside the buffer.
-        */
-       ddqp = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset);
-
-       /*
-        * A simple sanity check in case we got a corrupted dquot..
-        */
-       error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
-                          XFS_QMOPT_DOWARN, "dqflush (incore copy)");
-       if (error) {
-               xfs_buf_relse(bp);
-               xfs_dqfunlock(dqp);
-               xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-               return XFS_ERROR(EIO);
-       }
-
-       /* This is the only portion of data that needs to persist */
-       memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
-
-       /*
-        * Clear the dirty field and remember the flush lsn for later use.
-        */
-       dqp->dq_flags &= ~XFS_DQ_DIRTY;
-
-       xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
-                                       &dqp->q_logitem.qli_item.li_lsn);
-
-       /*
-        * Attach an iodone routine so that we can remove this dquot from the
-        * AIL and release the flush lock once the dquot is synced to disk.
-        */
-       xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
-                                 &dqp->q_logitem.qli_item);
-
-       /*
-        * If the buffer is pinned then push on the log so we won't
-        * get stuck waiting in the write for too long.
-        */
-       if (XFS_BUF_ISPINNED(bp)) {
-               trace_xfs_dqflush_force(dqp);
-               xfs_log_force(mp, 0);
-       }
-
-       if (flags & SYNC_WAIT)
-               error = xfs_bwrite(mp, bp);
-       else
-               xfs_bdwrite(mp, bp);
-
-       trace_xfs_dqflush_done(dqp);
-
-       /*
-        * dqp is still locked, but caller is free to unlock it now.
-        */
-       return error;
-
-}
-
-int
-xfs_qm_dqlock_nowait(
-       xfs_dquot_t *dqp)
-{
-       return mutex_trylock(&dqp->q_qlock);
-}
-
-void
-xfs_dqlock(
-       xfs_dquot_t *dqp)
-{
-       mutex_lock(&dqp->q_qlock);
-}
-
-void
-xfs_dqunlock(
-       xfs_dquot_t *dqp)
-{
-       mutex_unlock(&(dqp->q_qlock));
-       if (dqp->q_logitem.qli_dquot == dqp) {
-               /* Once was dqp->q_mount, but might just have been cleared */
-               xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
-                                       (xfs_log_item_t*)&(dqp->q_logitem));
-       }
-}
-
-
-void
-xfs_dqunlock_nonotify(
-       xfs_dquot_t *dqp)
-{
-       mutex_unlock(&(dqp->q_qlock));
-}
-
-/*
- * Lock two xfs_dquot structures.
- *
- * To avoid deadlocks we always lock the quota structure with
- * the lowerd id first.
- */
-void
-xfs_dqlock2(
-       xfs_dquot_t     *d1,
-       xfs_dquot_t     *d2)
-{
-       if (d1 && d2) {
-               ASSERT(d1 != d2);
-               if (be32_to_cpu(d1->q_core.d_id) >
-                   be32_to_cpu(d2->q_core.d_id)) {
-                       mutex_lock(&d2->q_qlock);
-                       mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
-               } else {
-                       mutex_lock(&d1->q_qlock);
-                       mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
-               }
-       } else if (d1) {
-               mutex_lock(&d1->q_qlock);
-       } else if (d2) {
-               mutex_lock(&d2->q_qlock);
-       }
-}
-
-
-/*
- * Take a dquot out of the mount's dqlist as well as the hashlist.
- * This is called via unmount as well as quotaoff, and the purge
- * will always succeed unless there are soft (temp) references
- * outstanding.
- *
- * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
- * that we're returning! XXXsup - not cool.
- */
-/* ARGSUSED */
-int
-xfs_qm_dqpurge(
-       xfs_dquot_t     *dqp)
-{
-       xfs_dqhash_t    *qh = dqp->q_hash;
-       xfs_mount_t     *mp = dqp->q_mount;
-
-       ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
-       ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
-
-       xfs_dqlock(dqp);
-       /*
-        * We really can't afford to purge a dquot that is
-        * referenced, because these are hard refs.
-        * It shouldn't happen in general because we went thru _all_ inodes in
-        * dqrele_all_inodes before calling this and didn't let the mountlock go.
-        * However it is possible that we have dquots with temporary
-        * references that are not attached to an inode. e.g. see xfs_setattr().
-        */
-       if (dqp->q_nrefs != 0) {
-               xfs_dqunlock(dqp);
-               mutex_unlock(&dqp->q_hash->qh_lock);
-               return (1);
-       }
-
-       ASSERT(!list_empty(&dqp->q_freelist));
-
-       /*
-        * If we're turning off quotas, we have to make sure that, for
-        * example, we don't delete quota disk blocks while dquots are
-        * in the process of getting written to those disk blocks.
-        * This dquot might well be on AIL, and we can't leave it there
-        * if we're turning off quotas. Basically, we need this flush
-        * lock, and are willing to block on it.
-        */
-       if (!xfs_dqflock_nowait(dqp)) {
-               /*
-                * Block on the flush lock after nudging dquot buffer,
-                * if it is incore.
-                */
-               xfs_qm_dqflock_pushbuf_wait(dqp);
-       }
-
-       /*
-        * XXXIf we're turning this type of quotas off, we don't care
-        * about the dirty metadata sitting in this dquot. OTOH, if
-        * we're unmounting, we do care, so we flush it and wait.
-        */
-       if (XFS_DQ_IS_DIRTY(dqp)) {
-               int     error;
-
-               /* dqflush unlocks dqflock */
-               /*
-                * Given that dqpurge is a very rare occurrence, it is OK
-                * that we're holding the hashlist and mplist locks
-                * across the disk write. But, ... XXXsup
-                *
-                * We don't care about getting disk errors here. We need
-                * to purge this dquot anyway, so we go ahead regardless.
-                */
-               error = xfs_qm_dqflush(dqp, SYNC_WAIT);
-               if (error)
-                       xfs_warn(mp, "%s: dquot %p flush failed",
-                               __func__, dqp);
-               xfs_dqflock(dqp);
-       }
-       ASSERT(atomic_read(&dqp->q_pincount) == 0);
-       ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
-              !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
-
-       list_del_init(&dqp->q_hashlist);
-       qh->qh_version++;
-       list_del_init(&dqp->q_mplist);
-       mp->m_quotainfo->qi_dqreclaims++;
-       mp->m_quotainfo->qi_dquots--;
-       /*
-        * XXX Move this to the front of the freelist, if we can get the
-        * freelist lock.
-        */
-       ASSERT(!list_empty(&dqp->q_freelist));
-
-       dqp->q_mount = NULL;
-       dqp->q_hash = NULL;
-       dqp->dq_flags = XFS_DQ_INACTIVE;
-       memset(&dqp->q_core, 0, sizeof(dqp->q_core));
-       xfs_dqfunlock(dqp);
-       xfs_dqunlock(dqp);
-       mutex_unlock(&qh->qh_lock);
-       return (0);
-}
-
-
-/*
- * Give the buffer a little push if it is incore and
- * wait on the flush lock.
- */
-void
-xfs_qm_dqflock_pushbuf_wait(
-       xfs_dquot_t     *dqp)
-{
-       xfs_mount_t     *mp = dqp->q_mount;
-       xfs_buf_t       *bp;
-
-       /*
-        * Check to see if the dquot has been flushed delayed
-        * write.  If so, grab its buffer and send it
-        * out immediately.  We'll be able to acquire
-        * the flush lock when the I/O completes.
-        */
-       bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
-                       mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
-       if (!bp)
-               goto out_lock;
-
-       if (XFS_BUF_ISDELAYWRITE(bp)) {
-               if (XFS_BUF_ISPINNED(bp))
-                       xfs_log_force(mp, 0);
-               xfs_buf_delwri_promote(bp);
-               wake_up_process(bp->b_target->bt_task);
-       }
-       xfs_buf_relse(bp);
-out_lock:
-       xfs_dqflock(dqp);
-}
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h

deleted file mode 100644 (file)

index 34b7e94..0000000
--- a/fs/xfs/quota/xfs_dquot.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DQUOT_H__
-#define __XFS_DQUOT_H__
-
-/*
- * Dquots are structures that hold quota information about a user or a group,
- * much like inodes are for files. In fact, dquots share many characteristics
- * with inodes. However, dquots can also be a centralized resource, relative
- * to a collection of inodes. In this respect, dquots share some characteristics
- * of the superblock.
- * XFS dquots exploit both those in its algorithms. They make every attempt
- * to not be a bottleneck when quotas are on and have minimal impact, if any,
- * when quotas are off.
- */
-
-/*
- * The hash chain headers (hash buckets)
- */
-typedef struct xfs_dqhash {
-       struct list_head  qh_list;
-       struct mutex      qh_lock;
-       uint              qh_version;   /* ever increasing version */
-       uint              qh_nelems;    /* number of dquots on the list */
-} xfs_dqhash_t;
-
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * The incore dquot structure
- */
-typedef struct xfs_dquot {
-       uint             dq_flags;      /* various flags (XFS_DQ_*) */
-       struct list_head q_freelist;    /* global free list of dquots */
-       struct list_head q_mplist;      /* mount's list of dquots */
-       struct list_head q_hashlist;    /* gloabl hash list of dquots */
-       xfs_dqhash_t    *q_hash;        /* the hashchain header */
-       struct xfs_mount*q_mount;       /* filesystem this relates to */
-       struct xfs_trans*q_transp;      /* trans this belongs to currently */
-       uint             q_nrefs;       /* # active refs from inodes */
-       xfs_daddr_t      q_blkno;       /* blkno of dquot buffer */
-       int              q_bufoffset;   /* off of dq in buffer (# dquots) */
-       xfs_fileoff_t    q_fileoffset;  /* offset in quotas file */
-
-       struct xfs_dquot*q_gdquot;      /* group dquot, hint only */
-       xfs_disk_dquot_t q_core;        /* actual usage & quotas */
-       xfs_dq_logitem_t q_logitem;     /* dquot log item */
-       xfs_qcnt_t       q_res_bcount;  /* total regular nblks used+reserved */
-       xfs_qcnt_t       q_res_icount;  /* total inos allocd+reserved */
-       xfs_qcnt_t       q_res_rtbcount;/* total realtime blks used+reserved */
-       struct mutex     q_qlock;       /* quota lock */
-       struct completion q_flush;      /* flush completion queue */
-       atomic_t          q_pincount;   /* dquot pin count */
-       wait_queue_head_t q_pinwait;    /* dquot pinning wait queue */
-} xfs_dquot_t;
-
-/*
- * Lock hierarchy for q_qlock:
- *     XFS_QLOCK_NORMAL is the implicit default,
- *     XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
- */
-enum {
-       XFS_QLOCK_NORMAL = 0,
-       XFS_QLOCK_NESTED,
-};
-
-#define XFS_DQHOLD(dqp)                ((dqp)->q_nrefs++)
-
-/*
- * Manage the q_flush completion queue embedded in the dquot.  This completion
- * queue synchronizes processes attempting to flush the in-core dquot back to
- * disk.
- */
-static inline void xfs_dqflock(xfs_dquot_t *dqp)
-{
-       wait_for_completion(&dqp->q_flush);
-}
-
-static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp)
-{
-       return try_wait_for_completion(&dqp->q_flush);
-}
-
-static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
-{
-       complete(&dqp->q_flush);
-}
-
-#define XFS_DQ_IS_LOCKED(dqp)  (mutex_is_locked(&((dqp)->q_qlock)))
-#define XFS_DQ_IS_DIRTY(dqp)   ((dqp)->dq_flags & XFS_DQ_DIRTY)
-#define XFS_QM_ISUDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_USER)
-#define XFS_QM_ISPDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_PROJ)
-#define XFS_QM_ISGDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_GROUP)
-#define XFS_DQ_TO_QINF(dqp)    ((dqp)->q_mount->m_quotainfo)
-#define XFS_DQ_TO_QIP(dqp)     (XFS_QM_ISUDQ(dqp) ? \
-                                XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
-                                XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
-
-#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
-                                    (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
-                                    (XFS_IS_OQUOTA_ON((d)->q_mount))))
-
-extern void            xfs_qm_dqdestroy(xfs_dquot_t *);
-extern int             xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern int             xfs_qm_dqpurge(xfs_dquot_t *);
-extern void            xfs_qm_dqunpin_wait(xfs_dquot_t *);
-extern int             xfs_qm_dqlock_nowait(xfs_dquot_t *);
-extern void            xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
-extern void            xfs_qm_adjust_dqtimers(xfs_mount_t *,
-                                       xfs_disk_dquot_t *);
-extern void            xfs_qm_adjust_dqlimits(xfs_mount_t *,
-                                       xfs_disk_dquot_t *);
-extern int             xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
-                                       xfs_dqid_t, uint, uint, xfs_dquot_t **);
-extern void            xfs_qm_dqput(xfs_dquot_t *);
-extern void            xfs_dqlock(xfs_dquot_t *);
-extern void            xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
-extern void            xfs_dqunlock(xfs_dquot_t *);
-extern void            xfs_dqunlock_nonotify(xfs_dquot_t *);
-
-#endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c

deleted file mode 100644 (file)

index 9e0e2fa..0000000
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ /dev/null
@@ -1,529 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-
-static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
-{
-       return container_of(lip, struct xfs_dq_logitem, qli_item);
-}
-
-/*
- * returns the number of iovecs needed to log the given dquot item.
- */
-STATIC uint
-xfs_qm_dquot_logitem_size(
-       struct xfs_log_item     *lip)
-{
-       /*
-        * we need only two iovecs, one for the format, one for the real thing
-        */
-       return 2;
-}
-
-/*
- * fills in the vector of log iovecs for the given dquot log item.
- */
-STATIC void
-xfs_qm_dquot_logitem_format(
-       struct xfs_log_item     *lip,
-       struct xfs_log_iovec    *logvec)
-{
-       struct xfs_dq_logitem   *qlip = DQUOT_ITEM(lip);
-
-       logvec->i_addr = &qlip->qli_format;
-       logvec->i_len  = sizeof(xfs_dq_logformat_t);
-       logvec->i_type = XLOG_REG_TYPE_QFORMAT;
-       logvec++;
-       logvec->i_addr = &qlip->qli_dquot->q_core;
-       logvec->i_len  = sizeof(xfs_disk_dquot_t);
-       logvec->i_type = XLOG_REG_TYPE_DQUOT;
-
-       ASSERT(2 == lip->li_desc->lid_size);
-       qlip->qli_format.qlf_size = 2;
-
-}
-
-/*
- * Increment the pin count of the given dquot.
- */
-STATIC void
-xfs_qm_dquot_logitem_pin(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
-
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-       atomic_inc(&dqp->q_pincount);
-}
-
-/*
- * Decrement the pin count of the given dquot, and wake up
- * anyone in xfs_dqwait_unpin() if the count goes to 0.         The
- * dquot must have been previously pinned with a call to
- * xfs_qm_dquot_logitem_pin().
- */
-STATIC void
-xfs_qm_dquot_logitem_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
-{
-       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
-
-       ASSERT(atomic_read(&dqp->q_pincount) > 0);
-       if (atomic_dec_and_test(&dqp->q_pincount))
-               wake_up(&dqp->q_pinwait);
-}
-
-/*
- * Given the logitem, this writes the corresponding dquot entry to disk
- * asynchronously. This is called with the dquot entry securely locked;
- * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot
- * at the end.
- */
-STATIC void
-xfs_qm_dquot_logitem_push(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
-       int                     error;
-
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-       ASSERT(!completion_done(&dqp->q_flush));
-
-       /*
-        * Since we were able to lock the dquot's flush lock and
-        * we found it on the AIL, the dquot must be dirty.  This
-        * is because the dquot is removed from the AIL while still
-        * holding the flush lock in xfs_dqflush_done().  Thus, if
-        * we found it in the AIL and were able to obtain the flush
-        * lock without sleeping, then there must not have been
-        * anyone in the process of flushing the dquot.
-        */
-       error = xfs_qm_dqflush(dqp, 0);
-       if (error)
-               xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
-                       __func__, error, dqp);
-       xfs_dqunlock(dqp);
-}
-
-STATIC xfs_lsn_t
-xfs_qm_dquot_logitem_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       /*
-        * We always re-log the entire dquot when it becomes dirty,
-        * so, the latest copy _is_ the only one that matters.
-        */
-       return lsn;
-}
-
-/*
- * This is called to wait for the given dquot to be unpinned.
- * Most of these pin/unpin routines are plagiarized from inode code.
- */
-void
-xfs_qm_dqunpin_wait(
-       struct xfs_dquot        *dqp)
-{
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-       if (atomic_read(&dqp->q_pincount) == 0)
-               return;
-
-       /*
-        * Give the log a push so we don't wait here too long.
-        */
-       xfs_log_force(dqp->q_mount, 0);
-       wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
-}
-
-/*
- * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that
- * the dquot is locked by us, but the flush lock isn't. So, here we are
- * going to see if the relevant dquot buffer is incore, waiting on DELWRI.
- * If so, we want to push it out to help us take this item off the AIL as soon
- * as possible.
- *
- * We must not be holding the AIL lock at this point. Calling incore() to
- * search the buffer cache can be a time consuming thing, and AIL lock is a
- * spinlock.
- */
-STATIC void
-xfs_qm_dquot_logitem_pushbuf(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_dq_logitem   *qlip = DQUOT_ITEM(lip);
-       struct xfs_dquot        *dqp = qlip->qli_dquot;
-       struct xfs_buf          *bp;
-
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-       /*
-        * If flushlock isn't locked anymore, chances are that the
-        * inode flush completed and the inode was taken off the AIL.
-        * So, just get out.
-        */
-       if (completion_done(&dqp->q_flush) ||
-           !(lip->li_flags & XFS_LI_IN_AIL)) {
-               xfs_dqunlock(dqp);
-               return;
-       }
-
-       bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
-                       dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
-       xfs_dqunlock(dqp);
-       if (!bp)
-               return;
-       if (XFS_BUF_ISDELAYWRITE(bp))
-               xfs_buf_delwri_promote(bp);
-       xfs_buf_relse(bp);
-}
-
-/*
- * This is called to attempt to lock the dquot associated with this
- * dquot log item.  Don't sleep on the dquot lock or the flush lock.
- * If the flush lock is already held, indicating that the dquot has
- * been or is in the process of being flushed, then see if we can
- * find the dquot's buffer in the buffer cache without sleeping.  If
- * we can and it is marked delayed write, then we want to send it out.
- * We delay doing so until the push routine, though, to avoid sleeping
- * in any device strategy routines.
- */
-STATIC uint
-xfs_qm_dquot_logitem_trylock(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
-
-       if (atomic_read(&dqp->q_pincount) > 0)
-               return XFS_ITEM_PINNED;
-
-       if (!xfs_qm_dqlock_nowait(dqp))
-               return XFS_ITEM_LOCKED;
-
-       if (!xfs_dqflock_nowait(dqp)) {
-               /*
-                * dquot has already been flushed to the backing buffer,
-                * leave it locked, pushbuf routine will unlock it.
-                */
-               return XFS_ITEM_PUSHBUF;
-       }
-
-       ASSERT(lip->li_flags & XFS_LI_IN_AIL);
-       return XFS_ITEM_SUCCESS;
-}
-
-/*
- * Unlock the dquot associated with the log item.
- * Clear the fields of the dquot and dquot log item that
- * are specific to the current transaction.  If the
- * hold flags is set, do not unlock the dquot.
- */
-STATIC void
-xfs_qm_dquot_logitem_unlock(
-       struct xfs_log_item     *lip)
-{
-       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
-
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-       /*
-        * Clear the transaction pointer in the dquot
-        */
-       dqp->q_transp = NULL;
-
-       /*
-        * dquots are never 'held' from getting unlocked at the end of
-        * a transaction.  Their locking and unlocking is hidden inside the
-        * transaction layer, within trans_commit. Hence, no LI_HOLD flag
-        * for the logitem.
-        */
-       xfs_dqunlock(dqp);
-}
-
-/*
- * this needs to stamp an lsn into the dquot, I think.
- * rpc's that look at user dquot's would then have to
- * push on the dependency recorded in the dquot
- */
-STATIC void
-xfs_qm_dquot_logitem_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-}
-
-/*
- * This is the ops vector for dquots
- */
-static struct xfs_item_ops xfs_dquot_item_ops = {
-       .iop_size       = xfs_qm_dquot_logitem_size,
-       .iop_format     = xfs_qm_dquot_logitem_format,
-       .iop_pin        = xfs_qm_dquot_logitem_pin,
-       .iop_unpin      = xfs_qm_dquot_logitem_unpin,
-       .iop_trylock    = xfs_qm_dquot_logitem_trylock,
-       .iop_unlock     = xfs_qm_dquot_logitem_unlock,
-       .iop_committed  = xfs_qm_dquot_logitem_committed,
-       .iop_push       = xfs_qm_dquot_logitem_push,
-       .iop_pushbuf    = xfs_qm_dquot_logitem_pushbuf,
-       .iop_committing = xfs_qm_dquot_logitem_committing
-};
-
-/*
- * Initialize the dquot log item for a newly allocated dquot.
- * The dquot isn't locked at this point, but it isn't on any of the lists
- * either, so we don't care.
- */
-void
-xfs_qm_dquot_logitem_init(
-       struct xfs_dquot        *dqp)
-{
-       struct xfs_dq_logitem   *lp = &dqp->q_logitem;
-
-       xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
-                                       &xfs_dquot_item_ops);
-       lp->qli_dquot = dqp;
-       lp->qli_format.qlf_type = XFS_LI_DQUOT;
-       lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id);
-       lp->qli_format.qlf_blkno = dqp->q_blkno;
-       lp->qli_format.qlf_len = 1;
-       /*
-        * This is just the offset of this dquot within its buffer
-        * (which is currently 1 FSB and probably won't change).
-        * Hence 32 bits for this offset should be just fine.
-        * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t))
-        * here, and recompute it at recovery time.
-        */
-       lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset;
-}
-
-/*------------------  QUOTAOFF LOG ITEMS  -------------------*/
-
-static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip)
-{
-       return container_of(lip, struct xfs_qoff_logitem, qql_item);
-}
-
-
-/*
- * This returns the number of iovecs needed to log the given quotaoff item.
- * We only need 1 iovec for an quotaoff item.  It just logs the
- * quotaoff_log_format structure.
- */
-STATIC uint
-xfs_qm_qoff_logitem_size(
-       struct xfs_log_item     *lip)
-{
-       return 1;
-}
-
-/*
- * This is called to fill in the vector of log iovecs for the
- * given quotaoff log item. We use only 1 iovec, and we point that
- * at the quotaoff_log_format structure embedded in the quotaoff item.
- * It is at this point that we assert that all of the extent
- * slots in the quotaoff item have been filled.
- */
-STATIC void
-xfs_qm_qoff_logitem_format(
-       struct xfs_log_item     *lip,
-       struct xfs_log_iovec    *log_vector)
-{
-       struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip);
-
-       ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF);
-
-       log_vector->i_addr = &qflip->qql_format;
-       log_vector->i_len = sizeof(xfs_qoff_logitem_t);
-       log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
-       qflip->qql_format.qf_size = 1;
-}
-
-/*
- * Pinning has no meaning for an quotaoff item, so just return.
- */
-STATIC void
-xfs_qm_qoff_logitem_pin(
-       struct xfs_log_item     *lip)
-{
-}
-
-/*
- * Since pinning has no meaning for an quotaoff item, unpinning does
- * not either.
- */
-STATIC void
-xfs_qm_qoff_logitem_unpin(
-       struct xfs_log_item     *lip,
-       int                     remove)
-{
-}
-
-/*
- * Quotaoff items have no locking, so just return success.
- */
-STATIC uint
-xfs_qm_qoff_logitem_trylock(
-       struct xfs_log_item     *lip)
-{
-       return XFS_ITEM_LOCKED;
-}
-
-/*
- * Quotaoff items have no locking or pushing, so return failure
- * so that the caller doesn't bother with us.
- */
-STATIC void
-xfs_qm_qoff_logitem_unlock(
-       struct xfs_log_item     *lip)
-{
-}
-
-/*
- * The quotaoff-start-item is logged only once and cannot be moved in the log,
- * so simply return the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_qm_qoff_logitem_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       return lsn;
-}
-
-/*
- * There isn't much you can do to push on an quotaoff item.  It is simply
- * stuck waiting for the log to be flushed to disk.
- */
-STATIC void
-xfs_qm_qoff_logitem_push(
-       struct xfs_log_item     *lip)
-{
-}
-
-
-STATIC xfs_lsn_t
-xfs_qm_qoffend_logitem_committed(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               lsn)
-{
-       struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip);
-       struct xfs_qoff_logitem *qfs = qfe->qql_start_lip;
-       struct xfs_ail          *ailp = qfs->qql_item.li_ailp;
-
-       /*
-        * Delete the qoff-start logitem from the AIL.
-        * xfs_trans_ail_delete() drops the AIL lock.
-        */
-       spin_lock(&ailp->xa_lock);
-       xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
-
-       kmem_free(qfs);
-       kmem_free(qfe);
-       return (xfs_lsn_t)-1;
-}
-
-/*
- * XXX rcc - don't know quite what to do with this.  I think we can
- * just ignore it.  The only time that isn't the case is if we allow
- * the client to somehow see that quotas have been turned off in which
- * we can't allow that to get back until the quotaoff hits the disk.
- * So how would that happen?  Also, do we need different routines for
- * quotaoff start and quotaoff end?  I suspect the answer is yes but
- * to be sure, I need to look at the recovery code and see how quota off
- * recovery is handled (do we roll forward or back or do something else).
- * If we roll forwards or backwards, then we need two separate routines,
- * one that does nothing and one that stamps in the lsn that matters
- * (truly makes the quotaoff irrevocable).  If we do something else,
- * then maybe we don't need two.
- */
-STATIC void
-xfs_qm_qoff_logitem_committing(
-       struct xfs_log_item     *lip,
-       xfs_lsn_t               commit_lsn)
-{
-}
-
-static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
-       .iop_size       = xfs_qm_qoff_logitem_size,
-       .iop_format     = xfs_qm_qoff_logitem_format,
-       .iop_pin        = xfs_qm_qoff_logitem_pin,
-       .iop_unpin      = xfs_qm_qoff_logitem_unpin,
-       .iop_trylock    = xfs_qm_qoff_logitem_trylock,
-       .iop_unlock     = xfs_qm_qoff_logitem_unlock,
-       .iop_committed  = xfs_qm_qoffend_logitem_committed,
-       .iop_push       = xfs_qm_qoff_logitem_push,
-       .iop_committing = xfs_qm_qoff_logitem_committing
-};
-
-/*
- * This is the ops vector shared by all quotaoff-start log items.
- */
-static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
-       .iop_size       = xfs_qm_qoff_logitem_size,
-       .iop_format     = xfs_qm_qoff_logitem_format,
-       .iop_pin        = xfs_qm_qoff_logitem_pin,
-       .iop_unpin      = xfs_qm_qoff_logitem_unpin,
-       .iop_trylock    = xfs_qm_qoff_logitem_trylock,
-       .iop_unlock     = xfs_qm_qoff_logitem_unlock,
-       .iop_committed  = xfs_qm_qoff_logitem_committed,
-       .iop_push       = xfs_qm_qoff_logitem_push,
-       .iop_committing = xfs_qm_qoff_logitem_committing
-};
-
-/*
- * Allocate and initialize an quotaoff item of the correct quota type(s).
- */
-struct xfs_qoff_logitem *
-xfs_qm_qoff_logitem_init(
-       struct xfs_mount        *mp,
-       struct xfs_qoff_logitem *start,
-       uint                    flags)
-{
-       struct xfs_qoff_logitem *qf;
-
-       qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP);
-
-       xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
-                       &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
-       qf->qql_item.li_mountp = mp;
-       qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
-       qf->qql_format.qf_flags = flags;
-       qf->qql_start_lip = start;
-       return qf;
-}
diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h

deleted file mode 100644 (file)

index 5acae2a..0000000
--- a/fs/xfs/quota/xfs_dquot_item.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DQUOT_ITEM_H__
-#define __XFS_DQUOT_ITEM_H__
-
-struct xfs_dquot;
-struct xfs_trans;
-struct xfs_mount;
-struct xfs_qoff_logitem;
-
-typedef struct xfs_dq_logitem {
-       xfs_log_item_t           qli_item;         /* common portion */
-       struct xfs_dquot        *qli_dquot;        /* dquot ptr */
-       xfs_lsn_t                qli_flush_lsn;    /* lsn at last flush */
-       xfs_dq_logformat_t       qli_format;       /* logged structure */
-} xfs_dq_logitem_t;
-
-typedef struct xfs_qoff_logitem {
-       xfs_log_item_t           qql_item;      /* common portion */
-       struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
-       xfs_qoff_logformat_t     qql_format;    /* logged structure */
-} xfs_qoff_logitem_t;
-
-
-extern void               xfs_qm_dquot_logitem_init(struct xfs_dquot *);
-extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *,
-                                       struct xfs_qoff_logitem *, uint);
-extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *,
-                                       struct xfs_qoff_logitem *, uint);
-extern void               xfs_trans_log_quotaoff_item(struct xfs_trans *,
-                                       struct xfs_qoff_logitem *);
-
-#endif /* __XFS_DQUOT_ITEM_H__ */
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c

deleted file mode 100644 (file)

index 46e54ad..0000000
--- a/fs/xfs/quota/xfs_qm.c
+++ /dev/null
@@ -1,2416 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_bmap.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_space.h"
-#include "xfs_utils.h"
-#include "xfs_qm.h"
-#include "xfs_trace.h"
-
-/*
- * The global quota manager. There is only one of these for the entire
- * system, _not_ one per file system. XQM keeps track of the overall
- * quota functionality, including maintaining the freelist and hash
- * tables of dquots.
- */
-struct mutex   xfs_Gqm_lock;
-struct xfs_qm  *xfs_Gqm;
-uint           ndquot;
-
-kmem_zone_t    *qm_dqzone;
-kmem_zone_t    *qm_dqtrxzone;
-
-STATIC void    xfs_qm_list_init(xfs_dqlist_t *, char *, int);
-STATIC void    xfs_qm_list_destroy(xfs_dqlist_t *);
-
-STATIC int     xfs_qm_init_quotainos(xfs_mount_t *);
-STATIC int     xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int     xfs_qm_shake(struct shrinker *, struct shrink_control *);
-
-static struct shrinker xfs_qm_shaker = {
-       .shrink = xfs_qm_shake,
-       .seeks = DEFAULT_SEEKS,
-};
-
-/*
- * Initialize the XQM structure.
- * Note that there is not one quota manager per file system.
- */
-STATIC struct xfs_qm *
-xfs_Gqm_init(void)
-{
-       xfs_dqhash_t    *udqhash, *gdqhash;
-       xfs_qm_t        *xqm;
-       size_t          hsize;
-       uint            i;
-
-       /*
-        * Initialize the dquot hash tables.
-        */
-       udqhash = kmem_zalloc_greedy(&hsize,
-                                    XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
-                                    XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
-       if (!udqhash)
-               goto out;
-
-       gdqhash = kmem_zalloc_large(hsize);
-       if (!gdqhash)
-               goto out_free_udqhash;
-
-       hsize /= sizeof(xfs_dqhash_t);
-       ndquot = hsize << 8;
-
-       xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
-       xqm->qm_dqhashmask = hsize - 1;
-       xqm->qm_usr_dqhtable = udqhash;
-       xqm->qm_grp_dqhtable = gdqhash;
-       ASSERT(xqm->qm_usr_dqhtable != NULL);
-       ASSERT(xqm->qm_grp_dqhtable != NULL);
-
-       for (i = 0; i < hsize; i++) {
-               xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
-               xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
-       }
-
-       /*
-        * Freelist of all dquots of all file systems
-        */
-       INIT_LIST_HEAD(&xqm->qm_dqfrlist);
-       xqm->qm_dqfrlist_cnt = 0;
-       mutex_init(&xqm->qm_dqfrlist_lock);
-
-       /*
-        * dquot zone. we register our own low-memory callback.
-        */
-       if (!qm_dqzone) {
-               xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
-                                               "xfs_dquots");
-               qm_dqzone = xqm->qm_dqzone;
-       } else
-               xqm->qm_dqzone = qm_dqzone;
-
-       register_shrinker(&xfs_qm_shaker);
-
-       /*
-        * The t_dqinfo portion of transactions.
-        */
-       if (!qm_dqtrxzone) {
-               xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
-                                                  "xfs_dqtrx");
-               qm_dqtrxzone = xqm->qm_dqtrxzone;
-       } else
-               xqm->qm_dqtrxzone = qm_dqtrxzone;
-
-       atomic_set(&xqm->qm_totaldquots, 0);
-       xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
-       xqm->qm_nrefs = 0;
-       return xqm;
-
- out_free_udqhash:
-       kmem_free_large(udqhash);
- out:
-       return NULL;
-}
-
-/*
- * Destroy the global quota manager when its reference count goes to zero.
- */
-STATIC void
-xfs_qm_destroy(
-       struct xfs_qm   *xqm)
-{
-       struct xfs_dquot *dqp, *n;
-       int             hsize, i;
-
-       ASSERT(xqm != NULL);
-       ASSERT(xqm->qm_nrefs == 0);
-       unregister_shrinker(&xfs_qm_shaker);
-       hsize = xqm->qm_dqhashmask + 1;
-       for (i = 0; i < hsize; i++) {
-               xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
-               xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
-       }
-       kmem_free_large(xqm->qm_usr_dqhtable);
-       kmem_free_large(xqm->qm_grp_dqhtable);
-       xqm->qm_usr_dqhtable = NULL;
-       xqm->qm_grp_dqhtable = NULL;
-       xqm->qm_dqhashmask = 0;
-
-       /* frlist cleanup */
-       mutex_lock(&xqm->qm_dqfrlist_lock);
-       list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
-               xfs_dqlock(dqp);
-               list_del_init(&dqp->q_freelist);
-               xfs_Gqm->qm_dqfrlist_cnt--;
-               xfs_dqunlock(dqp);
-               xfs_qm_dqdestroy(dqp);
-       }
-       mutex_unlock(&xqm->qm_dqfrlist_lock);
-       mutex_destroy(&xqm->qm_dqfrlist_lock);
-       kmem_free(xqm);
-}
-
-/*
- * Called at mount time to let XQM know that another file system is
- * starting quotas. This isn't crucial information as the individual mount
- * structures are pretty independent, but it helps the XQM keep a
- * global view of what's going on.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_hold_quotafs_ref(
-       struct xfs_mount *mp)
-{
-       /*
-        * Need to lock the xfs_Gqm structure for things like this. For example,
-        * the structure could disappear between the entry to this routine and
-        * a HOLD operation if not locked.
-        */
-       mutex_lock(&xfs_Gqm_lock);
-
-       if (!xfs_Gqm) {
-               xfs_Gqm = xfs_Gqm_init();
-               if (!xfs_Gqm) {
-                       mutex_unlock(&xfs_Gqm_lock);
-                       return ENOMEM;
-               }
-       }
-
-       /*
-        * We can keep a list of all filesystems with quotas mounted for
-        * debugging and statistical purposes, but ...
-        * Just take a reference and get out.
-        */
-       xfs_Gqm->qm_nrefs++;
-       mutex_unlock(&xfs_Gqm_lock);
-
-       return 0;
-}
-
-
-/*
- * Release the reference that a filesystem took at mount time,
- * so that we know when we need to destroy the entire quota manager.
- */
-/* ARGSUSED */
-STATIC void
-xfs_qm_rele_quotafs_ref(
-       struct xfs_mount *mp)
-{
-       xfs_dquot_t     *dqp, *n;
-
-       ASSERT(xfs_Gqm);
-       ASSERT(xfs_Gqm->qm_nrefs > 0);
-
-       /*
-        * Go thru the freelist and destroy all inactive dquots.
-        */
-       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-
-       list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
-               xfs_dqlock(dqp);
-               if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-                       ASSERT(dqp->q_mount == NULL);
-                       ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-                       ASSERT(list_empty(&dqp->q_hashlist));
-                       ASSERT(list_empty(&dqp->q_mplist));
-                       list_del_init(&dqp->q_freelist);
-                       xfs_Gqm->qm_dqfrlist_cnt--;
-                       xfs_dqunlock(dqp);
-                       xfs_qm_dqdestroy(dqp);
-               } else {
-                       xfs_dqunlock(dqp);
-               }
-       }
-       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-
-       /*
-        * Destroy the entire XQM. If somebody mounts with quotaon, this'll
-        * be restarted.
-        */
-       mutex_lock(&xfs_Gqm_lock);
-       if (--xfs_Gqm->qm_nrefs == 0) {
-               xfs_qm_destroy(xfs_Gqm);
-               xfs_Gqm = NULL;
-       }
-       mutex_unlock(&xfs_Gqm_lock);
-}
-
-/*
- * Just destroy the quotainfo structure.
- */
-void
-xfs_qm_unmount(
-       struct xfs_mount        *mp)
-{
-       if (mp->m_quotainfo) {
-               xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
-               xfs_qm_destroy_quotainfo(mp);
-       }
-}
-
-
-/*
- * This is called from xfs_mountfs to start quotas and initialize all
- * necessary data structures like quotainfo.  This is also responsible for
- * running a quotacheck as necessary.  We are guaranteed that the superblock
- * is consistently read in at this point.
- *
- * If we fail here, the mount will continue with quota turned off. We don't
- * need to inidicate success or failure at all.
- */
-void
-xfs_qm_mount_quotas(
-       xfs_mount_t     *mp)
-{
-       int             error = 0;
-       uint            sbf;
-
-       /*
-        * If quotas on realtime volumes is not supported, we disable
-        * quotas immediately.
-        */
-       if (mp->m_sb.sb_rextents) {
-               xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
-               mp->m_qflags = 0;
-               goto write_changes;
-       }
-
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       /*
-        * Allocate the quotainfo structure inside the mount struct, and
-        * create quotainode(s), and change/rev superblock if necessary.
-        */
-       error = xfs_qm_init_quotainfo(mp);
-       if (error) {
-               /*
-                * We must turn off quotas.
-                */
-               ASSERT(mp->m_quotainfo == NULL);
-               mp->m_qflags = 0;
-               goto write_changes;
-       }
-       /*
-        * If any of the quotas are not consistent, do a quotacheck.
-        */
-       if (XFS_QM_NEED_QUOTACHECK(mp)) {
-               error = xfs_qm_quotacheck(mp);
-               if (error) {
-                       /* Quotacheck failed and disabled quotas. */
-                       return;
-               }
-       }
-       /* 
-        * If one type of quotas is off, then it will lose its
-        * quotachecked status, since we won't be doing accounting for
-        * that type anymore.
-        */
-       if (!XFS_IS_UQUOTA_ON(mp))
-               mp->m_qflags &= ~XFS_UQUOTA_CHKD;
-       if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
-               mp->m_qflags &= ~XFS_OQUOTA_CHKD;
-
- write_changes:
-       /*
-        * We actually don't have to acquire the m_sb_lock at all.
-        * This can only be called from mount, and that's single threaded. XXX
-        */
-       spin_lock(&mp->m_sb_lock);
-       sbf = mp->m_sb.sb_qflags;
-       mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
-       spin_unlock(&mp->m_sb_lock);
-
-       if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
-               if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
-                       /*
-                        * We could only have been turning quotas off.
-                        * We aren't in very good shape actually because
-                        * the incore structures are convinced that quotas are
-                        * off, but the on disk superblock doesn't know that !
-                        */
-                       ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
-                       xfs_alert(mp, "%s: Superblock update failed!",
-                               __func__);
-               }
-       }
-
-       if (error) {
-               xfs_warn(mp, "Failed to initialize disk quotas.");
-               return;
-       }
-}
-
-/*
- * Called from the vfsops layer.
- */
-void
-xfs_qm_unmount_quotas(
-       xfs_mount_t     *mp)
-{
-       /*
-        * Release the dquots that root inode, et al might be holding,
-        * before we flush quotas and blow away the quotainfo structure.
-        */
-       ASSERT(mp->m_rootip);
-       xfs_qm_dqdetach(mp->m_rootip);
-       if (mp->m_rbmip)
-               xfs_qm_dqdetach(mp->m_rbmip);
-       if (mp->m_rsumip)
-               xfs_qm_dqdetach(mp->m_rsumip);
-
-       /*
-        * Release the quota inodes.
-        */
-       if (mp->m_quotainfo) {
-               if (mp->m_quotainfo->qi_uquotaip) {
-                       IRELE(mp->m_quotainfo->qi_uquotaip);
-                       mp->m_quotainfo->qi_uquotaip = NULL;
-               }
-               if (mp->m_quotainfo->qi_gquotaip) {
-                       IRELE(mp->m_quotainfo->qi_gquotaip);
-                       mp->m_quotainfo->qi_gquotaip = NULL;
-               }
-       }
-}
-
-/*
- * Flush all dquots of the given file system to disk. The dquots are
- * _not_ purged from memory here, just their data written to disk.
- */
-STATIC int
-xfs_qm_dqflush_all(
-       struct xfs_mount        *mp,
-       int                     sync_mode)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       int                     recl;
-       struct xfs_dquot        *dqp;
-       int                     error;
-
-       if (!q)
-               return 0;
-again:
-       mutex_lock(&q->qi_dqlist_lock);
-       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-               xfs_dqlock(dqp);
-               if (! XFS_DQ_IS_DIRTY(dqp)) {
-                       xfs_dqunlock(dqp);
-                       continue;
-               }
-
-               /* XXX a sentinel would be better */
-               recl = q->qi_dqreclaims;
-               if (!xfs_dqflock_nowait(dqp)) {
-                       /*
-                        * If we can't grab the flush lock then check
-                        * to see if the dquot has been flushed delayed
-                        * write.  If so, grab its buffer and send it
-                        * out immediately.  We'll be able to acquire
-                        * the flush lock when the I/O completes.
-                        */
-                       xfs_qm_dqflock_pushbuf_wait(dqp);
-               }
-               /*
-                * Let go of the mplist lock. We don't want to hold it
-                * across a disk write.
-                */
-               mutex_unlock(&q->qi_dqlist_lock);
-               error = xfs_qm_dqflush(dqp, sync_mode);
-               xfs_dqunlock(dqp);
-               if (error)
-                       return error;
-
-               mutex_lock(&q->qi_dqlist_lock);
-               if (recl != q->qi_dqreclaims) {
-                       mutex_unlock(&q->qi_dqlist_lock);
-                       /* XXX restart limit */
-                       goto again;
-               }
-       }
-
-       mutex_unlock(&q->qi_dqlist_lock);
-       /* return ! busy */
-       return 0;
-}
-/*
- * Release the group dquot pointers the user dquots may be
- * carrying around as a hint. mplist is locked on entry and exit.
- */
-STATIC void
-xfs_qm_detach_gdquots(
-       struct xfs_mount        *mp)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       struct xfs_dquot        *dqp, *gdqp;
-       int                     nrecl;
-
- again:
-       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
-       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-               xfs_dqlock(dqp);
-               if ((gdqp = dqp->q_gdquot)) {
-                       xfs_dqlock(gdqp);
-                       dqp->q_gdquot = NULL;
-               }
-               xfs_dqunlock(dqp);
-
-               if (gdqp) {
-                       /*
-                        * Can't hold the mplist lock across a dqput.
-                        * XXXmust convert to marker based iterations here.
-                        */
-                       nrecl = q->qi_dqreclaims;
-                       mutex_unlock(&q->qi_dqlist_lock);
-                       xfs_qm_dqput(gdqp);
-
-                       mutex_lock(&q->qi_dqlist_lock);
-                       if (nrecl != q->qi_dqreclaims)
-                               goto again;
-               }
-       }
-}
-
-/*
- * Go through all the incore dquots of this file system and take them
- * off the mplist and hashlist, if the dquot type matches the dqtype
- * parameter. This is used when turning off quota accounting for
- * users and/or groups, as well as when the filesystem is unmounting.
- */
-STATIC int
-xfs_qm_dqpurge_int(
-       struct xfs_mount        *mp,
-       uint                    flags)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       struct xfs_dquot        *dqp, *n;
-       uint                    dqtype;
-       int                     nrecl;
-       int                     nmisses;
-
-       if (!q)
-               return 0;
-
-       dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
-       dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
-       dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
-
-       mutex_lock(&q->qi_dqlist_lock);
-
-       /*
-        * In the first pass through all incore dquots of this filesystem,
-        * we release the group dquot pointers the user dquots may be
-        * carrying around as a hint. We need to do this irrespective of
-        * what's being turned off.
-        */
-       xfs_qm_detach_gdquots(mp);
-
-      again:
-       nmisses = 0;
-       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
-       /*
-        * Try to get rid of all of the unwanted dquots. The idea is to
-        * get them off mplist and hashlist, but leave them on freelist.
-        */
-       list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
-               /*
-                * It's OK to look at the type without taking dqlock here.
-                * We're holding the mplist lock here, and that's needed for
-                * a dqreclaim.
-                */
-               if ((dqp->dq_flags & dqtype) == 0)
-                       continue;
-
-               if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
-                       nrecl = q->qi_dqreclaims;
-                       mutex_unlock(&q->qi_dqlist_lock);
-                       mutex_lock(&dqp->q_hash->qh_lock);
-                       mutex_lock(&q->qi_dqlist_lock);
-
-                       /*
-                        * XXXTheoretically, we can get into a very long
-                        * ping pong game here.
-                        * No one can be adding dquots to the mplist at
-                        * this point, but somebody might be taking things off.
-                        */
-                       if (nrecl != q->qi_dqreclaims) {
-                               mutex_unlock(&dqp->q_hash->qh_lock);
-                               goto again;
-                       }
-               }
-
-               /*
-                * Take the dquot off the mplist and hashlist. It may remain on
-                * freelist in INACTIVE state.
-                */
-               nmisses += xfs_qm_dqpurge(dqp);
-       }
-       mutex_unlock(&q->qi_dqlist_lock);
-       return nmisses;
-}
-
-int
-xfs_qm_dqpurge_all(
-       xfs_mount_t     *mp,
-       uint            flags)
-{
-       int             ndquots;
-
-       /*
-        * Purge the dquot cache.
-        * None of the dquots should really be busy at this point.
-        */
-       if (mp->m_quotainfo) {
-               while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
-                       delay(ndquots * 10);
-               }
-       }
-       return 0;
-}
-
-STATIC int
-xfs_qm_dqattach_one(
-       xfs_inode_t     *ip,
-       xfs_dqid_t      id,
-       uint            type,
-       uint            doalloc,
-       xfs_dquot_t     *udqhint, /* hint */
-       xfs_dquot_t     **IO_idqpp)
-{
-       xfs_dquot_t     *dqp;
-       int             error;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       error = 0;
-
-       /*
-        * See if we already have it in the inode itself. IO_idqpp is
-        * &i_udquot or &i_gdquot. This made the code look weird, but
-        * made the logic a lot simpler.
-        */
-       dqp = *IO_idqpp;
-       if (dqp) {
-               trace_xfs_dqattach_found(dqp);
-               return 0;
-       }
-
-       /*
-        * udqhint is the i_udquot field in inode, and is non-NULL only
-        * when the type arg is group/project. Its purpose is to save a
-        * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
-        * the user dquot.
-        */
-       if (udqhint) {
-               ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
-               xfs_dqlock(udqhint);
-
-               /*
-                * No need to take dqlock to look at the id.
-                *
-                * The ID can't change until it gets reclaimed, and it won't
-                * be reclaimed as long as we have a ref from inode and we
-                * hold the ilock.
-                */
-               dqp = udqhint->q_gdquot;
-               if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
-                       xfs_dqlock(dqp);
-                       XFS_DQHOLD(dqp);
-                       ASSERT(*IO_idqpp == NULL);
-                       *IO_idqpp = dqp;
-
-                       xfs_dqunlock(dqp);
-                       xfs_dqunlock(udqhint);
-                       return 0;
-               }
-
-               /*
-                * We can't hold a dquot lock when we call the dqget code.
-                * We'll deadlock in no time, because of (not conforming to)
-                * lock ordering - the inodelock comes before any dquot lock,
-                * and we may drop and reacquire the ilock in xfs_qm_dqget().
-                */
-               xfs_dqunlock(udqhint);
-       }
-
-       /*
-        * Find the dquot from somewhere. This bumps the
-        * reference count of dquot and returns it locked.
-        * This can return ENOENT if dquot didn't exist on
-        * disk and we didn't ask it to allocate;
-        * ESRCH if quotas got turned off suddenly.
-        */
-       error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
-       if (error)
-               return error;
-
-       trace_xfs_dqattach_get(dqp);
-
-       /*
-        * dqget may have dropped and re-acquired the ilock, but it guarantees
-        * that the dquot returned is the one that should go in the inode.
-        */
-       *IO_idqpp = dqp;
-       xfs_dqunlock(dqp);
-       return 0;
-}
-
-
-/*
- * Given a udquot and gdquot, attach a ptr to the group dquot in the
- * udquot as a hint for future lookups. The idea sounds simple, but the
- * execution isn't, because the udquot might have a group dquot attached
- * already and getting rid of that gets us into lock ordering constraints.
- * The process is complicated more by the fact that the dquots may or may not
- * be locked on entry.
- */
-STATIC void
-xfs_qm_dqattach_grouphint(
-       xfs_dquot_t     *udq,
-       xfs_dquot_t     *gdq)
-{
-       xfs_dquot_t     *tmp;
-
-       xfs_dqlock(udq);
-
-       if ((tmp = udq->q_gdquot)) {
-               if (tmp == gdq) {
-                       xfs_dqunlock(udq);
-                       return;
-               }
-
-               udq->q_gdquot = NULL;
-               /*
-                * We can't keep any dqlocks when calling dqrele,
-                * because the freelist lock comes before dqlocks.
-                */
-               xfs_dqunlock(udq);
-               /*
-                * we took a hard reference once upon a time in dqget,
-                * so give it back when the udquot no longer points at it
-                * dqput() does the unlocking of the dquot.
-                */
-               xfs_qm_dqrele(tmp);
-
-               xfs_dqlock(udq);
-               xfs_dqlock(gdq);
-
-       } else {
-               ASSERT(XFS_DQ_IS_LOCKED(udq));
-               xfs_dqlock(gdq);
-       }
-
-       ASSERT(XFS_DQ_IS_LOCKED(udq));
-       ASSERT(XFS_DQ_IS_LOCKED(gdq));
-       /*
-        * Somebody could have attached a gdquot here,
-        * when we dropped the uqlock. If so, just do nothing.
-        */
-       if (udq->q_gdquot == NULL) {
-               XFS_DQHOLD(gdq);
-               udq->q_gdquot = gdq;
-       }
-
-       xfs_dqunlock(gdq);
-       xfs_dqunlock(udq);
-}
-
-
-/*
- * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
- * into account.
- * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
- * Inode may get unlocked and relocked in here, and the caller must deal with
- * the consequences.
- */
-int
-xfs_qm_dqattach_locked(
-       xfs_inode_t     *ip,
-       uint            flags)
-{
-       xfs_mount_t     *mp = ip->i_mount;
-       uint            nquotas = 0;
-       int             error = 0;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) ||
-           !XFS_IS_QUOTA_ON(mp) ||
-           !XFS_NOT_DQATTACHED(mp, ip) ||
-           ip->i_ino == mp->m_sb.sb_uquotino ||
-           ip->i_ino == mp->m_sb.sb_gquotino)
-               return 0;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-       if (XFS_IS_UQUOTA_ON(mp)) {
-               error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
-                                               flags & XFS_QMOPT_DQALLOC,
-                                               NULL, &ip->i_udquot);
-               if (error)
-                       goto done;
-               nquotas++;
-       }
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       if (XFS_IS_OQUOTA_ON(mp)) {
-               error = XFS_IS_GQUOTA_ON(mp) ?
-                       xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
-                                               flags & XFS_QMOPT_DQALLOC,
-                                               ip->i_udquot, &ip->i_gdquot) :
-                       xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
-                                               flags & XFS_QMOPT_DQALLOC,
-                                               ip->i_udquot, &ip->i_gdquot);
-               /*
-                * Don't worry about the udquot that we may have
-                * attached above. It'll get detached, if not already.
-                */
-               if (error)
-                       goto done;
-               nquotas++;
-       }
-
-       /*
-        * Attach this group quota to the user quota as a hint.
-        * This WON'T, in general, result in a thrash.
-        */
-       if (nquotas == 2) {
-               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-               ASSERT(ip->i_udquot);
-               ASSERT(ip->i_gdquot);
-
-               /*
-                * We may or may not have the i_udquot locked at this point,
-                * but this check is OK since we don't depend on the i_gdquot to
-                * be accurate 100% all the time. It is just a hint, and this
-                * will succeed in general.
-                */
-               if (ip->i_udquot->q_gdquot == ip->i_gdquot)
-                       goto done;
-               /*
-                * Attach i_gdquot to the gdquot hint inside the i_udquot.
-                */
-               xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
-       }
-
- done:
-#ifdef DEBUG
-       if (!error) {
-               if (XFS_IS_UQUOTA_ON(mp))
-                       ASSERT(ip->i_udquot);
-               if (XFS_IS_OQUOTA_ON(mp))
-                       ASSERT(ip->i_gdquot);
-       }
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-#endif
-       return error;
-}
-
-int
-xfs_qm_dqattach(
-       struct xfs_inode        *ip,
-       uint                    flags)
-{
-       int                     error;
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       error = xfs_qm_dqattach_locked(ip, flags);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       return error;
-}
-
-/*
- * Release dquots (and their references) if any.
- * The inode should be locked EXCL except when this's called by
- * xfs_ireclaim.
- */
-void
-xfs_qm_dqdetach(
-       xfs_inode_t     *ip)
-{
-       if (!(ip->i_udquot || ip->i_gdquot))
-               return;
-
-       trace_xfs_dquot_dqdetach(ip);
-
-       ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
-       ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
-       if (ip->i_udquot) {
-               xfs_qm_dqrele(ip->i_udquot);
-               ip->i_udquot = NULL;
-       }
-       if (ip->i_gdquot) {
-               xfs_qm_dqrele(ip->i_gdquot);
-               ip->i_gdquot = NULL;
-       }
-}
-
-int
-xfs_qm_sync(
-       struct xfs_mount        *mp,
-       int                     flags)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       int                     recl, restarts;
-       struct xfs_dquot        *dqp;
-       int                     error;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return 0;
-
-       restarts = 0;
-
-  again:
-       mutex_lock(&q->qi_dqlist_lock);
-       /*
-        * dqpurge_all() also takes the mplist lock and iterate thru all dquots
-        * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
-        * when we have the mplist lock, we know that dquots will be consistent
-        * as long as we have it locked.
-        */
-       if (!XFS_IS_QUOTA_ON(mp)) {
-               mutex_unlock(&q->qi_dqlist_lock);
-               return 0;
-       }
-       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
-       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
-               /*
-                * If this is vfs_sync calling, then skip the dquots that
-                * don't 'seem' to be dirty. ie. don't acquire dqlock.
-                * This is very similar to what xfs_sync does with inodes.
-                */
-               if (flags & SYNC_TRYLOCK) {
-                       if (!XFS_DQ_IS_DIRTY(dqp))
-                               continue;
-                       if (!xfs_qm_dqlock_nowait(dqp))
-                               continue;
-               } else {
-                       xfs_dqlock(dqp);
-               }
-
-               /*
-                * Now, find out for sure if this dquot is dirty or not.
-                */
-               if (! XFS_DQ_IS_DIRTY(dqp)) {
-                       xfs_dqunlock(dqp);
-                       continue;
-               }
-
-               /* XXX a sentinel would be better */
-               recl = q->qi_dqreclaims;
-               if (!xfs_dqflock_nowait(dqp)) {
-                       if (flags & SYNC_TRYLOCK) {
-                               xfs_dqunlock(dqp);
-                               continue;
-                       }
-                       /*
-                        * If we can't grab the flush lock then if the caller
-                        * really wanted us to give this our best shot, so
-                        * see if we can give a push to the buffer before we wait
-                        * on the flush lock. At this point, we know that
-                        * even though the dquot is being flushed,
-                        * it has (new) dirty data.
-                        */
-                       xfs_qm_dqflock_pushbuf_wait(dqp);
-               }
-               /*
-                * Let go of the mplist lock. We don't want to hold it
-                * across a disk write
-                */
-               mutex_unlock(&q->qi_dqlist_lock);
-               error = xfs_qm_dqflush(dqp, flags);
-               xfs_dqunlock(dqp);
-               if (error && XFS_FORCED_SHUTDOWN(mp))
-                       return 0;       /* Need to prevent umount failure */
-               else if (error)
-                       return error;
-
-               mutex_lock(&q->qi_dqlist_lock);
-               if (recl != q->qi_dqreclaims) {
-                       if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
-                               break;
-
-                       mutex_unlock(&q->qi_dqlist_lock);
-                       goto again;
-               }
-       }
-
-       mutex_unlock(&q->qi_dqlist_lock);
-       return 0;
-}
-
-/*
- * The hash chains and the mplist use the same xfs_dqhash structure as
- * their list head, but we can take the mplist qh_lock and one of the
- * hash qh_locks at the same time without any problem as they aren't
- * related.
- */
-static struct lock_class_key xfs_quota_mplist_class;
-
-/*
- * This initializes all the quota information that's kept in the
- * mount structure
- */
-STATIC int
-xfs_qm_init_quotainfo(
-       xfs_mount_t     *mp)
-{
-       xfs_quotainfo_t *qinf;
-       int             error;
-       xfs_dquot_t     *dqp;
-
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       /*
-        * Tell XQM that we exist as soon as possible.
-        */
-       if ((error = xfs_qm_hold_quotafs_ref(mp))) {
-               return error;
-       }
-
-       qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
-
-       /*
-        * See if quotainodes are setup, and if not, allocate them,
-        * and change the superblock accordingly.
-        */
-       if ((error = xfs_qm_init_quotainos(mp))) {
-               kmem_free(qinf);
-               mp->m_quotainfo = NULL;
-               return error;
-       }
-
-       INIT_LIST_HEAD(&qinf->qi_dqlist);
-       mutex_init(&qinf->qi_dqlist_lock);
-       lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
-
-       qinf->qi_dqreclaims = 0;
-
-       /* mutex used to serialize quotaoffs */
-       mutex_init(&qinf->qi_quotaofflock);
-
-       /* Precalc some constants */
-       qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
-       ASSERT(qinf->qi_dqchunklen);
-       qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
-       do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
-
-       mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
-
-       /*
-        * We try to get the limits from the superuser's limits fields.
-        * This is quite hacky, but it is standard quota practice.
-        * We look at the USR dquot with id == 0 first, but if user quotas
-        * are not enabled we goto the GRP dquot with id == 0.
-        * We don't really care to keep separate default limits for user
-        * and group quotas, at least not at this point.
-        */
-       error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
-                            XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
-                            (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
-                               XFS_DQ_PROJ),
-                            XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
-                            &dqp);
-       if (! error) {
-               xfs_disk_dquot_t        *ddqp = &dqp->q_core;
-
-               /*
-                * The warnings and timers set the grace period given to
-                * a user or group before he or she can not perform any
-                * more writing. If it is zero, a default is used.
-                */
-               qinf->qi_btimelimit = ddqp->d_btimer ?
-                       be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
-               qinf->qi_itimelimit = ddqp->d_itimer ?
-                       be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
-               qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
-                       be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
-               qinf->qi_bwarnlimit = ddqp->d_bwarns ?
-                       be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
-               qinf->qi_iwarnlimit = ddqp->d_iwarns ?
-                       be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
-               qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
-                       be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
-               qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
-               qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
-               qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
-               qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
-               qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
-               qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
- 
-               /*
-                * We sent the XFS_QMOPT_DQSUSER flag to dqget because
-                * we don't want this dquot cached. We haven't done a
-                * quotacheck yet, and quotacheck doesn't like incore dquots.
-                */
-               xfs_qm_dqdestroy(dqp);
-       } else {
-               qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
-               qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
-               qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
-               qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
-               qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
-               qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
-       }
-
-       return 0;
-}
-
-
-/*
- * Gets called when unmounting a filesystem or when all quotas get
- * turned off.
- * This purges the quota inodes, destroys locks and frees itself.
- */
-void
-xfs_qm_destroy_quotainfo(
-       xfs_mount_t     *mp)
-{
-       xfs_quotainfo_t *qi;
-
-       qi = mp->m_quotainfo;
-       ASSERT(qi != NULL);
-       ASSERT(xfs_Gqm != NULL);
-
-       /*
-        * Release the reference that XQM kept, so that we know
-        * when the XQM structure should be freed. We cannot assume
-        * that xfs_Gqm is non-null after this point.
-        */
-       xfs_qm_rele_quotafs_ref(mp);
-
-       ASSERT(list_empty(&qi->qi_dqlist));
-       mutex_destroy(&qi->qi_dqlist_lock);
-
-       if (qi->qi_uquotaip) {
-               IRELE(qi->qi_uquotaip);
-               qi->qi_uquotaip = NULL; /* paranoia */
-       }
-       if (qi->qi_gquotaip) {
-               IRELE(qi->qi_gquotaip);
-               qi->qi_gquotaip = NULL;
-       }
-       mutex_destroy(&qi->qi_quotaofflock);
-       kmem_free(qi);
-       mp->m_quotainfo = NULL;
-}
-
-
-
-/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
-
-/* ARGSUSED */
-STATIC void
-xfs_qm_list_init(
-       xfs_dqlist_t    *list,
-       char            *str,
-       int             n)
-{
-       mutex_init(&list->qh_lock);
-       INIT_LIST_HEAD(&list->qh_list);
-       list->qh_version = 0;
-       list->qh_nelems = 0;
-}
-
-STATIC void
-xfs_qm_list_destroy(
-       xfs_dqlist_t    *list)
-{
-       mutex_destroy(&(list->qh_lock));
-}
-
-/*
- * Create an inode and return with a reference already taken, but unlocked
- * This is how we create quota inodes
- */
-STATIC int
-xfs_qm_qino_alloc(
-       xfs_mount_t     *mp,
-       xfs_inode_t     **ip,
-       __int64_t       sbfields,
-       uint            flags)
-{
-       xfs_trans_t     *tp;
-       int             error;
-       int             committed;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
-       if ((error = xfs_trans_reserve(tp,
-                                     XFS_QM_QINOCREATE_SPACE_RES(mp),
-                                     XFS_CREATE_LOG_RES(mp), 0,
-                                     XFS_TRANS_PERM_LOG_RES,
-                                     XFS_CREATE_LOG_COUNT))) {
-               xfs_trans_cancel(tp, 0);
-               return error;
-       }
-
-       error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
-       if (error) {
-               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
-                                XFS_TRANS_ABORT);
-               return error;
-       }
-
-       /*
-        * Make the changes in the superblock, and log those too.
-        * sbfields arg may contain fields other than *QUOTINO;
-        * VERSIONNUM for example.
-        */
-       spin_lock(&mp->m_sb_lock);
-       if (flags & XFS_QMOPT_SBVERSION) {
-               ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
-               ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-                                  XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
-                      (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-                       XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
-
-               xfs_sb_version_addquota(&mp->m_sb);
-               mp->m_sb.sb_uquotino = NULLFSINO;
-               mp->m_sb.sb_gquotino = NULLFSINO;
-
-               /* qflags will get updated _after_ quotacheck */
-               mp->m_sb.sb_qflags = 0;
-       }
-       if (flags & XFS_QMOPT_UQUOTA)
-               mp->m_sb.sb_uquotino = (*ip)->i_ino;
-       else
-               mp->m_sb.sb_gquotino = (*ip)->i_ino;
-       spin_unlock(&mp->m_sb_lock);
-       xfs_mod_sb(tp, sbfields);
-
-       if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
-               xfs_alert(mp, "%s failed (error %d)!", __func__, error);
-               return error;
-       }
-       return 0;
-}
-
-
-STATIC void
-xfs_qm_reset_dqcounts(
-       xfs_mount_t     *mp,
-       xfs_buf_t       *bp,
-       xfs_dqid_t      id,
-       uint            type)
-{
-       xfs_disk_dquot_t        *ddq;
-       int                     j;
-
-       trace_xfs_reset_dqcounts(bp, _RET_IP_);
-
-       /*
-        * Reset all counters and timers. They'll be
-        * started afresh by xfs_qm_quotacheck.
-        */
-#ifdef DEBUG
-       j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
-       do_div(j, sizeof(xfs_dqblk_t));
-       ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
-#endif
-       ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
-       for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
-               /*
-                * Do a sanity check, and if needed, repair the dqblk. Don't
-                * output any warnings because it's perfectly possible to
-                * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
-                */
-               (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
-                                     "xfs_quotacheck");
-               ddq->d_bcount = 0;
-               ddq->d_icount = 0;
-               ddq->d_rtbcount = 0;
-               ddq->d_btimer = 0;
-               ddq->d_itimer = 0;
-               ddq->d_rtbtimer = 0;
-               ddq->d_bwarns = 0;
-               ddq->d_iwarns = 0;
-               ddq->d_rtbwarns = 0;
-               ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
-       }
-}
-
-STATIC int
-xfs_qm_dqiter_bufs(
-       xfs_mount_t     *mp,
-       xfs_dqid_t      firstid,
-       xfs_fsblock_t   bno,
-       xfs_filblks_t   blkcnt,
-       uint            flags)
-{
-       xfs_buf_t       *bp;
-       int             error;
-       int             type;
-
-       ASSERT(blkcnt > 0);
-       type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
-               (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
-       error = 0;
-
-       /*
-        * Blkcnt arg can be a very big number, and might even be
-        * larger than the log itself. So, we have to break it up into
-        * manageable-sized transactions.
-        * Note that we don't start a permanent transaction here; we might
-        * not be able to get a log reservation for the whole thing up front,
-        * and we don't really care to either, because we just discard
-        * everything if we were to crash in the middle of this loop.
-        */
-       while (blkcnt--) {
-               error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
-                             XFS_FSB_TO_DADDR(mp, bno),
-                             mp->m_quotainfo->qi_dqchunklen, 0, &bp);
-               if (error)
-                       break;
-
-               xfs_qm_reset_dqcounts(mp, bp, firstid, type);
-               xfs_bdwrite(mp, bp);
-               /*
-                * goto the next block.
-                */
-               bno++;
-               firstid += mp->m_quotainfo->qi_dqperchunk;
-       }
-       return error;
-}
-
-/*
- * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
- * caller supplied function for every chunk of dquots that we find.
- */
-STATIC int
-xfs_qm_dqiterate(
-       xfs_mount_t     *mp,
-       xfs_inode_t     *qip,
-       uint            flags)
-{
-       xfs_bmbt_irec_t         *map;
-       int                     i, nmaps;       /* number of map entries */
-       int                     error;          /* return value */
-       xfs_fileoff_t           lblkno;
-       xfs_filblks_t           maxlblkcnt;
-       xfs_dqid_t              firstid;
-       xfs_fsblock_t           rablkno;
-       xfs_filblks_t           rablkcnt;
-
-       error = 0;
-       /*
-        * This looks racy, but we can't keep an inode lock across a
-        * trans_reserve. But, this gets called during quotacheck, and that
-        * happens only at mount time which is single threaded.
-        */
-       if (qip->i_d.di_nblocks == 0)
-               return 0;
-
-       map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
-
-       lblkno = 0;
-       maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
-       do {
-               nmaps = XFS_DQITER_MAP_SIZE;
-               /*
-                * We aren't changing the inode itself. Just changing
-                * some of its data. No new blocks are added here, and
-                * the inode is never added to the transaction.
-                */
-               xfs_ilock(qip, XFS_ILOCK_SHARED);
-               error = xfs_bmapi(NULL, qip, lblkno,
-                                 maxlblkcnt - lblkno,
-                                 XFS_BMAPI_METADATA,
-                                 NULL,
-                                 0, map, &nmaps, NULL);
-               xfs_iunlock(qip, XFS_ILOCK_SHARED);
-               if (error)
-                       break;
-
-               ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
-               for (i = 0; i < nmaps; i++) {
-                       ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
-                       ASSERT(map[i].br_blockcount);
-
-
-                       lblkno += map[i].br_blockcount;
-
-                       if (map[i].br_startblock == HOLESTARTBLOCK)
-                               continue;
-
-                       firstid = (xfs_dqid_t) map[i].br_startoff *
-                               mp->m_quotainfo->qi_dqperchunk;
-                       /*
-                        * Do a read-ahead on the next extent.
-                        */
-                       if ((i+1 < nmaps) &&
-                           (map[i+1].br_startblock != HOLESTARTBLOCK)) {
-                               rablkcnt =  map[i+1].br_blockcount;
-                               rablkno = map[i+1].br_startblock;
-                               while (rablkcnt--) {
-                                       xfs_buf_readahead(mp->m_ddev_targp,
-                                              XFS_FSB_TO_DADDR(mp, rablkno),
-                                              mp->m_quotainfo->qi_dqchunklen);
-                                       rablkno++;
-                               }
-                       }
-                       /*
-                        * Iterate thru all the blks in the extent and
-                        * reset the counters of all the dquots inside them.
-                        */
-                       if ((error = xfs_qm_dqiter_bufs(mp,
-                                                      firstid,
-                                                      map[i].br_startblock,
-                                                      map[i].br_blockcount,
-                                                      flags))) {
-                               break;
-                       }
-               }
-
-               if (error)
-                       break;
-       } while (nmaps > 0);
-
-       kmem_free(map);
-
-       return error;
-}
-
-/*
- * Called by dqusage_adjust in doing a quotacheck.
- *
- * Given the inode, and a dquot id this updates both the incore dqout as well
- * as the buffer copy. This is so that once the quotacheck is done, we can
- * just log all the buffers, as opposed to logging numerous updates to
- * individual dquots.
- */
-STATIC int
-xfs_qm_quotacheck_dqadjust(
-       struct xfs_inode        *ip,
-       xfs_dqid_t              id,
-       uint                    type,
-       xfs_qcnt_t              nblks,
-       xfs_qcnt_t              rtblks)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_dquot        *dqp;
-       int                     error;
-
-       error = xfs_qm_dqget(mp, ip, id, type,
-                            XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
-       if (error) {
-               /*
-                * Shouldn't be able to turn off quotas here.
-                */
-               ASSERT(error != ESRCH);
-               ASSERT(error != ENOENT);
-               return error;
-       }
-
-       trace_xfs_dqadjust(dqp);
-
-       /*
-        * Adjust the inode count and the block count to reflect this inode's
-        * resource usage.
-        */
-       be64_add_cpu(&dqp->q_core.d_icount, 1);
-       dqp->q_res_icount++;
-       if (nblks) {
-               be64_add_cpu(&dqp->q_core.d_bcount, nblks);
-               dqp->q_res_bcount += nblks;
-       }
-       if (rtblks) {
-               be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
-               dqp->q_res_rtbcount += rtblks;
-       }
-
-       /*
-        * Set default limits, adjust timers (since we changed usages)
-        *
-        * There are no timers for the default values set in the root dquot.
-        */
-       if (dqp->q_core.d_id) {
-               xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
-               xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
-       }
-
-       dqp->dq_flags |= XFS_DQ_DIRTY;
-       xfs_qm_dqput(dqp);
-       return 0;
-}
-
-STATIC int
-xfs_qm_get_rtblks(
-       xfs_inode_t     *ip,
-       xfs_qcnt_t      *O_rtblks)
-{
-       xfs_filblks_t   rtblks;                 /* total rt blks */
-       xfs_extnum_t    idx;                    /* extent record index */
-       xfs_ifork_t     *ifp;                   /* inode fork pointer */
-       xfs_extnum_t    nextents;               /* number of extent entries */
-       int             error;
-
-       ASSERT(XFS_IS_REALTIME_INODE(ip));
-       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-               if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
-                       return error;
-       }
-       rtblks = 0;
-       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-       for (idx = 0; idx < nextents; idx++)
-               rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
-       *O_rtblks = (xfs_qcnt_t)rtblks;
-       return 0;
-}
-
-/*
- * callback routine supplied to bulkstat(). Given an inumber, find its
- * dquots and update them to account for resources taken by that inode.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_dqusage_adjust(
-       xfs_mount_t     *mp,            /* mount point for filesystem */
-       xfs_ino_t       ino,            /* inode number to get data for */
-       void            __user *buffer, /* not used */
-       int             ubsize,         /* not used */
-       int             *ubused,        /* not used */
-       int             *res)           /* result code value */
-{
-       xfs_inode_t     *ip;
-       xfs_qcnt_t      nblks, rtblks = 0;
-       int             error;
-
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       /*
-        * rootino must have its resources accounted for, not so with the quota
-        * inodes.
-        */
-       if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
-               *res = BULKSTAT_RV_NOTHING;
-               return XFS_ERROR(EINVAL);
-       }
-
-       /*
-        * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
-        * interface expects the inode to be exclusively locked because that's
-        * the case in all other instances. It's OK that we do this because
-        * quotacheck is done only at mount time.
-        */
-       error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
-       if (error) {
-               *res = BULKSTAT_RV_NOTHING;
-               return error;
-       }
-
-       ASSERT(ip->i_delayed_blks == 0);
-
-       if (XFS_IS_REALTIME_INODE(ip)) {
-               /*
-                * Walk thru the extent list and count the realtime blocks.
-                */
-               error = xfs_qm_get_rtblks(ip, &rtblks);
-               if (error)
-                       goto error0;
-       }
-
-       nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
-
-       /*
-        * Add the (disk blocks and inode) resources occupied by this
-        * inode to its dquots. We do this adjustment in the incore dquot,
-        * and also copy the changes to its buffer.
-        * We don't care about putting these changes in a transaction
-        * envelope because if we crash in the middle of a 'quotacheck'
-        * we have to start from the beginning anyway.
-        * Once we're done, we'll log all the dquot bufs.
-        *
-        * The *QUOTA_ON checks below may look pretty racy, but quotachecks
-        * and quotaoffs don't race. (Quotachecks happen at mount time only).
-        */
-       if (XFS_IS_UQUOTA_ON(mp)) {
-               error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
-                                                  XFS_DQ_USER, nblks, rtblks);
-               if (error)
-                       goto error0;
-       }
-
-       if (XFS_IS_GQUOTA_ON(mp)) {
-               error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
-                                                  XFS_DQ_GROUP, nblks, rtblks);
-               if (error)
-                       goto error0;
-       }
-
-       if (XFS_IS_PQUOTA_ON(mp)) {
-               error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
-                                                  XFS_DQ_PROJ, nblks, rtblks);
-               if (error)
-                       goto error0;
-       }
-
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       IRELE(ip);
-       *res = BULKSTAT_RV_DIDONE;
-       return 0;
-
-error0:
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       IRELE(ip);
-       *res = BULKSTAT_RV_GIVEUP;
-       return error;
-}
-
-/*
- * Walk thru all the filesystem inodes and construct a consistent view
- * of the disk quota world. If the quotacheck fails, disable quotas.
- */
-int
-xfs_qm_quotacheck(
-       xfs_mount_t     *mp)
-{
-       int             done, count, error;
-       xfs_ino_t       lastino;
-       size_t          structsz;
-       xfs_inode_t     *uip, *gip;
-       uint            flags;
-
-       count = INT_MAX;
-       structsz = 1;
-       lastino = 0;
-       flags = 0;
-
-       ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       /*
-        * There should be no cached dquots. The (simplistic) quotacheck
-        * algorithm doesn't like that.
-        */
-       ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
-
-       xfs_notice(mp, "Quotacheck needed: Please wait.");
-
-       /*
-        * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
-        * their counters to zero. We need a clean slate.
-        * We don't log our changes till later.
-        */
-       uip = mp->m_quotainfo->qi_uquotaip;
-       if (uip) {
-               error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA);
-               if (error)
-                       goto error_return;
-               flags |= XFS_UQUOTA_CHKD;
-       }
-
-       gip = mp->m_quotainfo->qi_gquotaip;
-       if (gip) {
-               error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
-                                       XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
-               if (error)
-                       goto error_return;
-               flags |= XFS_OQUOTA_CHKD;
-       }
-
-       do {
-               /*
-                * Iterate thru all the inodes in the file system,
-                * adjusting the corresponding dquot counters in core.
-                */
-               error = xfs_bulkstat(mp, &lastino, &count,
-                                    xfs_qm_dqusage_adjust,
-                                    structsz, NULL, &done);
-               if (error)
-                       break;
-
-       } while (!done);
-
-       /*
-        * We've made all the changes that we need to make incore.
-        * Flush them down to disk buffers if everything was updated
-        * successfully.
-        */
-       if (!error)
-               error = xfs_qm_dqflush_all(mp, 0);
-
-       /*
-        * We can get this error if we couldn't do a dquot allocation inside
-        * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
-        * dirty dquots that might be cached, we just want to get rid of them
-        * and turn quotaoff. The dquots won't be attached to any of the inodes
-        * at this point (because we intentionally didn't in dqget_noattach).
-        */
-       if (error) {
-               xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
-               goto error_return;
-       }
-
-       /*
-        * We didn't log anything, because if we crashed, we'll have to
-        * start the quotacheck from scratch anyway. However, we must make
-        * sure that our dquot changes are secure before we put the
-        * quotacheck'd stamp on the superblock. So, here we do a synchronous
-        * flush.
-        */
-       XFS_bflush(mp->m_ddev_targp);
-
-       /*
-        * If one type of quotas is off, then it will lose its
-        * quotachecked status, since we won't be doing accounting for
-        * that type anymore.
-        */
-       mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
-       mp->m_qflags |= flags;
-
- error_return:
-       if (error) {
-               xfs_warn(mp,
-       "Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
-                       error);
-               /*
-                * We must turn off quotas.
-                */
-               ASSERT(mp->m_quotainfo != NULL);
-               ASSERT(xfs_Gqm != NULL);
-               xfs_qm_destroy_quotainfo(mp);
-               if (xfs_mount_reset_sbqflags(mp)) {
-                       xfs_warn(mp,
-                               "Quotacheck: Failed to reset quota flags.");
-               }
-       } else
-               xfs_notice(mp, "Quotacheck: Done.");
-       return (error);
-}
-
-/*
- * This is called after the superblock has been read in and we're ready to
- * iget the quota inodes.
- */
-STATIC int
-xfs_qm_init_quotainos(
-       xfs_mount_t     *mp)
-{
-       xfs_inode_t     *uip, *gip;
-       int             error;
-       __int64_t       sbflags;
-       uint            flags;
-
-       ASSERT(mp->m_quotainfo);
-       uip = gip = NULL;
-       sbflags = 0;
-       flags = 0;
-
-       /*
-        * Get the uquota and gquota inodes
-        */
-       if (xfs_sb_version_hasquota(&mp->m_sb)) {
-               if (XFS_IS_UQUOTA_ON(mp) &&
-                   mp->m_sb.sb_uquotino != NULLFSINO) {
-                       ASSERT(mp->m_sb.sb_uquotino > 0);
-                       if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
-                                            0, 0, &uip)))
-                               return XFS_ERROR(error);
-               }
-               if (XFS_IS_OQUOTA_ON(mp) &&
-                   mp->m_sb.sb_gquotino != NULLFSINO) {
-                       ASSERT(mp->m_sb.sb_gquotino > 0);
-                       if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
-                                            0, 0, &gip))) {
-                               if (uip)
-                                       IRELE(uip);
-                               return XFS_ERROR(error);
-                       }
-               }
-       } else {
-               flags |= XFS_QMOPT_SBVERSION;
-               sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-                           XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
-       }
-
-       /*
-        * Create the two inodes, if they don't exist already. The changes
-        * made above will get added to a transaction and logged in one of
-        * the qino_alloc calls below.  If the device is readonly,
-        * temporarily switch to read-write to do this.
-        */
-       if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
-               if ((error = xfs_qm_qino_alloc(mp, &uip,
-                                             sbflags | XFS_SB_UQUOTINO,
-                                             flags | XFS_QMOPT_UQUOTA)))
-                       return XFS_ERROR(error);
-
-               flags &= ~XFS_QMOPT_SBVERSION;
-       }
-       if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
-               flags |= (XFS_IS_GQUOTA_ON(mp) ?
-                               XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
-               error = xfs_qm_qino_alloc(mp, &gip,
-                                         sbflags | XFS_SB_GQUOTINO, flags);
-               if (error) {
-                       if (uip)
-                               IRELE(uip);
-
-                       return XFS_ERROR(error);
-               }
-       }
-
-       mp->m_quotainfo->qi_uquotaip = uip;
-       mp->m_quotainfo->qi_gquotaip = gip;
-
-       return 0;
-}
-
-
-
-/*
- * Just pop the least recently used dquot off the freelist and
- * recycle it. The returned dquot is locked.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqreclaim_one(void)
-{
-       xfs_dquot_t     *dqpout;
-       xfs_dquot_t     *dqp;
-       int             restarts;
-       int             startagain;
-
-       restarts = 0;
-       dqpout = NULL;
-
-       /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
-again:
-       startagain = 0;
-       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-
-       list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
-               struct xfs_mount *mp = dqp->q_mount;
-               xfs_dqlock(dqp);
-
-               /*
-                * We are racing with dqlookup here. Naturally we don't
-                * want to reclaim a dquot that lookup wants. We release the
-                * freelist lock and start over, so that lookup will grab
-                * both the dquot and the freelistlock.
-                */
-               if (dqp->dq_flags & XFS_DQ_WANT) {
-                       ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
-
-                       trace_xfs_dqreclaim_want(dqp);
-                       XQM_STATS_INC(xqmstats.xs_qm_dqwants);
-                       restarts++;
-                       startagain = 1;
-                       goto dqunlock;
-               }
-
-               /*
-                * If the dquot is inactive, we are assured that it is
-                * not on the mplist or the hashlist, and that makes our
-                * life easier.
-                */
-               if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-                       ASSERT(mp == NULL);
-                       ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-                       ASSERT(list_empty(&dqp->q_hashlist));
-                       ASSERT(list_empty(&dqp->q_mplist));
-                       list_del_init(&dqp->q_freelist);
-                       xfs_Gqm->qm_dqfrlist_cnt--;
-                       dqpout = dqp;
-                       XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
-                       goto dqunlock;
-               }
-
-               ASSERT(dqp->q_hash);
-               ASSERT(!list_empty(&dqp->q_mplist));
-
-               /*
-                * Try to grab the flush lock. If this dquot is in the process
-                * of getting flushed to disk, we don't want to reclaim it.
-                */
-               if (!xfs_dqflock_nowait(dqp))
-                       goto dqunlock;
-
-               /*
-                * We have the flush lock so we know that this is not in the
-                * process of being flushed. So, if this is dirty, flush it
-                * DELWRI so that we don't get a freelist infested with
-                * dirty dquots.
-                */
-               if (XFS_DQ_IS_DIRTY(dqp)) {
-                       int     error;
-
-                       trace_xfs_dqreclaim_dirty(dqp);
-
-                       /*
-                        * We flush it delayed write, so don't bother
-                        * releasing the freelist lock.
-                        */
-                       error = xfs_qm_dqflush(dqp, 0);
-                       if (error) {
-                               xfs_warn(mp, "%s: dquot %p flush failed",
-                                       __func__, dqp);
-                       }
-                       goto dqunlock;
-               }
-
-               /*
-                * We're trying to get the hashlock out of order. This races
-                * with dqlookup; so, we giveup and goto the next dquot if
-                * we couldn't get the hashlock. This way, we won't starve
-                * a dqlookup process that holds the hashlock that is
-                * waiting for the freelist lock.
-                */
-               if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
-                       restarts++;
-                       goto dqfunlock;
-               }
-
-               /*
-                * This races with dquot allocation code as well as dqflush_all
-                * and reclaim code. So, if we failed to grab the mplist lock,
-                * giveup everything and start over.
-                */
-               if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
-                       restarts++;
-                       startagain = 1;
-                       goto qhunlock;
-               }
-
-               ASSERT(dqp->q_nrefs == 0);
-               list_del_init(&dqp->q_mplist);
-               mp->m_quotainfo->qi_dquots--;
-               mp->m_quotainfo->qi_dqreclaims++;
-               list_del_init(&dqp->q_hashlist);
-               dqp->q_hash->qh_version++;
-               list_del_init(&dqp->q_freelist);
-               xfs_Gqm->qm_dqfrlist_cnt--;
-               dqpout = dqp;
-               mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-qhunlock:
-               mutex_unlock(&dqp->q_hash->qh_lock);
-dqfunlock:
-               xfs_dqfunlock(dqp);
-dqunlock:
-               xfs_dqunlock(dqp);
-               if (dqpout)
-                       break;
-               if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-                       break;
-               if (startagain) {
-                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-                       goto again;
-               }
-       }
-       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-       return dqpout;
-}
-
-/*
- * Traverse the freelist of dquots and attempt to reclaim a maximum of
- * 'howmany' dquots. This operation races with dqlookup(), and attempts to
- * favor the lookup function ...
- */
-STATIC int
-xfs_qm_shake_freelist(
-       int     howmany)
-{
-       int             nreclaimed = 0;
-       xfs_dquot_t     *dqp;
-
-       if (howmany <= 0)
-               return 0;
-
-       while (nreclaimed < howmany) {
-               dqp = xfs_qm_dqreclaim_one();
-               if (!dqp)
-                       return nreclaimed;
-               xfs_qm_dqdestroy(dqp);
-               nreclaimed++;
-       }
-       return nreclaimed;
-}
-
-/*
- * The kmem_shake interface is invoked when memory is running low.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_shake(
-       struct shrinker *shrink,
-       struct shrink_control *sc)
-{
-       int     ndqused, nfree, n;
-       gfp_t gfp_mask = sc->gfp_mask;
-
-       if (!kmem_shake_allow(gfp_mask))
-               return 0;
-       if (!xfs_Gqm)
-               return 0;
-
-       nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
-       /* incore dquots in all f/s's */
-       ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
-
-       ASSERT(ndqused >= 0);
-
-       if (nfree <= ndqused && nfree < ndquot)
-               return 0;
-
-       ndqused *= xfs_Gqm->qm_dqfree_ratio;    /* target # of free dquots */
-       n = nfree - ndqused - ndquot;           /* # over target */
-
-       return xfs_qm_shake_freelist(MAX(nfree, n));
-}
-
-
-/*------------------------------------------------------------------*/
-
-/*
- * Return a new incore dquot. Depending on the number of
- * dquots in the system, we either allocate a new one on the kernel heap,
- * or reclaim a free one.
- * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
- * to reclaim an existing one from the freelist.
- */
-boolean_t
-xfs_qm_dqalloc_incore(
-       xfs_dquot_t **O_dqpp)
-{
-       xfs_dquot_t     *dqp;
-
-       /*
-        * Check against high water mark to see if we want to pop
-        * a nincompoop dquot off the freelist.
-        */
-       if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
-               /*
-                * Try to recycle a dquot from the freelist.
-                */
-               if ((dqp = xfs_qm_dqreclaim_one())) {
-                       XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
-                       /*
-                        * Just zero the core here. The rest will get
-                        * reinitialized by caller. XXX we shouldn't even
-                        * do this zero ...
-                        */
-                       memset(&dqp->q_core, 0, sizeof(dqp->q_core));
-                       *O_dqpp = dqp;
-                       return B_FALSE;
-               }
-               XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
-       }
-
-       /*
-        * Allocate a brand new dquot on the kernel heap and return it
-        * to the caller to initialize.
-        */
-       ASSERT(xfs_Gqm->qm_dqzone != NULL);
-       *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
-       atomic_inc(&xfs_Gqm->qm_totaldquots);
-
-       return B_TRUE;
-}
-
-
-/*
- * Start a transaction and write the incore superblock changes to
- * disk. flags parameter indicates which fields have changed.
- */
-int
-xfs_qm_write_sb_changes(
-       xfs_mount_t     *mp,
-       __int64_t       flags)
-{
-       xfs_trans_t     *tp;
-       int             error;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
-       if ((error = xfs_trans_reserve(tp, 0,
-                                     mp->m_sb.sb_sectsize + 128, 0,
-                                     0,
-                                     XFS_DEFAULT_LOG_COUNT))) {
-               xfs_trans_cancel(tp, 0);
-               return error;
-       }
-
-       xfs_mod_sb(tp, flags);
-       error = xfs_trans_commit(tp, 0);
-
-       return error;
-}
-
-
-/* --------------- utility functions for vnodeops ---------------- */
-
-
-/*
- * Given an inode, a uid, gid and prid make sure that we have
- * allocated relevant dquot(s) on disk, and that we won't exceed inode
- * quotas by creating this file.
- * This also attaches dquot(s) to the given inode after locking it,
- * and returns the dquots corresponding to the uid and/or gid.
- *
- * in  : inode (unlocked)
- * out : udquot, gdquot with references taken and unlocked
- */
-int
-xfs_qm_vop_dqalloc(
-       struct xfs_inode        *ip,
-       uid_t                   uid,
-       gid_t                   gid,
-       prid_t                  prid,
-       uint                    flags,
-       struct xfs_dquot        **O_udqpp,
-       struct xfs_dquot        **O_gdqpp)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_dquot        *uq, *gq;
-       int                     error;
-       uint                    lockflags;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return 0;
-
-       lockflags = XFS_ILOCK_EXCL;
-       xfs_ilock(ip, lockflags);
-
-       if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
-               gid = ip->i_d.di_gid;
-
-       /*
-        * Attach the dquot(s) to this inode, doing a dquot allocation
-        * if necessary. The dquot(s) will not be locked.
-        */
-       if (XFS_NOT_DQATTACHED(mp, ip)) {
-               error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
-               if (error) {
-                       xfs_iunlock(ip, lockflags);
-                       return error;
-               }
-       }
-
-       uq = gq = NULL;
-       if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
-               if (ip->i_d.di_uid != uid) {
-                       /*
-                        * What we need is the dquot that has this uid, and
-                        * if we send the inode to dqget, the uid of the inode
-                        * takes priority over what's sent in the uid argument.
-                        * We must unlock inode here before calling dqget if
-                        * we're not sending the inode, because otherwise
-                        * we'll deadlock by doing trans_reserve while
-                        * holding ilock.
-                        */
-                       xfs_iunlock(ip, lockflags);
-                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
-                                                XFS_DQ_USER,
-                                                XFS_QMOPT_DQALLOC |
-                                                XFS_QMOPT_DOWARN,
-                                                &uq))) {
-                               ASSERT(error != ENOENT);
-                               return error;
-                       }
-                       /*
-                        * Get the ilock in the right order.
-                        */
-                       xfs_dqunlock(uq);
-                       lockflags = XFS_ILOCK_SHARED;
-                       xfs_ilock(ip, lockflags);
-               } else {
-                       /*
-                        * Take an extra reference, because we'll return
-                        * this to caller
-                        */
-                       ASSERT(ip->i_udquot);
-                       uq = ip->i_udquot;
-                       xfs_dqlock(uq);
-                       XFS_DQHOLD(uq);
-                       xfs_dqunlock(uq);
-               }
-       }
-       if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
-               if (ip->i_d.di_gid != gid) {
-                       xfs_iunlock(ip, lockflags);
-                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
-                                                XFS_DQ_GROUP,
-                                                XFS_QMOPT_DQALLOC |
-                                                XFS_QMOPT_DOWARN,
-                                                &gq))) {
-                               if (uq)
-                                       xfs_qm_dqrele(uq);
-                               ASSERT(error != ENOENT);
-                               return error;
-                       }
-                       xfs_dqunlock(gq);
-                       lockflags = XFS_ILOCK_SHARED;
-                       xfs_ilock(ip, lockflags);
-               } else {
-                       ASSERT(ip->i_gdquot);
-                       gq = ip->i_gdquot;
-                       xfs_dqlock(gq);
-                       XFS_DQHOLD(gq);
-                       xfs_dqunlock(gq);
-               }
-       } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
-               if (xfs_get_projid(ip) != prid) {
-                       xfs_iunlock(ip, lockflags);
-                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
-                                                XFS_DQ_PROJ,
-                                                XFS_QMOPT_DQALLOC |
-                                                XFS_QMOPT_DOWARN,
-                                                &gq))) {
-                               if (uq)
-                                       xfs_qm_dqrele(uq);
-                               ASSERT(error != ENOENT);
-                               return (error);
-                       }
-                       xfs_dqunlock(gq);
-                       lockflags = XFS_ILOCK_SHARED;
-                       xfs_ilock(ip, lockflags);
-               } else {
-                       ASSERT(ip->i_gdquot);
-                       gq = ip->i_gdquot;
-                       xfs_dqlock(gq);
-                       XFS_DQHOLD(gq);
-                       xfs_dqunlock(gq);
-               }
-       }
-       if (uq)
-               trace_xfs_dquot_dqalloc(ip);
-
-       xfs_iunlock(ip, lockflags);
-       if (O_udqpp)
-               *O_udqpp = uq;
-       else if (uq)
-               xfs_qm_dqrele(uq);
-       if (O_gdqpp)
-               *O_gdqpp = gq;
-       else if (gq)
-               xfs_qm_dqrele(gq);
-       return 0;
-}
-
-/*
- * Actually transfer ownership, and do dquot modifications.
- * These were already reserved.
- */
-xfs_dquot_t *
-xfs_qm_vop_chown(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *ip,
-       xfs_dquot_t     **IO_olddq,
-       xfs_dquot_t     *newdq)
-{
-       xfs_dquot_t     *prevdq;
-       uint            bfield = XFS_IS_REALTIME_INODE(ip) ?
-                                XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
-
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
-
-       /* old dquot */
-       prevdq = *IO_olddq;
-       ASSERT(prevdq);
-       ASSERT(prevdq != newdq);
-
-       xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
-       xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
-
-       /* the sparkling new dquot */
-       xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
-       xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
-
-       /*
-        * Take an extra reference, because the inode
-        * is going to keep this dquot pointer even
-        * after the trans_commit.
-        */
-       xfs_dqlock(newdq);
-       XFS_DQHOLD(newdq);
-       xfs_dqunlock(newdq);
-       *IO_olddq = newdq;
-
-       return prevdq;
-}
-
-/*
- * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
- */
-int
-xfs_qm_vop_chown_reserve(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *ip,
-       xfs_dquot_t     *udqp,
-       xfs_dquot_t     *gdqp,
-       uint            flags)
-{
-       xfs_mount_t     *mp = ip->i_mount;
-       uint            delblks, blkflags, prjflags = 0;
-       xfs_dquot_t     *unresudq, *unresgdq, *delblksudq, *delblksgdq;
-       int             error;
-
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       delblks = ip->i_delayed_blks;
-       delblksudq = delblksgdq = unresudq = unresgdq = NULL;
-       blkflags = XFS_IS_REALTIME_INODE(ip) ?
-                       XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
-
-       if (XFS_IS_UQUOTA_ON(mp) && udqp &&
-           ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
-               delblksudq = udqp;
-               /*
-                * If there are delayed allocation blocks, then we have to
-                * unreserve those from the old dquot, and add them to the
-                * new dquot.
-                */
-               if (delblks) {
-                       ASSERT(ip->i_udquot);
-                       unresudq = ip->i_udquot;
-               }
-       }
-       if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
-               if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
-                    xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
-                       prjflags = XFS_QMOPT_ENOSPC;
-
-               if (prjflags ||
-                   (XFS_IS_GQUOTA_ON(ip->i_mount) &&
-                    ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
-                       delblksgdq = gdqp;
-                       if (delblks) {
-                               ASSERT(ip->i_gdquot);
-                               unresgdq = ip->i_gdquot;
-                       }
-               }
-       }
-
-       if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
-                               delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
-                               flags | blkflags | prjflags)))
-               return (error);
-
-       /*
-        * Do the delayed blks reservations/unreservations now. Since, these
-        * are done without the help of a transaction, if a reservation fails
-        * its previous reservations won't be automatically undone by trans
-        * code. So, we have to do it manually here.
-        */
-       if (delblks) {
-               /*
-                * Do the reservations first. Unreservation can't fail.
-                */
-               ASSERT(delblksudq || delblksgdq);
-               ASSERT(unresudq || unresgdq);
-               if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
-                               delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
-                               flags | blkflags | prjflags)))
-                       return (error);
-               xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
-                               unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
-                               blkflags);
-       }
-
-       return (0);
-}
-
-int
-xfs_qm_vop_rename_dqattach(
-       struct xfs_inode        **i_tab)
-{
-       struct xfs_mount        *mp = i_tab[0]->i_mount;
-       int                     i;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return 0;
-
-       for (i = 0; (i < 4 && i_tab[i]); i++) {
-               struct xfs_inode        *ip = i_tab[i];
-               int                     error;
-
-               /*
-                * Watch out for duplicate entries in the table.
-                */
-               if (i == 0 || ip != i_tab[i-1]) {
-                       if (XFS_NOT_DQATTACHED(mp, ip)) {
-                               error = xfs_qm_dqattach(ip, 0);
-                               if (error)
-                                       return error;
-                       }
-               }
-       }
-       return 0;
-}
-
-void
-xfs_qm_vop_create_dqattach(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *ip,
-       struct xfs_dquot        *udqp,
-       struct xfs_dquot        *gdqp)
-{
-       struct xfs_mount        *mp = tp->t_mountp;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
-       if (udqp) {
-               xfs_dqlock(udqp);
-               XFS_DQHOLD(udqp);
-               xfs_dqunlock(udqp);
-               ASSERT(ip->i_udquot == NULL);
-               ip->i_udquot = udqp;
-               ASSERT(XFS_IS_UQUOTA_ON(mp));
-               ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
-               xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
-       }
-       if (gdqp) {
-               xfs_dqlock(gdqp);
-               XFS_DQHOLD(gdqp);
-               xfs_dqunlock(gdqp);
-               ASSERT(ip->i_gdquot == NULL);
-               ip->i_gdquot = gdqp;
-               ASSERT(XFS_IS_OQUOTA_ON(mp));
-               ASSERT((XFS_IS_GQUOTA_ON(mp) ?
-                       ip->i_d.di_gid : xfs_get_projid(ip)) ==
-                               be32_to_cpu(gdqp->q_core.d_id));
-               xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
-       }
-}
-
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h

deleted file mode 100644 (file)

index 43b9abe..0000000
--- a/fs/xfs/quota/xfs_qm.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_QM_H__
-#define __XFS_QM_H__
-
-#include "xfs_dquot_item.h"
-#include "xfs_dquot.h"
-#include "xfs_quota_priv.h"
-#include "xfs_qm_stats.h"
-
-struct xfs_qm;
-struct xfs_inode;
-
-extern uint            ndquot;
-extern struct mutex    xfs_Gqm_lock;
-extern struct xfs_qm   *xfs_Gqm;
-extern kmem_zone_t     *qm_dqzone;
-extern kmem_zone_t     *qm_dqtrxzone;
-
-/*
- * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
- * iterate over the mountpt's dquot list in one call.
- */
-#define XFS_QM_SYNC_MAX_RESTARTS       7
-
-/*
- * Ditto, for xfs_qm_dqreclaim_one.
- */
-#define XFS_QM_RECLAIM_MAX_RESTARTS    4
-
-/*
- * Ideal ratio of free to in use dquots. Quota manager makes an attempt
- * to keep this balance.
- */
-#define XFS_QM_DQFREE_RATIO            2
-
-/*
- * Dquot hashtable constants/threshold values.
- */
-#define XFS_QM_HASHSIZE_LOW            (PAGE_SIZE / sizeof(xfs_dqhash_t))
-#define XFS_QM_HASHSIZE_HIGH           ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))
-
-/*
- * This defines the unit of allocation of dquots.
- * Currently, it is just one file system block, and a 4K blk contains 30
- * (136 * 30 = 4080) dquots. It's probably not worth trying to make
- * this more dynamic.
- * XXXsup However, if this number is changed, we have to make sure that we don't
- * implicitly assume that we do allocations in chunks of a single filesystem
- * block in the dquot/xqm code.
- */
-#define XFS_DQUOT_CLUSTER_SIZE_FSB     (xfs_filblks_t)1
-
-typedef xfs_dqhash_t   xfs_dqlist_t;
-
-/*
- * Quota Manager (global) structure. Lives only in core.
- */
-typedef struct xfs_qm {
-       xfs_dqlist_t    *qm_usr_dqhtable;/* udquot hash table */
-       xfs_dqlist_t    *qm_grp_dqhtable;/* gdquot hash table */
-       uint             qm_dqhashmask;  /* # buckets in dq hashtab - 1 */
-       struct list_head qm_dqfrlist;    /* freelist of dquots */
-       struct mutex     qm_dqfrlist_lock;
-       int              qm_dqfrlist_cnt;
-       atomic_t         qm_totaldquots; /* total incore dquots */
-       uint             qm_nrefs;       /* file systems with quota on */
-       int              qm_dqfree_ratio;/* ratio of free to inuse dquots */
-       kmem_zone_t     *qm_dqzone;      /* dquot mem-alloc zone */
-       kmem_zone_t     *qm_dqtrxzone;   /* t_dqinfo of transactions */
-} xfs_qm_t;
-
-/*
- * Various quota information for individual filesystems.
- * The mount structure keeps a pointer to this.
- */
-typedef struct xfs_quotainfo {
-       xfs_inode_t     *qi_uquotaip;    /* user quota inode */
-       xfs_inode_t     *qi_gquotaip;    /* group quota inode */
-       struct list_head qi_dqlist;      /* all dquots in filesys */
-       struct mutex     qi_dqlist_lock;
-       int              qi_dquots;
-       int              qi_dqreclaims;  /* a change here indicates
-                                           a removal in the dqlist */
-       time_t           qi_btimelimit;  /* limit for blks timer */
-       time_t           qi_itimelimit;  /* limit for inodes timer */
-       time_t           qi_rtbtimelimit;/* limit for rt blks timer */
-       xfs_qwarncnt_t   qi_bwarnlimit;  /* limit for blks warnings */
-       xfs_qwarncnt_t   qi_iwarnlimit;  /* limit for inodes warnings */
-       xfs_qwarncnt_t   qi_rtbwarnlimit;/* limit for rt blks warnings */
-       struct mutex     qi_quotaofflock;/* to serialize quotaoff */
-       xfs_filblks_t    qi_dqchunklen;  /* # BBs in a chunk of dqs */
-       uint             qi_dqperchunk;  /* # ondisk dqs in above chunk */
-       xfs_qcnt_t       qi_bhardlimit;  /* default data blk hard limit */
-       xfs_qcnt_t       qi_bsoftlimit;  /* default data blk soft limit */
-       xfs_qcnt_t       qi_ihardlimit;  /* default inode count hard limit */
-       xfs_qcnt_t       qi_isoftlimit;  /* default inode count soft limit */
-       xfs_qcnt_t       qi_rtbhardlimit;/* default realtime blk hard limit */
-       xfs_qcnt_t       qi_rtbsoftlimit;/* default realtime blk soft limit */
-} xfs_quotainfo_t;
-
-
-extern void    xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
-extern int     xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
-                       xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
-extern void    xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *);
-extern void    xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *);
-
-/*
- * We keep the usr and grp dquots separately so that locking will be easier
- * to do at commit time. All transactions that we know of at this point
- * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value.
- */
-#define XFS_QM_TRANS_MAXDQS            2
-typedef struct xfs_dquot_acct {
-       xfs_dqtrx_t     dqa_usrdquots[XFS_QM_TRANS_MAXDQS];
-       xfs_dqtrx_t     dqa_grpdquots[XFS_QM_TRANS_MAXDQS];
-} xfs_dquot_acct_t;
-
-/*
- * Users are allowed to have a usage exceeding their softlimit for
- * a period this long.
- */
-#define XFS_QM_BTIMELIMIT      (7 * 24*60*60)          /* 1 week */
-#define XFS_QM_RTBTIMELIMIT    (7 * 24*60*60)          /* 1 week */
-#define XFS_QM_ITIMELIMIT      (7 * 24*60*60)          /* 1 week */
-
-#define XFS_QM_BWARNLIMIT      5
-#define XFS_QM_IWARNLIMIT      5
-#define XFS_QM_RTBWARNLIMIT    5
-
-extern void            xfs_qm_destroy_quotainfo(xfs_mount_t *);
-extern int             xfs_qm_quotacheck(xfs_mount_t *);
-extern int             xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
-
-/* dquot stuff */
-extern boolean_t       xfs_qm_dqalloc_incore(xfs_dquot_t **);
-extern int             xfs_qm_dqpurge_all(xfs_mount_t *, uint);
-extern void            xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
-
-/* quota ops */
-extern int             xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
-extern int             xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
-                                       fs_disk_quota_t *);
-extern int             xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
-                                       fs_disk_quota_t *);
-extern int             xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
-extern int             xfs_qm_scall_quotaon(xfs_mount_t *, uint);
-extern int             xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
-
-#endif /* __XFS_QM_H__ */
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c

deleted file mode 100644 (file)

index a0a829a..0000000
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_qm.h"
-
-
-STATIC void
-xfs_fill_statvfs_from_dquot(
-       struct kstatfs          *statp,
-       xfs_disk_dquot_t        *dp)
-{
-       __uint64_t              limit;
-
-       limit = dp->d_blk_softlimit ?
-               be64_to_cpu(dp->d_blk_softlimit) :
-               be64_to_cpu(dp->d_blk_hardlimit);
-       if (limit && statp->f_blocks > limit) {
-               statp->f_blocks = limit;
-               statp->f_bfree = statp->f_bavail =
-                       (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ?
-                        (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0;
-       }
-
-       limit = dp->d_ino_softlimit ?
-               be64_to_cpu(dp->d_ino_softlimit) :
-               be64_to_cpu(dp->d_ino_hardlimit);
-       if (limit && statp->f_files > limit) {
-               statp->f_files = limit;
-               statp->f_ffree =
-                       (statp->f_files > be64_to_cpu(dp->d_icount)) ?
-                        (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0;
-       }
-}
-
-
-/*
- * Directory tree accounting is implemented using project quotas, where
- * the project identifier is inherited from parent directories.
- * A statvfs (df, etc.) of a directory that is using project quota should
- * return a statvfs of the project, not the entire filesystem.
- * This makes such trees appear as if they are filesystems in themselves.
- */
-void
-xfs_qm_statvfs(
-       xfs_inode_t             *ip,
-       struct kstatfs          *statp)
-{
-       xfs_mount_t             *mp = ip->i_mount;
-       xfs_dquot_t             *dqp;
-
-       if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
-               xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
-               xfs_qm_dqput(dqp);
-       }
-}
-
-int
-xfs_qm_newmount(
-       xfs_mount_t     *mp,
-       uint            *needquotamount,
-       uint            *quotaflags)
-{
-       uint            quotaondisk;
-       uint            uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
-
-       quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
-                               (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
-
-       if (quotaondisk) {
-               uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT;
-               pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT;
-               gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT;
-       }
-
-       /*
-        * If the device itself is read-only, we can't allow
-        * the user to change the state of quota on the mount -
-        * this would generate a transaction on the ro device,
-        * which would lead to an I/O error and shutdown
-        */
-
-       if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
-           (!uquotaondisk &&  XFS_IS_UQUOTA_ON(mp)) ||
-            (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) ||
-           (!pquotaondisk &&  XFS_IS_PQUOTA_ON(mp)) ||
-            (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
-           (!gquotaondisk &&  XFS_IS_OQUOTA_ON(mp)))  &&
-           xfs_dev_is_read_only(mp, "changing quota state")) {
-               xfs_warn(mp, "please mount with%s%s%s%s.",
-                       (!quotaondisk ? "out quota" : ""),
-                       (uquotaondisk ? " usrquota" : ""),
-                       (pquotaondisk ? " prjquota" : ""),
-                       (gquotaondisk ? " grpquota" : ""));
-               return XFS_ERROR(EPERM);
-       }
-
-       if (XFS_IS_QUOTA_ON(mp) || quotaondisk) {
-               /*
-                * Call mount_quotas at this point only if we won't have to do
-                * a quotacheck.
-                */
-               if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) {
-                       /*
-                        * If an error occurred, qm_mount_quotas code
-                        * has already disabled quotas. So, just finish
-                        * mounting, and get on with the boring life
-                        * without disk quotas.
-                        */
-                       xfs_qm_mount_quotas(mp);
-               } else {
-                       /*
-                        * Clear the quota flags, but remember them. This
-                        * is so that the quota code doesn't get invoked
-                        * before we're ready. This can happen when an
-                        * inode goes inactive and wants to free blocks,
-                        * or via xfs_log_mount_finish.
-                        */
-                       *needquotamount = B_TRUE;
-                       *quotaflags = mp->m_qflags;
-                       mp->m_qflags = 0;
-               }
-       }
-
-       return 0;
-}
-
-void __init
-xfs_qm_init(void)
-{
-       printk(KERN_INFO "SGI XFS Quota Management subsystem\n");
-       mutex_init(&xfs_Gqm_lock);
-       xfs_qm_init_procfs();
-}
-
-void __exit
-xfs_qm_exit(void)
-{
-       xfs_qm_cleanup_procfs();
-       if (qm_dqzone)
-               kmem_zone_destroy(qm_dqzone);
-       if (qm_dqtrxzone)
-               kmem_zone_destroy(qm_dqtrxzone);
-}
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c

deleted file mode 100644 (file)

index 8671a0b..0000000
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_qm.h"
-
-struct xqmstats xqmstats;
-
-static int xqm_proc_show(struct seq_file *m, void *v)
-{
-       /* maximum; incore; ratio free to inuse; freelist */
-       seq_printf(m, "%d\t%d\t%d\t%u\n",
-                       ndquot,
-                       xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
-                       xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
-                       xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
-       return 0;
-}
-
-static int xqm_proc_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, xqm_proc_show, NULL);
-}
-
-static const struct file_operations xqm_proc_fops = {
-       .owner          = THIS_MODULE,
-       .open           = xqm_proc_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-static int xqmstat_proc_show(struct seq_file *m, void *v)
-{
-       /* quota performance statistics */
-       seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
-                       xqmstats.xs_qm_dqreclaims,
-                       xqmstats.xs_qm_dqreclaim_misses,
-                       xqmstats.xs_qm_dquot_dups,
-                       xqmstats.xs_qm_dqcachemisses,
-                       xqmstats.xs_qm_dqcachehits,
-                       xqmstats.xs_qm_dqwants,
-                       xqmstats.xs_qm_dqshake_reclaims,
-                       xqmstats.xs_qm_dqinact_reclaims);
-       return 0;
-}
-
-static int xqmstat_proc_open(struct inode *inode, struct file *file)
-{
-       return single_open(file, xqmstat_proc_show, NULL);
-}
-
-static const struct file_operations xqmstat_proc_fops = {
-       .owner          = THIS_MODULE,
-       .open           = xqmstat_proc_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-void
-xfs_qm_init_procfs(void)
-{
-       proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops);
-       proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops);
-}
-
-void
-xfs_qm_cleanup_procfs(void)
-{
-       remove_proc_entry("fs/xfs/xqm", NULL);
-       remove_proc_entry("fs/xfs/xqmstat", NULL);
-}
diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/quota/xfs_qm_stats.h

deleted file mode 100644 (file)

index 5b964fc..0000000
--- a/fs/xfs/quota/xfs_qm_stats.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2002 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_QM_STATS_H__
-#define __XFS_QM_STATS_H__
-
-#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
-
-/*
- * XQM global statistics
- */
-struct xqmstats {
-       __uint32_t              xs_qm_dqreclaims;
-       __uint32_t              xs_qm_dqreclaim_misses;
-       __uint32_t              xs_qm_dquot_dups;
-       __uint32_t              xs_qm_dqcachemisses;
-       __uint32_t              xs_qm_dqcachehits;
-       __uint32_t              xs_qm_dqwants;
-       __uint32_t              xs_qm_dqshake_reclaims;
-       __uint32_t              xs_qm_dqinact_reclaims;
-};
-
-extern struct xqmstats xqmstats;
-
-# define XQM_STATS_INC(count)  ( (count)++ )
-
-extern void xfs_qm_init_procfs(void);
-extern void xfs_qm_cleanup_procfs(void);
-
-#else
-
-# define XQM_STATS_INC(count)  do { } while (0)
-
-static inline void xfs_qm_init_procfs(void) { };
-static inline void xfs_qm_cleanup_procfs(void) { };
-
-#endif
-
-#endif /* __XFS_QM_STATS_H__ */
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c

deleted file mode 100644 (file)

index 609246f..0000000
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ /dev/null
@@ -1,906 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include <linux/capability.h>
-
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_qm.h"
-#include "xfs_trace.h"
-
-STATIC int     xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
-STATIC int     xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
-                                       uint);
-STATIC uint    xfs_qm_export_flags(uint);
-STATIC uint    xfs_qm_export_qtype_flags(uint);
-STATIC void    xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
-                                       fs_disk_quota_t *);
-
-
-/*
- * Turn off quota accounting and/or enforcement for all udquots and/or
- * gdquots. Called only at unmount time.
- *
- * This assumes that there are no dquots of this file system cached
- * incore, and modifies the ondisk dquot directly. Therefore, for example,
- * it is an error to call this twice, without purging the cache.
- */
-int
-xfs_qm_scall_quotaoff(
-       xfs_mount_t             *mp,
-       uint                    flags)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       uint                    dqtype;
-       int                     error;
-       uint                    inactivate_flags;
-       xfs_qoff_logitem_t      *qoffstart;
-       int                     nculprits;
-
-       /*
-        * No file system can have quotas enabled on disk but not in core.
-        * Note that quota utilities (like quotaoff) _expect_
-        * errno == EEXIST here.
-        */
-       if ((mp->m_qflags & flags) == 0)
-               return XFS_ERROR(EEXIST);
-       error = 0;
-
-       flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
-
-       /*
-        * We don't want to deal with two quotaoffs messing up each other,
-        * so we're going to serialize it. quotaoff isn't exactly a performance
-        * critical thing.
-        * If quotaoff, then we must be dealing with the root filesystem.
-        */
-       ASSERT(q);
-       mutex_lock(&q->qi_quotaofflock);
-
-       /*
-        * If we're just turning off quota enforcement, change mp and go.
-        */
-       if ((flags & XFS_ALL_QUOTA_ACCT) == 0) {
-               mp->m_qflags &= ~(flags);
-
-               spin_lock(&mp->m_sb_lock);
-               mp->m_sb.sb_qflags = mp->m_qflags;
-               spin_unlock(&mp->m_sb_lock);
-               mutex_unlock(&q->qi_quotaofflock);
-
-               /* XXX what to do if error ? Revert back to old vals incore ? */
-               error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
-               return (error);
-       }
-
-       dqtype = 0;
-       inactivate_flags = 0;
-       /*
-        * If accounting is off, we must turn enforcement off, clear the
-        * quota 'CHKD' certificate to make it known that we have to
-        * do a quotacheck the next time this quota is turned on.
-        */
-       if (flags & XFS_UQUOTA_ACCT) {
-               dqtype |= XFS_QMOPT_UQUOTA;
-               flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD);
-               inactivate_flags |= XFS_UQUOTA_ACTIVE;
-       }
-       if (flags & XFS_GQUOTA_ACCT) {
-               dqtype |= XFS_QMOPT_GQUOTA;
-               flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
-               inactivate_flags |= XFS_GQUOTA_ACTIVE;
-       } else if (flags & XFS_PQUOTA_ACCT) {
-               dqtype |= XFS_QMOPT_PQUOTA;
-               flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
-               inactivate_flags |= XFS_PQUOTA_ACTIVE;
-       }
-
-       /*
-        * Nothing to do?  Don't complain. This happens when we're just
-        * turning off quota enforcement.
-        */
-       if ((mp->m_qflags & flags) == 0)
-               goto out_unlock;
-
-       /*
-        * Write the LI_QUOTAOFF log record, and do SB changes atomically,
-        * and synchronously. If we fail to write, we should abort the
-        * operation as it cannot be recovered safely if we crash.
-        */
-       error = xfs_qm_log_quotaoff(mp, &qoffstart, flags);
-       if (error)
-               goto out_unlock;
-
-       /*
-        * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
-        * to take care of the race between dqget and quotaoff. We don't take
-        * any special locks to reset these bits. All processes need to check
-        * these bits *after* taking inode lock(s) to see if the particular
-        * quota type is in the process of being turned off. If *ACTIVE, it is
-        * guaranteed that all dquot structures and all quotainode ptrs will all
-        * stay valid as long as that inode is kept locked.
-        *
-        * There is no turning back after this.
-        */
-       mp->m_qflags &= ~inactivate_flags;
-
-       /*
-        * Give back all the dquot reference(s) held by inodes.
-        * Here we go thru every single incore inode in this file system, and
-        * do a dqrele on the i_udquot/i_gdquot that it may have.
-        * Essentially, as long as somebody has an inode locked, this guarantees
-        * that quotas will not be turned off. This is handy because in a
-        * transaction once we lock the inode(s) and check for quotaon, we can
-        * depend on the quota inodes (and other things) being valid as long as
-        * we keep the lock(s).
-        */
-       xfs_qm_dqrele_all_inodes(mp, flags);
-
-       /*
-        * Next we make the changes in the quota flag in the mount struct.
-        * This isn't protected by a particular lock directly, because we
-        * don't want to take a mrlock every time we depend on quotas being on.
-        */
-       mp->m_qflags &= ~(flags);
-
-       /*
-        * Go through all the dquots of this file system and purge them,
-        * according to what was turned off. We may not be able to get rid
-        * of all dquots, because dquots can have temporary references that
-        * are not attached to inodes. eg. xfs_setattr, xfs_create.
-        * So, if we couldn't purge all the dquots from the filesystem,
-        * we can't get rid of the incore data structures.
-        */
-       while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype)))
-               delay(10 * nculprits);
-
-       /*
-        * Transactions that had started before ACTIVE state bit was cleared
-        * could have logged many dquots, so they'd have higher LSNs than
-        * the first QUOTAOFF log record does. If we happen to crash when
-        * the tail of the log has gone past the QUOTAOFF record, but
-        * before the last dquot modification, those dquots __will__
-        * recover, and that's not good.
-        *
-        * So, we have QUOTAOFF start and end logitems; the start
-        * logitem won't get overwritten until the end logitem appears...
-        */
-       error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
-       if (error) {
-               /* We're screwed now. Shutdown is the only option. */
-               xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-               goto out_unlock;
-       }
-
-       /*
-        * If quotas is completely disabled, close shop.
-        */
-       if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) ||
-           ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) {
-               mutex_unlock(&q->qi_quotaofflock);
-               xfs_qm_destroy_quotainfo(mp);
-               return (0);
-       }
-
-       /*
-        * Release our quotainode references if we don't need them anymore.
-        */
-       if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) {
-               IRELE(q->qi_uquotaip);
-               q->qi_uquotaip = NULL;
-       }
-       if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) {
-               IRELE(q->qi_gquotaip);
-               q->qi_gquotaip = NULL;
-       }
-
-out_unlock:
-       mutex_unlock(&q->qi_quotaofflock);
-       return error;
-}
-
-STATIC int
-xfs_qm_scall_trunc_qfile(
-       struct xfs_mount        *mp,
-       xfs_ino_t               ino)
-{
-       struct xfs_inode        *ip;
-       struct xfs_trans        *tp;
-       int                     error;
-
-       if (ino == NULLFSINO)
-               return 0;
-
-       error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
-       if (error)
-               return error;
-
-       xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
-       error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
-                                 XFS_TRANS_PERM_LOG_RES,
-                                 XFS_ITRUNCATE_LOG_COUNT);
-       if (error) {
-               xfs_trans_cancel(tp, 0);
-               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-               goto out_put;
-       }
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       xfs_trans_ijoin(tp, ip);
-
-       error = xfs_itruncate_data(&tp, ip, 0);
-       if (error) {
-               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
-                                    XFS_TRANS_ABORT);
-               goto out_unlock;
-       }
-
-       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-
-out_unlock:
-       xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-out_put:
-       IRELE(ip);
-       return error;
-}
-
-int
-xfs_qm_scall_trunc_qfiles(
-       xfs_mount_t     *mp,
-       uint            flags)
-{
-       int             error = 0, error2 = 0;
-
-       if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
-               xfs_debug(mp, "%s: flags=%x m_qflags=%x\n",
-                       __func__, flags, mp->m_qflags);
-               return XFS_ERROR(EINVAL);
-       }
-
-       if (flags & XFS_DQ_USER)
-               error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino);
-       if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ))
-               error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
-
-       return error ? error : error2;
-}
-
-/*
- * Switch on (a given) quota enforcement for a filesystem.  This takes
- * effect immediately.
- * (Switching on quota accounting must be done at mount time.)
- */
-int
-xfs_qm_scall_quotaon(
-       xfs_mount_t     *mp,
-       uint            flags)
-{
-       int             error;
-       uint            qf;
-       __int64_t       sbflags;
-
-       flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
-       /*
-        * Switching on quota accounting must be done at mount time.
-        */
-       flags &= ~(XFS_ALL_QUOTA_ACCT);
-
-       sbflags = 0;
-
-       if (flags == 0) {
-               xfs_debug(mp, "%s: zero flags, m_qflags=%x\n",
-                       __func__, mp->m_qflags);
-               return XFS_ERROR(EINVAL);
-       }
-
-       /* No fs can turn on quotas with a delayed effect */
-       ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0);
-
-       /*
-        * Can't enforce without accounting. We check the superblock
-        * qflags here instead of m_qflags because rootfs can have
-        * quota acct on ondisk without m_qflags' knowing.
-        */
-       if (((flags & XFS_UQUOTA_ACCT) == 0 &&
-           (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
-           (flags & XFS_UQUOTA_ENFD))
-           ||
-           ((flags & XFS_PQUOTA_ACCT) == 0 &&
-           (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&
-           (flags & XFS_GQUOTA_ACCT) == 0 &&
-           (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
-           (flags & XFS_OQUOTA_ENFD))) {
-               xfs_debug(mp,
-                       "%s: Can't enforce without acct, flags=%x sbflags=%x\n",
-                       __func__, flags, mp->m_sb.sb_qflags);
-               return XFS_ERROR(EINVAL);
-       }
-       /*
-        * If everything's up to-date incore, then don't waste time.
-        */
-       if ((mp->m_qflags & flags) == flags)
-               return XFS_ERROR(EEXIST);
-
-       /*
-        * Change sb_qflags on disk but not incore mp->qflags
-        * if this is the root filesystem.
-        */
-       spin_lock(&mp->m_sb_lock);
-       qf = mp->m_sb.sb_qflags;
-       mp->m_sb.sb_qflags = qf | flags;
-       spin_unlock(&mp->m_sb_lock);
-
-       /*
-        * There's nothing to change if it's the same.
-        */
-       if ((qf & flags) == flags && sbflags == 0)
-               return XFS_ERROR(EEXIST);
-       sbflags |= XFS_SB_QFLAGS;
-
-       if ((error = xfs_qm_write_sb_changes(mp, sbflags)))
-               return (error);
-       /*
-        * If we aren't trying to switch on quota enforcement, we are done.
-        */
-       if  (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) !=
-            (mp->m_qflags & XFS_UQUOTA_ACCT)) ||
-            ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) !=
-            (mp->m_qflags & XFS_PQUOTA_ACCT)) ||
-            ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
-            (mp->m_qflags & XFS_GQUOTA_ACCT)) ||
-           (flags & XFS_ALL_QUOTA_ENFD) == 0)
-               return (0);
-
-       if (! XFS_IS_QUOTA_RUNNING(mp))
-               return XFS_ERROR(ESRCH);
-
-       /*
-        * Switch on quota enforcement in core.
-        */
-       mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
-       mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
-       mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
-
-       return (0);
-}
-
-
-/*
- * Return quota status information, such as uquota-off, enforcements, etc.
- */
-int
-xfs_qm_scall_getqstat(
-       struct xfs_mount        *mp,
-       struct fs_quota_stat    *out)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       struct xfs_inode        *uip, *gip;
-       boolean_t               tempuqip, tempgqip;
-
-       uip = gip = NULL;
-       tempuqip = tempgqip = B_FALSE;
-       memset(out, 0, sizeof(fs_quota_stat_t));
-
-       out->qs_version = FS_QSTAT_VERSION;
-       if (!xfs_sb_version_hasquota(&mp->m_sb)) {
-               out->qs_uquota.qfs_ino = NULLFSINO;
-               out->qs_gquota.qfs_ino = NULLFSINO;
-               return (0);
-       }
-       out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
-                                                       (XFS_ALL_QUOTA_ACCT|
-                                                        XFS_ALL_QUOTA_ENFD));
-       out->qs_pad = 0;
-       out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
-       out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
-
-       if (q) {
-               uip = q->qi_uquotaip;
-               gip = q->qi_gquotaip;
-       }
-       if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
-               if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
-                                       0, 0, &uip) == 0)
-                       tempuqip = B_TRUE;
-       }
-       if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
-               if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
-                                       0, 0, &gip) == 0)
-                       tempgqip = B_TRUE;
-       }
-       if (uip) {
-               out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
-               out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
-               if (tempuqip)
-                       IRELE(uip);
-       }
-       if (gip) {
-               out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
-               out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
-               if (tempgqip)
-                       IRELE(gip);
-       }
-       if (q) {
-               out->qs_incoredqs = q->qi_dquots;
-               out->qs_btimelimit = q->qi_btimelimit;
-               out->qs_itimelimit = q->qi_itimelimit;
-               out->qs_rtbtimelimit = q->qi_rtbtimelimit;
-               out->qs_bwarnlimit = q->qi_bwarnlimit;
-               out->qs_iwarnlimit = q->qi_iwarnlimit;
-       }
-       return 0;
-}
-
-#define XFS_DQ_MASK \
-       (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK)
-
-/*
- * Adjust quota limits, and start/stop timers accordingly.
- */
-int
-xfs_qm_scall_setqlim(
-       xfs_mount_t             *mp,
-       xfs_dqid_t              id,
-       uint                    type,
-       fs_disk_quota_t         *newlim)
-{
-       struct xfs_quotainfo    *q = mp->m_quotainfo;
-       xfs_disk_dquot_t        *ddq;
-       xfs_dquot_t             *dqp;
-       xfs_trans_t             *tp;
-       int                     error;
-       xfs_qcnt_t              hard, soft;
-
-       if (newlim->d_fieldmask & ~XFS_DQ_MASK)
-               return EINVAL;
-       if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
-               return 0;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
-       if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
-                                     0, 0, XFS_DEFAULT_LOG_COUNT))) {
-               xfs_trans_cancel(tp, 0);
-               return (error);
-       }
-
-       /*
-        * We don't want to race with a quotaoff so take the quotaoff lock.
-        * (We don't hold an inode lock, so there's nothing else to stop
-        * a quotaoff from happening). (XXXThis doesn't currently happen
-        * because we take the vfslock before calling xfs_qm_sysent).
-        */
-       mutex_lock(&q->qi_quotaofflock);
-
-       /*
-        * Get the dquot (locked), and join it to the transaction.
-        * Allocate the dquot if this doesn't exist.
-        */
-       if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
-               xfs_trans_cancel(tp, XFS_TRANS_ABORT);
-               ASSERT(error != ENOENT);
-               goto out_unlock;
-       }
-       xfs_trans_dqjoin(tp, dqp);
-       ddq = &dqp->q_core;
-
-       /*
-        * Make sure that hardlimits are >= soft limits before changing.
-        */
-       hard = (newlim->d_fieldmask & FS_DQ_BHARD) ?
-               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) :
-                       be64_to_cpu(ddq->d_blk_hardlimit);
-       soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ?
-               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) :
-                       be64_to_cpu(ddq->d_blk_softlimit);
-       if (hard == 0 || hard >= soft) {
-               ddq->d_blk_hardlimit = cpu_to_be64(hard);
-               ddq->d_blk_softlimit = cpu_to_be64(soft);
-               if (id == 0) {
-                       q->qi_bhardlimit = hard;
-                       q->qi_bsoftlimit = soft;
-               }
-       } else {
-               xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft);
-       }
-       hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
-               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
-                       be64_to_cpu(ddq->d_rtb_hardlimit);
-       soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ?
-               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) :
-                       be64_to_cpu(ddq->d_rtb_softlimit);
-       if (hard == 0 || hard >= soft) {
-               ddq->d_rtb_hardlimit = cpu_to_be64(hard);
-               ddq->d_rtb_softlimit = cpu_to_be64(soft);
-               if (id == 0) {
-                       q->qi_rtbhardlimit = hard;
-                       q->qi_rtbsoftlimit = soft;
-               }
-       } else {
-               xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
-       }
-
-       hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
-               (xfs_qcnt_t) newlim->d_ino_hardlimit :
-                       be64_to_cpu(ddq->d_ino_hardlimit);
-       soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ?
-               (xfs_qcnt_t) newlim->d_ino_softlimit :
-                       be64_to_cpu(ddq->d_ino_softlimit);
-       if (hard == 0 || hard >= soft) {
-               ddq->d_ino_hardlimit = cpu_to_be64(hard);
-               ddq->d_ino_softlimit = cpu_to_be64(soft);
-               if (id == 0) {
-                       q->qi_ihardlimit = hard;
-                       q->qi_isoftlimit = soft;
-               }
-       } else {
-               xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft);
-       }
-
-       /*
-        * Update warnings counter(s) if requested
-        */
-       if (newlim->d_fieldmask & FS_DQ_BWARNS)
-               ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns);
-       if (newlim->d_fieldmask & FS_DQ_IWARNS)
-               ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns);
-       if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
-               ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns);
-
-       if (id == 0) {
-               /*
-                * Timelimits for the super user set the relative time
-                * the other users can be over quota for this file system.
-                * If it is zero a default is used.  Ditto for the default
-                * soft and hard limit values (already done, above), and
-                * for warnings.
-                */
-               if (newlim->d_fieldmask & FS_DQ_BTIMER) {
-                       q->qi_btimelimit = newlim->d_btimer;
-                       ddq->d_btimer = cpu_to_be32(newlim->d_btimer);
-               }
-               if (newlim->d_fieldmask & FS_DQ_ITIMER) {
-                       q->qi_itimelimit = newlim->d_itimer;
-                       ddq->d_itimer = cpu_to_be32(newlim->d_itimer);
-               }
-               if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
-                       q->qi_rtbtimelimit = newlim->d_rtbtimer;
-                       ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer);
-               }
-               if (newlim->d_fieldmask & FS_DQ_BWARNS)
-                       q->qi_bwarnlimit = newlim->d_bwarns;
-               if (newlim->d_fieldmask & FS_DQ_IWARNS)
-                       q->qi_iwarnlimit = newlim->d_iwarns;
-               if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
-                       q->qi_rtbwarnlimit = newlim->d_rtbwarns;
-       } else {
-               /*
-                * If the user is now over quota, start the timelimit.
-                * The user will not be 'warned'.
-                * Note that we keep the timers ticking, whether enforcement
-                * is on or off. We don't really want to bother with iterating
-                * over all ondisk dquots and turning the timers on/off.
-                */
-               xfs_qm_adjust_dqtimers(mp, ddq);
-       }
-       dqp->dq_flags |= XFS_DQ_DIRTY;
-       xfs_trans_log_dquot(tp, dqp);
-
-       error = xfs_trans_commit(tp, 0);
-       xfs_qm_dqrele(dqp);
-
- out_unlock:
-       mutex_unlock(&q->qi_quotaofflock);
-       return error;
-}
-
-int
-xfs_qm_scall_getquota(
-       xfs_mount_t     *mp,
-       xfs_dqid_t      id,
-       uint            type,
-       fs_disk_quota_t *out)
-{
-       xfs_dquot_t     *dqp;
-       int             error;
-
-       /*
-        * Try to get the dquot. We don't want it allocated on disk, so
-        * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
-        * exist, we'll get ENOENT back.
-        */
-       if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) {
-               return (error);
-       }
-
-       /*
-        * If everything's NULL, this dquot doesn't quite exist as far as
-        * our utility programs are concerned.
-        */
-       if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
-               xfs_qm_dqput(dqp);
-               return XFS_ERROR(ENOENT);
-       }
-       /*
-        * Convert the disk dquot to the exportable format
-        */
-       xfs_qm_export_dquot(mp, &dqp->q_core, out);
-       xfs_qm_dqput(dqp);
-       return (error ? XFS_ERROR(EFAULT) : 0);
-}
-
-
-STATIC int
-xfs_qm_log_quotaoff_end(
-       xfs_mount_t             *mp,
-       xfs_qoff_logitem_t      *startqoff,
-       uint                    flags)
-{
-       xfs_trans_t             *tp;
-       int                     error;
-       xfs_qoff_logitem_t      *qoffi;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
-
-       if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2,
-                                     0, 0, XFS_DEFAULT_LOG_COUNT))) {
-               xfs_trans_cancel(tp, 0);
-               return (error);
-       }
-
-       qoffi = xfs_trans_get_qoff_item(tp, startqoff,
-                                       flags & XFS_ALL_QUOTA_ACCT);
-       xfs_trans_log_quotaoff_item(tp, qoffi);
-
-       /*
-        * We have to make sure that the transaction is secure on disk before we
-        * return and actually stop quota accounting. So, make it synchronous.
-        * We don't care about quotoff's performance.
-        */
-       xfs_trans_set_sync(tp);
-       error = xfs_trans_commit(tp, 0);
-       return (error);
-}
-
-
-STATIC int
-xfs_qm_log_quotaoff(
-       xfs_mount_t            *mp,
-       xfs_qoff_logitem_t     **qoffstartp,
-       uint                   flags)
-{
-       xfs_trans_t            *tp;
-       int                     error;
-       xfs_qoff_logitem_t     *qoffi=NULL;
-       uint                    oldsbqflag=0;
-
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
-       if ((error = xfs_trans_reserve(tp, 0,
-                                     sizeof(xfs_qoff_logitem_t) * 2 +
-                                     mp->m_sb.sb_sectsize + 128,
-                                     0,
-                                     0,
-                                     XFS_DEFAULT_LOG_COUNT))) {
-               goto error0;
-       }
-
-       qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
-       xfs_trans_log_quotaoff_item(tp, qoffi);
-
-       spin_lock(&mp->m_sb_lock);
-       oldsbqflag = mp->m_sb.sb_qflags;
-       mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
-       spin_unlock(&mp->m_sb_lock);
-
-       xfs_mod_sb(tp, XFS_SB_QFLAGS);
-
-       /*
-        * We have to make sure that the transaction is secure on disk before we
-        * return and actually stop quota accounting. So, make it synchronous.
-        * We don't care about quotoff's performance.
-        */
-       xfs_trans_set_sync(tp);
-       error = xfs_trans_commit(tp, 0);
-
-error0:
-       if (error) {
-               xfs_trans_cancel(tp, 0);
-               /*
-                * No one else is modifying sb_qflags, so this is OK.
-                * We still hold the quotaofflock.
-                */
-               spin_lock(&mp->m_sb_lock);
-               mp->m_sb.sb_qflags = oldsbqflag;
-               spin_unlock(&mp->m_sb_lock);
-       }
-       *qoffstartp = qoffi;
-       return (error);
-}
-
-
-/*
- * Translate an internal style on-disk-dquot to the exportable format.
- * The main differences are that the counters/limits are all in Basic
- * Blocks (BBs) instead of the internal FSBs, and all on-disk data has
- * to be converted to the native endianness.
- */
-STATIC void
-xfs_qm_export_dquot(
-       xfs_mount_t             *mp,
-       xfs_disk_dquot_t        *src,
-       struct fs_disk_quota    *dst)
-{
-       memset(dst, 0, sizeof(*dst));
-       dst->d_version = FS_DQUOT_VERSION;  /* different from src->d_version */
-       dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags);
-       dst->d_id = be32_to_cpu(src->d_id);
-       dst->d_blk_hardlimit =
-               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit));
-       dst->d_blk_softlimit =
-               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit));
-       dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit);
-       dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit);
-       dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount));
-       dst->d_icount = be64_to_cpu(src->d_icount);
-       dst->d_btimer = be32_to_cpu(src->d_btimer);
-       dst->d_itimer = be32_to_cpu(src->d_itimer);
-       dst->d_iwarns = be16_to_cpu(src->d_iwarns);
-       dst->d_bwarns = be16_to_cpu(src->d_bwarns);
-       dst->d_rtb_hardlimit =
-               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit));
-       dst->d_rtb_softlimit =
-               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit));
-       dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount));
-       dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer);
-       dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns);
-
-       /*
-        * Internally, we don't reset all the timers when quota enforcement
-        * gets turned off. No need to confuse the user level code,
-        * so return zeroes in that case.
-        */
-       if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) ||
-           (!XFS_IS_OQUOTA_ENFORCED(mp) &&
-                       (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) {
-               dst->d_btimer = 0;
-               dst->d_itimer = 0;
-               dst->d_rtbtimer = 0;
-       }
-
-#ifdef DEBUG
-       if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) ||
-            (XFS_IS_OQUOTA_ENFORCED(mp) &&
-                       (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
-           dst->d_id != 0) {
-               if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
-                   (dst->d_blk_softlimit > 0)) {
-                       ASSERT(dst->d_btimer != 0);
-               }
-               if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) &&
-                   (dst->d_ino_softlimit > 0)) {
-                       ASSERT(dst->d_itimer != 0);
-               }
-       }
-#endif
-}
-
-STATIC uint
-xfs_qm_export_qtype_flags(
-       uint flags)
-{
-       /*
-        * Can't be more than one, or none.
-        */
-       ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) !=
-               (FS_PROJ_QUOTA | FS_USER_QUOTA));
-       ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) !=
-               (FS_PROJ_QUOTA | FS_GROUP_QUOTA));
-       ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) !=
-               (FS_USER_QUOTA | FS_GROUP_QUOTA));
-       ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0);
-
-       return (flags & XFS_DQ_USER) ?
-               FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
-                       FS_PROJ_QUOTA : FS_GROUP_QUOTA;
-}
-
-STATIC uint
-xfs_qm_export_flags(
-       uint flags)
-{
-       uint uflags;
-
-       uflags = 0;
-       if (flags & XFS_UQUOTA_ACCT)
-               uflags |= FS_QUOTA_UDQ_ACCT;
-       if (flags & XFS_PQUOTA_ACCT)
-               uflags |= FS_QUOTA_PDQ_ACCT;
-       if (flags & XFS_GQUOTA_ACCT)
-               uflags |= FS_QUOTA_GDQ_ACCT;
-       if (flags & XFS_UQUOTA_ENFD)
-               uflags |= FS_QUOTA_UDQ_ENFD;
-       if (flags & (XFS_OQUOTA_ENFD)) {
-               uflags |= (flags & XFS_GQUOTA_ACCT) ?
-                       FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD;
-       }
-       return (uflags);
-}
-
-
-STATIC int
-xfs_dqrele_inode(
-       struct xfs_inode        *ip,
-       struct xfs_perag        *pag,
-       int                     flags)
-{
-       /* skip quota inodes */
-       if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
-           ip == ip->i_mount->m_quotainfo->qi_gquotaip) {
-               ASSERT(ip->i_udquot == NULL);
-               ASSERT(ip->i_gdquot == NULL);
-               return 0;
-       }
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
-               xfs_qm_dqrele(ip->i_udquot);
-               ip->i_udquot = NULL;
-       }
-       if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
-               xfs_qm_dqrele(ip->i_gdquot);
-               ip->i_gdquot = NULL;
-       }
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       return 0;
-}
-
-
-/*
- * Go thru all the inodes in the file system, releasing their dquots.
- *
- * Note that the mount structure gets modified to indicate that quotas are off
- * AFTER this, in the case of quotaoff.
- */
-void
-xfs_qm_dqrele_all_inodes(
-       struct xfs_mount *mp,
-       uint             flags)
-{
-       ASSERT(mp->m_quotainfo);
-       xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
-}
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h

deleted file mode 100644 (file)

index 94a3d92..0000000
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_QUOTA_PRIV_H__
-#define __XFS_QUOTA_PRIV_H__
-
-/*
- * Number of bmaps that we ask from bmapi when doing a quotacheck.
- * We make this restriction to keep the memory usage to a minimum.
- */
-#define XFS_DQITER_MAP_SIZE    10
-
-/*
- * Hash into a bucket in the dquot hash table, based on <mp, id>.
- */
-#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
-                                (__psunsigned_t)(id)) & \
-                               (xfs_Gqm->qm_dqhashmask - 1))
-#define XFS_DQ_HASH(mp, id, type)   (type == XFS_DQ_USER ? \
-                                    (xfs_Gqm->qm_usr_dqhtable + \
-                                     XFS_DQ_HASHVAL(mp, id)) : \
-                                    (xfs_Gqm->qm_grp_dqhtable + \
-                                     XFS_DQ_HASHVAL(mp, id)))
-#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
-       !dqp->q_core.d_blk_hardlimit && \
-       !dqp->q_core.d_blk_softlimit && \
-       !dqp->q_core.d_rtb_hardlimit && \
-       !dqp->q_core.d_rtb_softlimit && \
-       !dqp->q_core.d_ino_hardlimit && \
-       !dqp->q_core.d_ino_softlimit && \
-       !dqp->q_core.d_bcount && \
-       !dqp->q_core.d_rtbcount && \
-       !dqp->q_core.d_icount)
-
-#define DQFLAGTO_TYPESTR(d)    (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
-                                (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
-                                (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
-
-#endif /* __XFS_QUOTA_PRIV_H__ */
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c

deleted file mode 100644 (file)

index 4d00ee6..0000000
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ /dev/null
@@ -1,890 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-
-STATIC void    xfs_trans_alloc_dqinfo(xfs_trans_t *);
-
-/*
- * Add the locked dquot to the transaction.
- * The dquot must be locked, and it cannot be associated with any
- * transaction.
- */
-void
-xfs_trans_dqjoin(
-       xfs_trans_t     *tp,
-       xfs_dquot_t     *dqp)
-{
-       ASSERT(dqp->q_transp != tp);
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-       ASSERT(dqp->q_logitem.qli_dquot == dqp);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
-
-       /*
-        * Initialize d_transp so we can later determine if this dquot is
-        * associated with this transaction.
-        */
-       dqp->q_transp = tp;
-}
-
-
-/*
- * This is called to mark the dquot as needing
- * to be logged when the transaction is committed.  The dquot must
- * already be associated with the given transaction.
- * Note that it marks the entire transaction as dirty. In the ordinary
- * case, this gets called via xfs_trans_commit, after the transaction
- * is already dirty. However, there's nothing stop this from getting
- * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY
- * flag.
- */
-void
-xfs_trans_log_dquot(
-       xfs_trans_t     *tp,
-       xfs_dquot_t     *dqp)
-{
-       ASSERT(dqp->q_transp == tp);
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
-}
-
-/*
- * Carry forward whatever is left of the quota blk reservation to
- * the spanky new transaction
- */
-void
-xfs_trans_dup_dqinfo(
-       xfs_trans_t     *otp,
-       xfs_trans_t     *ntp)
-{
-       xfs_dqtrx_t     *oq, *nq;
-       int             i,j;
-       xfs_dqtrx_t     *oqa, *nqa;
-
-       if (!otp->t_dqinfo)
-               return;
-
-       xfs_trans_alloc_dqinfo(ntp);
-       oqa = otp->t_dqinfo->dqa_usrdquots;
-       nqa = ntp->t_dqinfo->dqa_usrdquots;
-
-       /*
-        * Because the quota blk reservation is carried forward,
-        * it is also necessary to carry forward the DQ_DIRTY flag.
-        */
-       if(otp->t_flags & XFS_TRANS_DQ_DIRTY)
-               ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
-
-       for (j = 0; j < 2; j++) {
-               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-                       if (oqa[i].qt_dquot == NULL)
-                               break;
-                       oq = &oqa[i];
-                       nq = &nqa[i];
-
-                       nq->qt_dquot = oq->qt_dquot;
-                       nq->qt_bcount_delta = nq->qt_icount_delta = 0;
-                       nq->qt_rtbcount_delta = 0;
-
-                       /*
-                        * Transfer whatever is left of the reservations.
-                        */
-                       nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used;
-                       oq->qt_blk_res = oq->qt_blk_res_used;
-
-                       nq->qt_rtblk_res = oq->qt_rtblk_res -
-                               oq->qt_rtblk_res_used;
-                       oq->qt_rtblk_res = oq->qt_rtblk_res_used;
-
-                       nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used;
-                       oq->qt_ino_res = oq->qt_ino_res_used;
-
-               }
-               oqa = otp->t_dqinfo->dqa_grpdquots;
-               nqa = ntp->t_dqinfo->dqa_grpdquots;
-       }
-}
-
-/*
- * Wrap around mod_dquot to account for both user and group quotas.
- */
-void
-xfs_trans_mod_dquot_byino(
-       xfs_trans_t     *tp,
-       xfs_inode_t     *ip,
-       uint            field,
-       long            delta)
-{
-       xfs_mount_t     *mp = tp->t_mountp;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) ||
-           !XFS_IS_QUOTA_ON(mp) ||
-           ip->i_ino == mp->m_sb.sb_uquotino ||
-           ip->i_ino == mp->m_sb.sb_gquotino)
-               return;
-
-       if (tp->t_dqinfo == NULL)
-               xfs_trans_alloc_dqinfo(tp);
-
-       if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
-               (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
-       if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot)
-               (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta);
-}
-
-STATIC xfs_dqtrx_t *
-xfs_trans_get_dqtrx(
-       xfs_trans_t     *tp,
-       xfs_dquot_t     *dqp)
-{
-       int             i;
-       xfs_dqtrx_t     *qa;
-
-       qa = XFS_QM_ISUDQ(dqp) ?
-               tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots;
-
-       for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-               if (qa[i].qt_dquot == NULL ||
-                   qa[i].qt_dquot == dqp)
-                       return &qa[i];
-       }
-
-       return NULL;
-}
-
-/*
- * Make the changes in the transaction structure.
- * The moral equivalent to xfs_trans_mod_sb().
- * We don't touch any fields in the dquot, so we don't care
- * if it's locked or not (most of the time it won't be).
- */
-void
-xfs_trans_mod_dquot(
-       xfs_trans_t     *tp,
-       xfs_dquot_t     *dqp,
-       uint            field,
-       long            delta)
-{
-       xfs_dqtrx_t     *qtrx;
-
-       ASSERT(tp);
-       ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
-       qtrx = NULL;
-
-       if (tp->t_dqinfo == NULL)
-               xfs_trans_alloc_dqinfo(tp);
-       /*
-        * Find either the first free slot or the slot that belongs
-        * to this dquot.
-        */
-       qtrx = xfs_trans_get_dqtrx(tp, dqp);
-       ASSERT(qtrx);
-       if (qtrx->qt_dquot == NULL)
-               qtrx->qt_dquot = dqp;
-
-       switch (field) {
-
-               /*
-                * regular disk blk reservation
-                */
-             case XFS_TRANS_DQ_RES_BLKS:
-               qtrx->qt_blk_res += (ulong)delta;
-               break;
-
-               /*
-                * inode reservation
-                */
-             case XFS_TRANS_DQ_RES_INOS:
-               qtrx->qt_ino_res += (ulong)delta;
-               break;
-
-               /*
-                * disk blocks used.
-                */
-             case XFS_TRANS_DQ_BCOUNT:
-               if (qtrx->qt_blk_res && delta > 0) {
-                       qtrx->qt_blk_res_used += (ulong)delta;
-                       ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
-               }
-               qtrx->qt_bcount_delta += delta;
-               break;
-
-             case XFS_TRANS_DQ_DELBCOUNT:
-               qtrx->qt_delbcnt_delta += delta;
-               break;
-
-               /*
-                * Inode Count
-                */
-             case XFS_TRANS_DQ_ICOUNT:
-               if (qtrx->qt_ino_res && delta > 0) {
-                       qtrx->qt_ino_res_used += (ulong)delta;
-                       ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used);
-               }
-               qtrx->qt_icount_delta += delta;
-               break;
-
-               /*
-                * rtblk reservation
-                */
-             case XFS_TRANS_DQ_RES_RTBLKS:
-               qtrx->qt_rtblk_res += (ulong)delta;
-               break;
-
-               /*
-                * rtblk count
-                */
-             case XFS_TRANS_DQ_RTBCOUNT:
-               if (qtrx->qt_rtblk_res && delta > 0) {
-                       qtrx->qt_rtblk_res_used += (ulong)delta;
-                       ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used);
-               }
-               qtrx->qt_rtbcount_delta += delta;
-               break;
-
-             case XFS_TRANS_DQ_DELRTBCOUNT:
-               qtrx->qt_delrtb_delta += delta;
-               break;
-
-             default:
-               ASSERT(0);
-       }
-       tp->t_flags |= XFS_TRANS_DQ_DIRTY;
-}
-
-
-/*
- * Given an array of dqtrx structures, lock all the dquots associated
- * and join them to the transaction, provided they have been modified.
- * We know that the highest number of dquots (of one type - usr OR grp),
- * involved in a transaction is 2 and that both usr and grp combined - 3.
- * So, we don't attempt to make this very generic.
- */
-STATIC void
-xfs_trans_dqlockedjoin(
-       xfs_trans_t     *tp,
-       xfs_dqtrx_t     *q)
-{
-       ASSERT(q[0].qt_dquot != NULL);
-       if (q[1].qt_dquot == NULL) {
-               xfs_dqlock(q[0].qt_dquot);
-               xfs_trans_dqjoin(tp, q[0].qt_dquot);
-       } else {
-               ASSERT(XFS_QM_TRANS_MAXDQS == 2);
-               xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot);
-               xfs_trans_dqjoin(tp, q[0].qt_dquot);
-               xfs_trans_dqjoin(tp, q[1].qt_dquot);
-       }
-}
-
-
-/*
- * Called by xfs_trans_commit() and similar in spirit to
- * xfs_trans_apply_sb_deltas().
- * Go thru all the dquots belonging to this transaction and modify the
- * INCORE dquot to reflect the actual usages.
- * Unreserve just the reservations done by this transaction.
- * dquot is still left locked at exit.
- */
-void
-xfs_trans_apply_dquot_deltas(
-       xfs_trans_t             *tp)
-{
-       int                     i, j;
-       xfs_dquot_t             *dqp;
-       xfs_dqtrx_t             *qtrx, *qa;
-       xfs_disk_dquot_t        *d;
-       long                    totalbdelta;
-       long                    totalrtbdelta;
-
-       if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
-               return;
-
-       ASSERT(tp->t_dqinfo);
-       qa = tp->t_dqinfo->dqa_usrdquots;
-       for (j = 0; j < 2; j++) {
-               if (qa[0].qt_dquot == NULL) {
-                       qa = tp->t_dqinfo->dqa_grpdquots;
-                       continue;
-               }
-
-               /*
-                * Lock all of the dquots and join them to the transaction.
-                */
-               xfs_trans_dqlockedjoin(tp, qa);
-
-               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-                       qtrx = &qa[i];
-                       /*
-                        * The array of dquots is filled
-                        * sequentially, not sparsely.
-                        */
-                       if ((dqp = qtrx->qt_dquot) == NULL)
-                               break;
-
-                       ASSERT(XFS_DQ_IS_LOCKED(dqp));
-                       ASSERT(dqp->q_transp == tp);
-
-                       /*
-                        * adjust the actual number of blocks used
-                        */
-                       d = &dqp->q_core;
-
-                       /*
-                        * The issue here is - sometimes we don't make a blkquota
-                        * reservation intentionally to be fair to users
-                        * (when the amount is small). On the other hand,
-                        * delayed allocs do make reservations, but that's
-                        * outside of a transaction, so we have no
-                        * idea how much was really reserved.
-                        * So, here we've accumulated delayed allocation blks and
-                        * non-delay blks. The assumption is that the
-                        * delayed ones are always reserved (outside of a
-                        * transaction), and the others may or may not have
-                        * quota reservations.
-                        */
-                       totalbdelta = qtrx->qt_bcount_delta +
-                               qtrx->qt_delbcnt_delta;
-                       totalrtbdelta = qtrx->qt_rtbcount_delta +
-                               qtrx->qt_delrtb_delta;
-#ifdef DEBUG
-                       if (totalbdelta < 0)
-                               ASSERT(be64_to_cpu(d->d_bcount) >=
-                                      -totalbdelta);
-
-                       if (totalrtbdelta < 0)
-                               ASSERT(be64_to_cpu(d->d_rtbcount) >=
-                                      -totalrtbdelta);
-
-                       if (qtrx->qt_icount_delta < 0)
-                               ASSERT(be64_to_cpu(d->d_icount) >=
-                                      -qtrx->qt_icount_delta);
-#endif
-                       if (totalbdelta)
-                               be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
-
-                       if (qtrx->qt_icount_delta)
-                               be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta);
-
-                       if (totalrtbdelta)
-                               be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta);
-
-                       /*
-                        * Get any default limits in use.
-                        * Start/reset the timer(s) if needed.
-                        */
-                       if (d->d_id) {
-                               xfs_qm_adjust_dqlimits(tp->t_mountp, d);
-                               xfs_qm_adjust_dqtimers(tp->t_mountp, d);
-                       }
-
-                       dqp->dq_flags |= XFS_DQ_DIRTY;
-                       /*
-                        * add this to the list of items to get logged
-                        */
-                       xfs_trans_log_dquot(tp, dqp);
-                       /*
-                        * Take off what's left of the original reservation.
-                        * In case of delayed allocations, there's no
-                        * reservation that a transaction structure knows of.
-                        */
-                       if (qtrx->qt_blk_res != 0) {
-                               if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
-                                       if (qtrx->qt_blk_res >
-                                           qtrx->qt_blk_res_used)
-                                               dqp->q_res_bcount -= (xfs_qcnt_t)
-                                                       (qtrx->qt_blk_res -
-                                                        qtrx->qt_blk_res_used);
-                                       else
-                                               dqp->q_res_bcount -= (xfs_qcnt_t)
-                                                       (qtrx->qt_blk_res_used -
-                                                        qtrx->qt_blk_res);
-                               }
-                       } else {
-                               /*
-                                * These blks were never reserved, either inside
-                                * a transaction or outside one (in a delayed
-                                * allocation). Also, this isn't always a
-                                * negative number since we sometimes
-                                * deliberately skip quota reservations.
-                                */
-                               if (qtrx->qt_bcount_delta) {
-                                       dqp->q_res_bcount +=
-                                             (xfs_qcnt_t)qtrx->qt_bcount_delta;
-                               }
-                       }
-                       /*
-                        * Adjust the RT reservation.
-                        */
-                       if (qtrx->qt_rtblk_res != 0) {
-                               if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) {
-                                       if (qtrx->qt_rtblk_res >
-                                           qtrx->qt_rtblk_res_used)
-                                              dqp->q_res_rtbcount -= (xfs_qcnt_t)
-                                                      (qtrx->qt_rtblk_res -
-                                                       qtrx->qt_rtblk_res_used);
-                                       else
-                                              dqp->q_res_rtbcount -= (xfs_qcnt_t)
-                                                      (qtrx->qt_rtblk_res_used -
-                                                       qtrx->qt_rtblk_res);
-                               }
-                       } else {
-                               if (qtrx->qt_rtbcount_delta)
-                                       dqp->q_res_rtbcount +=
-                                           (xfs_qcnt_t)qtrx->qt_rtbcount_delta;
-                       }
-
-                       /*
-                        * Adjust the inode reservation.
-                        */
-                       if (qtrx->qt_ino_res != 0) {
-                               ASSERT(qtrx->qt_ino_res >=
-                                      qtrx->qt_ino_res_used);
-                               if (qtrx->qt_ino_res > qtrx->qt_ino_res_used)
-                                       dqp->q_res_icount -= (xfs_qcnt_t)
-                                               (qtrx->qt_ino_res -
-                                                qtrx->qt_ino_res_used);
-                       } else {
-                               if (qtrx->qt_icount_delta)
-                                       dqp->q_res_icount +=
-                                           (xfs_qcnt_t)qtrx->qt_icount_delta;
-                       }
-
-                       ASSERT(dqp->q_res_bcount >=
-                               be64_to_cpu(dqp->q_core.d_bcount));
-                       ASSERT(dqp->q_res_icount >=
-                               be64_to_cpu(dqp->q_core.d_icount));
-                       ASSERT(dqp->q_res_rtbcount >=
-                               be64_to_cpu(dqp->q_core.d_rtbcount));
-               }
-               /*
-                * Do the group quotas next
-                */
-               qa = tp->t_dqinfo->dqa_grpdquots;
-       }
-}
-
-/*
- * Release the reservations, and adjust the dquots accordingly.
- * This is called only when the transaction is being aborted. If by
- * any chance we have done dquot modifications incore (ie. deltas) already,
- * we simply throw those away, since that's the expected behavior
- * when a transaction is curtailed without a commit.
- */
-void
-xfs_trans_unreserve_and_mod_dquots(
-       xfs_trans_t             *tp)
-{
-       int                     i, j;
-       xfs_dquot_t             *dqp;
-       xfs_dqtrx_t             *qtrx, *qa;
-       boolean_t               locked;
-
-       if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
-               return;
-
-       qa = tp->t_dqinfo->dqa_usrdquots;
-
-       for (j = 0; j < 2; j++) {
-               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-                       qtrx = &qa[i];
-                       /*
-                        * We assume that the array of dquots is filled
-                        * sequentially, not sparsely.
-                        */
-                       if ((dqp = qtrx->qt_dquot) == NULL)
-                               break;
-                       /*
-                        * Unreserve the original reservation. We don't care
-                        * about the number of blocks used field, or deltas.
-                        * Also we don't bother to zero the fields.
-                        */
-                       locked = B_FALSE;
-                       if (qtrx->qt_blk_res) {
-                               xfs_dqlock(dqp);
-                               locked = B_TRUE;
-                               dqp->q_res_bcount -=
-                                       (xfs_qcnt_t)qtrx->qt_blk_res;
-                       }
-                       if (qtrx->qt_ino_res) {
-                               if (!locked) {
-                                       xfs_dqlock(dqp);
-                                       locked = B_TRUE;
-                               }
-                               dqp->q_res_icount -=
-                                       (xfs_qcnt_t)qtrx->qt_ino_res;
-                       }
-
-                       if (qtrx->qt_rtblk_res) {
-                               if (!locked) {
-                                       xfs_dqlock(dqp);
-                                       locked = B_TRUE;
-                               }
-                               dqp->q_res_rtbcount -=
-                                       (xfs_qcnt_t)qtrx->qt_rtblk_res;
-                       }
-                       if (locked)
-                               xfs_dqunlock(dqp);
-
-               }
-               qa = tp->t_dqinfo->dqa_grpdquots;
-       }
-}
-
-STATIC void
-xfs_quota_warn(
-       struct xfs_mount        *mp,
-       struct xfs_dquot        *dqp,
-       int                     type)
-{
-       /* no warnings for project quotas - we just return ENOSPC later */
-       if (dqp->dq_flags & XFS_DQ_PROJ)
-               return;
-       quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA,
-                          be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev,
-                          type);
-}
-
-/*
- * This reserves disk blocks and inodes against a dquot.
- * Flags indicate if the dquot is to be locked here and also
- * if the blk reservation is for RT or regular blocks.
- * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check.
- */
-STATIC int
-xfs_trans_dqresv(
-       xfs_trans_t     *tp,
-       xfs_mount_t     *mp,
-       xfs_dquot_t     *dqp,
-       long            nblks,
-       long            ninos,
-       uint            flags)
-{
-       xfs_qcnt_t      hardlimit;
-       xfs_qcnt_t      softlimit;
-       time_t          timer;
-       xfs_qwarncnt_t  warns;
-       xfs_qwarncnt_t  warnlimit;
-       xfs_qcnt_t      count;
-       xfs_qcnt_t      *resbcountp;
-       xfs_quotainfo_t *q = mp->m_quotainfo;
-
-
-       xfs_dqlock(dqp);
-
-       if (flags & XFS_TRANS_DQ_RES_BLKS) {
-               hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
-               if (!hardlimit)
-                       hardlimit = q->qi_bhardlimit;
-               softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
-               if (!softlimit)
-                       softlimit = q->qi_bsoftlimit;
-               timer = be32_to_cpu(dqp->q_core.d_btimer);
-               warns = be16_to_cpu(dqp->q_core.d_bwarns);
-               warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
-               resbcountp = &dqp->q_res_bcount;
-       } else {
-               ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
-               hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
-               if (!hardlimit)
-                       hardlimit = q->qi_rtbhardlimit;
-               softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
-               if (!softlimit)
-                       softlimit = q->qi_rtbsoftlimit;
-               timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
-               warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
-               warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
-               resbcountp = &dqp->q_res_rtbcount;
-       }
-
-       if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
-           dqp->q_core.d_id &&
-           ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
-            (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
-             (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
-               if (nblks > 0) {
-                       /*
-                        * dquot is locked already. See if we'd go over the
-                        * hardlimit or exceed the timelimit if we allocate
-                        * nblks.
-                        */
-                       if (hardlimit > 0ULL &&
-                           hardlimit <= nblks + *resbcountp) {
-                               xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
-                               goto error_return;
-                       }
-                       if (softlimit > 0ULL &&
-                           softlimit <= nblks + *resbcountp) {
-                               if ((timer != 0 && get_seconds() > timer) ||
-                                   (warns != 0 && warns >= warnlimit)) {
-                                       xfs_quota_warn(mp, dqp,
-                                                      QUOTA_NL_BSOFTLONGWARN);
-                                       goto error_return;
-                               }
-
-                               xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN);
-                       }
-               }
-               if (ninos > 0) {
-                       count = be64_to_cpu(dqp->q_core.d_icount);
-                       timer = be32_to_cpu(dqp->q_core.d_itimer);
-                       warns = be16_to_cpu(dqp->q_core.d_iwarns);
-                       warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
-                       hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
-                       if (!hardlimit)
-                               hardlimit = q->qi_ihardlimit;
-                       softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
-                       if (!softlimit)
-                               softlimit = q->qi_isoftlimit;
-
-                       if (hardlimit > 0ULL && count >= hardlimit) {
-                               xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
-                               goto error_return;
-                       }
-                       if (softlimit > 0ULL && count >= softlimit) {
-                               if  ((timer != 0 && get_seconds() > timer) ||
-                                    (warns != 0 && warns >= warnlimit)) {
-                                       xfs_quota_warn(mp, dqp,
-                                                      QUOTA_NL_ISOFTLONGWARN);
-                                       goto error_return;
-                               }
-                               xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN);
-                       }
-               }
-       }
-
-       /*
-        * Change the reservation, but not the actual usage.
-        * Note that q_res_bcount = q_core.d_bcount + resv
-        */
-       (*resbcountp) += (xfs_qcnt_t)nblks;
-       if (ninos != 0)
-               dqp->q_res_icount += (xfs_qcnt_t)ninos;
-
-       /*
-        * note the reservation amt in the trans struct too,
-        * so that the transaction knows how much was reserved by
-        * it against this particular dquot.
-        * We don't do this when we are reserving for a delayed allocation,
-        * because we don't have the luxury of a transaction envelope then.
-        */
-       if (tp) {
-               ASSERT(tp->t_dqinfo);
-               ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
-               if (nblks != 0)
-                       xfs_trans_mod_dquot(tp, dqp,
-                                           flags & XFS_QMOPT_RESBLK_MASK,
-                                           nblks);
-               if (ninos != 0)
-                       xfs_trans_mod_dquot(tp, dqp,
-                                           XFS_TRANS_DQ_RES_INOS,
-                                           ninos);
-       }
-       ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount));
-       ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount));
-       ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
-
-       xfs_dqunlock(dqp);
-       return 0;
-
-error_return:
-       xfs_dqunlock(dqp);
-       if (flags & XFS_QMOPT_ENOSPC)
-               return ENOSPC;
-       return EDQUOT;
-}
-
-
-/*
- * Given dquot(s), make disk block and/or inode reservations against them.
- * The fact that this does the reservation against both the usr and
- * grp/prj quotas is important, because this follows a both-or-nothing
- * approach.
- *
- * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
- *        XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT.  Used by pquota.
- *        XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
- *        XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
- * dquots are unlocked on return, if they were not locked by caller.
- */
-int
-xfs_trans_reserve_quota_bydquots(
-       xfs_trans_t     *tp,
-       xfs_mount_t     *mp,
-       xfs_dquot_t     *udqp,
-       xfs_dquot_t     *gdqp,
-       long            nblks,
-       long            ninos,
-       uint            flags)
-{
-       int             resvd = 0, error;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return 0;
-
-       if (tp && tp->t_dqinfo == NULL)
-               xfs_trans_alloc_dqinfo(tp);
-
-       ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
-
-       if (udqp) {
-               error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos,
-                                       (flags & ~XFS_QMOPT_ENOSPC));
-               if (error)
-                       return error;
-               resvd = 1;
-       }
-
-       if (gdqp) {
-               error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags);
-               if (error) {
-                       /*
-                        * can't do it, so backout previous reservation
-                        */
-                       if (resvd) {
-                               flags |= XFS_QMOPT_FORCE_RES;
-                               xfs_trans_dqresv(tp, mp, udqp,
-                                                -nblks, -ninos, flags);
-                       }
-                       return error;
-               }
-       }
-
-       /*
-        * Didn't change anything critical, so, no need to log
-        */
-       return 0;
-}
-
-
-/*
- * Lock the dquot and change the reservation if we can.
- * This doesn't change the actual usage, just the reservation.
- * The inode sent in is locked.
- */
-int
-xfs_trans_reserve_quota_nblks(
-       struct xfs_trans        *tp,
-       struct xfs_inode        *ip,
-       long                    nblks,
-       long                    ninos,
-       uint                    flags)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-
-       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-               return 0;
-       if (XFS_IS_PQUOTA_ON(mp))
-               flags |= XFS_QMOPT_ENOSPC;
-
-       ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
-       ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
-                               XFS_TRANS_DQ_RES_RTBLKS ||
-              (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
-                               XFS_TRANS_DQ_RES_BLKS);
-
-       /*
-        * Reserve nblks against these dquots, with trans as the mediator.
-        */
-       return xfs_trans_reserve_quota_bydquots(tp, mp,
-                                               ip->i_udquot, ip->i_gdquot,
-                                               nblks, ninos, flags);
-}
-
-/*
- * This routine is called to allocate a quotaoff log item.
- */
-xfs_qoff_logitem_t *
-xfs_trans_get_qoff_item(
-       xfs_trans_t             *tp,
-       xfs_qoff_logitem_t      *startqoff,
-       uint                    flags)
-{
-       xfs_qoff_logitem_t      *q;
-
-       ASSERT(tp != NULL);
-
-       q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags);
-       ASSERT(q != NULL);
-
-       /*
-        * Get a log_item_desc to point at the new item.
-        */
-       xfs_trans_add_item(tp, &q->qql_item);
-       return q;
-}
-
-
-/*
- * This is called to mark the quotaoff logitem as needing
- * to be logged when the transaction is committed.  The logitem must
- * already be associated with the given transaction.
- */
-void
-xfs_trans_log_quotaoff_item(
-       xfs_trans_t             *tp,
-       xfs_qoff_logitem_t      *qlp)
-{
-       tp->t_flags |= XFS_TRANS_DIRTY;
-       qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY;
-}
-
-STATIC void
-xfs_trans_alloc_dqinfo(
-       xfs_trans_t     *tp)
-{
-       tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
-}
-
-void
-xfs_trans_free_dqinfo(
-       xfs_trans_t     *tp)
-{
-       if (!tp->t_dqinfo)
-               return;
-       kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
-       tp->t_dqinfo = NULL;
-}
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c

deleted file mode 100644 (file)

index b83f76b..0000000
--- a/fs/xfs/support/uuid.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <xfs.h>
-
-/* IRIX interpretation of an uuid_t */
-typedef struct {
-       __be32  uu_timelow;
-       __be16  uu_timemid;
-       __be16  uu_timehi;
-       __be16  uu_clockseq;
-       __be16  uu_node[3];
-} xfs_uu_t;
-
-/*
- * uuid_getnodeuniq - obtain the node unique fields of a UUID.
- *
- * This is not in any way a standard or condoned UUID function;
- * it just something that's needed for user-level file handles.
- */
-void
-uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
-{
-       xfs_uu_t *uup = (xfs_uu_t *)uuid;
-
-       fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
-                  be16_to_cpu(uup->uu_timemid);
-       fsid[1] = be32_to_cpu(uup->uu_timelow);
-}
-
-int
-uuid_is_nil(uuid_t *uuid)
-{
-       int     i;
-       char    *cp = (char *)uuid;
-
-       if (uuid == NULL)
-               return 0;
-       /* implied check of version number here... */
-       for (i = 0; i < sizeof *uuid; i++)
-               if (*cp++) return 0;    /* not nil */
-       return 1;       /* is nil */
-}
-
-int
-uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
-{
-       return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
-}
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h

deleted file mode 100644 (file)

index 4732d71..0000000
--- a/fs/xfs/support/uuid.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_SUPPORT_UUID_H__
-#define __XFS_SUPPORT_UUID_H__
-
-typedef struct {
-       unsigned char   __u_bits[16];
-} uuid_t;
-
-extern int uuid_is_nil(uuid_t *uuid);
-extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
-extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
-
-#endif /* __XFS_SUPPORT_UUID_H__ */
diff --git a/fs/xfs/time.h b/fs/xfs/time.h

new file mode 100644 (file)

index 0000000..387e695
--- /dev/null
+++ b/fs/xfs/time.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPPORT_TIME_H__
+#define __XFS_SUPPORT_TIME_H__
+
+#include <linux/sched.h>
+#include <linux/time.h>
+
+typedef struct timespec timespec_t;
+
+static inline void delay(long ticks)
+{
+       schedule_timeout_uninterruptible(ticks);
+}
+
+static inline void nanotime(struct timespec *tvp)
+{
+       *tvp = CURRENT_TIME;
+}
+
+#endif /* __XFS_SUPPORT_TIME_H__ */
diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c

new file mode 100644 (file)

index 0000000..b83f76b
--- /dev/null
+++ b/fs/xfs/uuid.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include <xfs.h>
+
+/* IRIX interpretation of an uuid_t */
+typedef struct {
+       __be32  uu_timelow;
+       __be16  uu_timemid;
+       __be16  uu_timehi;
+       __be16  uu_clockseq;
+       __be16  uu_node[3];
+} xfs_uu_t;
+
+/*
+ * uuid_getnodeuniq - obtain the node unique fields of a UUID.
+ *
+ * This is not in any way a standard or condoned UUID function;
+ * it just something that's needed for user-level file handles.
+ */
+void
+uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
+{
+       xfs_uu_t *uup = (xfs_uu_t *)uuid;
+
+       fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
+                  be16_to_cpu(uup->uu_timemid);
+       fsid[1] = be32_to_cpu(uup->uu_timelow);
+}
+
+int
+uuid_is_nil(uuid_t *uuid)
+{
+       int     i;
+       char    *cp = (char *)uuid;
+
+       if (uuid == NULL)
+               return 0;
+       /* implied check of version number here... */
+       for (i = 0; i < sizeof *uuid; i++)
+               if (*cp++) return 0;    /* not nil */
+       return 1;       /* is nil */
+}
+
+int
+uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
+{
+       return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
+}
diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h

new file mode 100644 (file)

index 0000000..4732d71
--- /dev/null
+++ b/fs/xfs/uuid.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPPORT_UUID_H__
+#define __XFS_SUPPORT_UUID_H__
+
+typedef struct {
+       unsigned char   __u_bits[16];
+} uuid_t;
+
+extern int uuid_is_nil(uuid_t *uuid);
+extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
+extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
+
+#endif /* __XFS_SUPPORT_UUID_H__ */
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h

index 53ec3ea9a625f40e36a16d7950e624e198836ee7..d8b11b7f94aae5bf7f591037d1fc07eaf79e7c25 100644 (file)
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -24,5 +24,6 @@
  #define XFS_BUF_LOCK_TRACKING 1
  #endif
  
-#include <linux-2.6/xfs_linux.h>
+#include "xfs_linux.h"
+
  #endif /* __XFS_H__ */
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c

new file mode 100644 (file)

index 0000000..b6c4b37
--- /dev/null
+++ b/fs/xfs/xfs_acl.c
@@ -0,0 +1,420 @@
+/*
+ * Copyright (c) 2008, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_acl.h"
+#include "xfs_attr.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_vnodeops.h"
+#include "xfs_trace.h"
+#include <linux/slab.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+
+
+/*
+ * Locking scheme:
+ *  - all ACL updates are protected by inode->i_mutex, which is taken before
+ *    calling into this file.
+ */
+
+STATIC struct posix_acl *
+xfs_acl_from_disk(struct xfs_acl *aclp)
+{
+       struct posix_acl_entry *acl_e;
+       struct posix_acl *acl;
+       struct xfs_acl_entry *ace;
+       int count, i;
+
+       count = be32_to_cpu(aclp->acl_cnt);
+
+       acl = posix_acl_alloc(count, GFP_KERNEL);
+       if (!acl)
+               return ERR_PTR(-ENOMEM);
+
+       for (i = 0; i < count; i++) {
+               acl_e = &acl->a_entries[i];
+               ace = &aclp->acl_entry[i];
+
+               /*
+                * The tag is 32 bits on disk and 16 bits in core.
+                *
+                * Because every access to it goes through the core
+                * format first this is not a problem.
+                */
+               acl_e->e_tag = be32_to_cpu(ace->ae_tag);
+               acl_e->e_perm = be16_to_cpu(ace->ae_perm);
+
+               switch (acl_e->e_tag) {
+               case ACL_USER:
+               case ACL_GROUP:
+                       acl_e->e_id = be32_to_cpu(ace->ae_id);
+                       break;
+               case ACL_USER_OBJ:
+               case ACL_GROUP_OBJ:
+               case ACL_MASK:
+               case ACL_OTHER:
+                       acl_e->e_id = ACL_UNDEFINED_ID;
+                       break;
+               default:
+                       goto fail;
+               }
+       }
+       return acl;
+
+fail:
+       posix_acl_release(acl);
+       return ERR_PTR(-EINVAL);
+}
+
+STATIC void
+xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
+{
+       const struct posix_acl_entry *acl_e;
+       struct xfs_acl_entry *ace;
+       int i;
+
+       aclp->acl_cnt = cpu_to_be32(acl->a_count);
+       for (i = 0; i < acl->a_count; i++) {
+               ace = &aclp->acl_entry[i];
+               acl_e = &acl->a_entries[i];
+
+               ace->ae_tag = cpu_to_be32(acl_e->e_tag);
+               ace->ae_id = cpu_to_be32(acl_e->e_id);
+               ace->ae_perm = cpu_to_be16(acl_e->e_perm);
+       }
+}
+
+struct posix_acl *
+xfs_get_acl(struct inode *inode, int type)
+{
+       struct xfs_inode *ip = XFS_I(inode);
+       struct posix_acl *acl;
+       struct xfs_acl *xfs_acl;
+       int len = sizeof(struct xfs_acl);
+       unsigned char *ea_name;
+       int error;
+
+       acl = get_cached_acl(inode, type);
+       if (acl != ACL_NOT_CACHED)
+               return acl;
+
+       trace_xfs_get_acl(ip);
+
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               ea_name = SGI_ACL_FILE;
+               break;
+       case ACL_TYPE_DEFAULT:
+               ea_name = SGI_ACL_DEFAULT;
+               break;
+       default:
+               BUG();
+       }
+
+       /*
+        * If we have a cached ACLs value just return it, not need to
+        * go out to the disk.
+        */
+
+       xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+       if (!xfs_acl)
+               return ERR_PTR(-ENOMEM);
+
+       error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
+                                                       &len, ATTR_ROOT);
+       if (error) {
+               /*
+                * If the attribute doesn't exist make sure we have a negative
+                * cache entry, for any other error assume it is transient and
+                * leave the cache entry as ACL_NOT_CACHED.
+                */
+               if (error == -ENOATTR) {
+                       acl = NULL;
+                       goto out_update_cache;
+               }
+               goto out;
+       }
+
+       acl = xfs_acl_from_disk(xfs_acl);
+       if (IS_ERR(acl))
+               goto out;
+
+ out_update_cache:
+       set_cached_acl(inode, type, acl);
+ out:
+       kfree(xfs_acl);
+       return acl;
+}
+
+STATIC int
+xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+{
+       struct xfs_inode *ip = XFS_I(inode);
+       unsigned char *ea_name;
+       int error;
+
+       if (S_ISLNK(inode->i_mode))
+               return -EOPNOTSUPP;
+
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               ea_name = SGI_ACL_FILE;
+               break;
+       case ACL_TYPE_DEFAULT:
+               if (!S_ISDIR(inode->i_mode))
+                       return acl ? -EACCES : 0;
+               ea_name = SGI_ACL_DEFAULT;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (acl) {
+               struct xfs_acl *xfs_acl;
+               int len;
+
+               xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+               if (!xfs_acl)
+                       return -ENOMEM;
+
+               xfs_acl_to_disk(xfs_acl, acl);
+               len = sizeof(struct xfs_acl) -
+                       (sizeof(struct xfs_acl_entry) *
+                        (XFS_ACL_MAX_ENTRIES - acl->a_count));
+
+               error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
+                               len, ATTR_ROOT);
+
+               kfree(xfs_acl);
+       } else {
+               /*
+                * A NULL ACL argument means we want to remove the ACL.
+                */
+               error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
+
+               /*
+                * If the attribute didn't exist to start with that's fine.
+                */
+               if (error == -ENOATTR)
+                       error = 0;
+       }
+
+       if (!error)
+               set_cached_acl(inode, type, acl);
+       return error;
+}
+
+static int
+xfs_set_mode(struct inode *inode, umode_t mode)
+{
+       int error = 0;
+
+       if (mode != inode->i_mode) {
+               struct iattr iattr;
+
+               iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
+               iattr.ia_mode = mode;
+               iattr.ia_ctime = current_fs_time(inode->i_sb);
+
+               error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
+       }
+
+       return error;
+}
+
+static int
+xfs_acl_exists(struct inode *inode, unsigned char *name)
+{
+       int len = sizeof(struct xfs_acl);
+
+       return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
+                           ATTR_ROOT|ATTR_KERNOVAL) == 0);
+}
+
+int
+posix_acl_access_exists(struct inode *inode)
+{
+       return xfs_acl_exists(inode, SGI_ACL_FILE);
+}
+
+int
+posix_acl_default_exists(struct inode *inode)
+{
+       if (!S_ISDIR(inode->i_mode))
+               return 0;
+       return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
+}
+
+/*
+ * No need for i_mutex because the inode is not yet exposed to the VFS.
+ */
+int
+xfs_inherit_acl(struct inode *inode, struct posix_acl *acl)
+{
+       umode_t mode = inode->i_mode;
+       int error = 0, inherit = 0;
+
+       if (S_ISDIR(inode->i_mode)) {
+               error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
+               if (error)
+                       goto out;
+       }
+
+       error = posix_acl_create(&acl, GFP_KERNEL, &mode);
+       if (error < 0)
+               return error;
+
+       /*
+        * If posix_acl_create returns a positive value we need to
+        * inherit a permission that can't be represented using the Unix
+        * mode bits and we actually need to set an ACL.
+        */
+       if (error > 0)
+               inherit = 1;
+
+       error = xfs_set_mode(inode, mode);
+       if (error)
+               goto out;
+
+       if (inherit)
+               error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
+
+out:
+       posix_acl_release(acl);
+       return error;
+}
+
+int
+xfs_acl_chmod(struct inode *inode)
+{
+       struct posix_acl *acl;
+       int error;
+
+       if (S_ISLNK(inode->i_mode))
+               return -EOPNOTSUPP;
+
+       acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
+       if (IS_ERR(acl) || !acl)
+               return PTR_ERR(acl);
+
+       error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+       if (error)
+               return error;
+
+       error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl);
+       posix_acl_release(acl);
+       return error;
+}
+
+static int
+xfs_xattr_acl_get(struct dentry *dentry, const char *name,
+               void *value, size_t size, int type)
+{
+       struct posix_acl *acl;
+       int error;
+
+       acl = xfs_get_acl(dentry->d_inode, type);
+       if (IS_ERR(acl))
+               return PTR_ERR(acl);
+       if (acl == NULL)
+               return -ENODATA;
+
+       error = posix_acl_to_xattr(acl, value, size);
+       posix_acl_release(acl);
+
+       return error;
+}
+
+static int
+xfs_xattr_acl_set(struct dentry *dentry, const char *name,
+               const void *value, size_t size, int flags, int type)
+{
+       struct inode *inode = dentry->d_inode;
+       struct posix_acl *acl = NULL;
+       int error = 0;
+
+       if (flags & XATTR_CREATE)
+               return -EINVAL;
+       if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
+               return value ? -EACCES : 0;
+       if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
+               return -EPERM;
+
+       if (!value)
+               goto set_acl;
+
+       acl = posix_acl_from_xattr(value, size);
+       if (!acl) {
+               /*
+                * acl_set_file(3) may request that we set default ACLs with
+                * zero length -- defend (gracefully) against that here.
+                */
+               goto out;
+       }
+       if (IS_ERR(acl)) {
+               error = PTR_ERR(acl);
+               goto out;
+       }
+
+       error = posix_acl_valid(acl);
+       if (error)
+               goto out_release;
+
+       error = -EINVAL;
+       if (acl->a_count > XFS_ACL_MAX_ENTRIES)
+               goto out_release;
+
+       if (type == ACL_TYPE_ACCESS) {
+               umode_t mode = inode->i_mode;
+               error = posix_acl_equiv_mode(acl, &mode);
+
+               if (error <= 0) {
+                       posix_acl_release(acl);
+                       acl = NULL;
+
+                       if (error < 0)
+                               return error;
+               }
+
+               error = xfs_set_mode(inode, mode);
+               if (error)
+                       goto out_release;
+       }
+
+ set_acl:
+       error = xfs_set_acl(inode, type, acl);
+ out_release:
+       posix_acl_release(acl);
+ out:
+       return error;
+}
+
+const struct xattr_handler xfs_xattr_acl_access_handler = {
+       .prefix = POSIX_ACL_XATTR_ACCESS,
+       .flags  = ACL_TYPE_ACCESS,
+       .get    = xfs_xattr_acl_get,
+       .set    = xfs_xattr_acl_set,
+};
+
+const struct xattr_handler xfs_xattr_acl_default_handler = {
+       .prefix = POSIX_ACL_XATTR_DEFAULT,
+       .flags  = ACL_TYPE_DEFAULT,
+       .get    = xfs_xattr_acl_get,
+       .set    = xfs_xattr_acl_set,
+};
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h

index 6530769a999bdb045ba4ec5d1588440a04993e1c..4805f009f923caac2e03b9f9a0dac07e64ae53f1 100644 (file)
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -103,7 +103,7 @@ typedef struct xfs_agf {
  /* disk block (xfs_daddr_t) in the AG */
  #define XFS_AGF_DADDR(mp)      ((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
  #define        XFS_AGF_BLOCK(mp)       XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp))
-#define        XFS_BUF_TO_AGF(bp)      ((xfs_agf_t *)XFS_BUF_PTR(bp))
+#define        XFS_BUF_TO_AGF(bp)      ((xfs_agf_t *)((bp)->b_addr))
  
  extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
                         xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
@@ -156,7 +156,7 @@ typedef struct xfs_agi {
  /* disk block (xfs_daddr_t) in the AG */
  #define XFS_AGI_DADDR(mp)      ((xfs_daddr_t)(2 << (mp)->m_sectbb_log))
  #define        XFS_AGI_BLOCK(mp)       XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp))
-#define        XFS_BUF_TO_AGI(bp)      ((xfs_agi_t *)XFS_BUF_PTR(bp))
+#define        XFS_BUF_TO_AGI(bp)      ((xfs_agi_t *)((bp)->b_addr))
  
  extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
                                 xfs_agnumber_t agno, struct xfs_buf **bpp);
@@ -168,7 +168,7 @@ extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
  #define XFS_AGFL_DADDR(mp)     ((xfs_daddr_t)(3 << (mp)->m_sectbb_log))
  #define        XFS_AGFL_BLOCK(mp)      XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp))
  #define XFS_AGFL_SIZE(mp)      ((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t))
-#define        XFS_BUF_TO_AGFL(bp)     ((xfs_agfl_t *)XFS_BUF_PTR(bp))
+#define        XFS_BUF_TO_AGFL(bp)     ((xfs_agfl_t *)((bp)->b_addr))
  
  typedef struct xfs_agfl {
         __be32          agfl_bno[1];    /* actually XFS_AGFL_SIZE(mp) */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c

index 1e00b3ef627496d39a167891c2f8d7f980b595f6..bdd9cb54d63bb04fcc3fdcf5fc04d66db25ce015 100644 (file)
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -451,8 +451,7 @@ xfs_alloc_read_agfl(
                         XFS_FSS_TO_BB(mp, 1), 0, &bp);
         if (error)
                 return error;
-       ASSERT(bp);
-       ASSERT(!XFS_BUF_GETERROR(bp));
+       ASSERT(!xfs_buf_geterror(bp));
         XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF);
         *bpp = bp;
         return 0;
@@ -2116,7 +2115,7 @@ xfs_read_agf(
         if (!*bpp)
                 return 0;
  
-       ASSERT(!XFS_BUF_GETERROR(*bpp));
+       ASSERT(!(*bpp)->b_error);
         agf = XFS_BUF_TO_AGF(*bpp);
  
         /*
@@ -2168,7 +2167,7 @@ xfs_alloc_read_agf(
                 return error;
         if (!*bpp)
                 return 0;
-       ASSERT(!XFS_BUF_GETERROR(*bpp));
+       ASSERT(!(*bpp)->b_error);
  
         agf = XFS_BUF_TO_AGF(*bpp);
         pag = xfs_perag_get(mp, agno);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c

new file mode 100644 (file)

index 0000000..8c37dde
--- /dev/null
+++ b/fs/xfs/xfs_aops.c
@@ -0,0 +1,1500 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_trans.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_error.h"
+#include "xfs_rw.h"
+#include "xfs_iomap.h"
+#include "xfs_vnodeops.h"
+#include "xfs_trace.h"
+#include "xfs_bmap.h"
+#include <linux/gfp.h>
+#include <linux/mpage.h>
+#include <linux/pagevec.h>
+#include <linux/writeback.h>
+
+
+/*
+ * Prime number of hash buckets since address is used as the key.
+ */
+#define NVSYNC         37
+#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC])
+static wait_queue_head_t xfs_ioend_wq[NVSYNC];
+
+void __init
+xfs_ioend_init(void)
+{
+       int i;
+
+       for (i = 0; i < NVSYNC; i++)
+               init_waitqueue_head(&xfs_ioend_wq[i]);
+}
+
+void
+xfs_ioend_wait(
+       xfs_inode_t     *ip)
+{
+       wait_queue_head_t *wq = to_ioend_wq(ip);
+
+       wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
+}
+
+STATIC void
+xfs_ioend_wake(
+       xfs_inode_t     *ip)
+{
+       if (atomic_dec_and_test(&ip->i_iocount))
+               wake_up(to_ioend_wq(ip));
+}
+
+void
+xfs_count_page_state(
+       struct page             *page,
+       int                     *delalloc,
+       int                     *unwritten)
+{
+       struct buffer_head      *bh, *head;
+
+       *delalloc = *unwritten = 0;
+
+       bh = head = page_buffers(page);
+       do {
+               if (buffer_unwritten(bh))
+                       (*unwritten) = 1;
+               else if (buffer_delay(bh))
+                       (*delalloc) = 1;
+       } while ((bh = bh->b_this_page) != head);
+}
+
+STATIC struct block_device *
+xfs_find_bdev_for_inode(
+       struct inode            *inode)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+
+       if (XFS_IS_REALTIME_INODE(ip))
+               return mp->m_rtdev_targp->bt_bdev;
+       else
+               return mp->m_ddev_targp->bt_bdev;
+}
+
+/*
+ * We're now finished for good with this ioend structure.
+ * Update the page state via the associated buffer_heads,
+ * release holds on the inode and bio, and finally free
+ * up memory.  Do not use the ioend after this.
+ */
+STATIC void
+xfs_destroy_ioend(
+       xfs_ioend_t             *ioend)
+{
+       struct buffer_head      *bh, *next;
+       struct xfs_inode        *ip = XFS_I(ioend->io_inode);
+
+       for (bh = ioend->io_buffer_head; bh; bh = next) {
+               next = bh->b_private;
+               bh->b_end_io(bh, !ioend->io_error);
+       }
+
+       /*
+        * Volume managers supporting multiple paths can send back ENODEV
+        * when the final path disappears.  In this case continuing to fill
+        * the page cache with dirty data which cannot be written out is
+        * evil, so prevent that.
+        */
+       if (unlikely(ioend->io_error == -ENODEV)) {
+               xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
+                                     __FILE__, __LINE__);
+       }
+
+       xfs_ioend_wake(ip);
+       mempool_free(ioend, xfs_ioend_pool);
+}
+
+/*
+ * If the end of the current ioend is beyond the current EOF,
+ * return the new EOF value, otherwise zero.
+ */
+STATIC xfs_fsize_t
+xfs_ioend_new_eof(
+       xfs_ioend_t             *ioend)
+{
+       xfs_inode_t             *ip = XFS_I(ioend->io_inode);
+       xfs_fsize_t             isize;
+       xfs_fsize_t             bsize;
+
+       bsize = ioend->io_offset + ioend->io_size;
+       isize = MAX(ip->i_size, ip->i_new_size);
+       isize = MIN(isize, bsize);
+       return isize > ip->i_d.di_size ? isize : 0;
+}
+
+/*
+ * Update on-disk file size now that data has been written to disk.  The
+ * current in-memory file size is i_size.  If a write is beyond eof i_new_size
+ * will be the intended file size until i_size is updated.  If this write does
+ * not extend all the way to the valid file size then restrict this update to
+ * the end of the write.
+ *
+ * This function does not block as blocking on the inode lock in IO completion
+ * can lead to IO completion order dependency deadlocks.. If it can't get the
+ * inode ilock it will return EAGAIN. Callers must handle this.
+ */
+STATIC int
+xfs_setfilesize(
+       xfs_ioend_t             *ioend)
+{
+       xfs_inode_t             *ip = XFS_I(ioend->io_inode);
+       xfs_fsize_t             isize;
+
+       if (unlikely(ioend->io_error))
+               return 0;
+
+       if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
+               return EAGAIN;
+
+       isize = xfs_ioend_new_eof(ioend);
+       if (isize) {
+               trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
+               ip->i_d.di_size = isize;
+               xfs_mark_inode_dirty(ip);
+       }
+
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return 0;
+}
+
+/*
+ * Schedule IO completion handling on the final put of an ioend.
+ */
+STATIC void
+xfs_finish_ioend(
+       struct xfs_ioend        *ioend)
+{
+       if (atomic_dec_and_test(&ioend->io_remaining)) {
+               if (ioend->io_type == IO_UNWRITTEN)
+                       queue_work(xfsconvertd_workqueue, &ioend->io_work);
+               else
+                       queue_work(xfsdatad_workqueue, &ioend->io_work);
+       }
+}
+
+/*
+ * IO write completion.
+ */
+STATIC void
+xfs_end_io(
+       struct work_struct *work)
+{
+       xfs_ioend_t     *ioend = container_of(work, xfs_ioend_t, io_work);
+       struct xfs_inode *ip = XFS_I(ioend->io_inode);
+       int             error = 0;
+
+       /*
+        * For unwritten extents we need to issue transactions to convert a
+        * range to normal written extens after the data I/O has finished.
+        */
+       if (ioend->io_type == IO_UNWRITTEN &&
+           likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
+
+               error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
+                                                ioend->io_size);
+               if (error)
+                       ioend->io_error = error;
+       }
+
+       /*
+        * We might have to update the on-disk file size after extending
+        * writes.
+        */
+       error = xfs_setfilesize(ioend);
+       ASSERT(!error || error == EAGAIN);
+
+       /*
+        * If we didn't complete processing of the ioend, requeue it to the
+        * tail of the workqueue for another attempt later. Otherwise destroy
+        * it.
+        */
+       if (error == EAGAIN) {
+               atomic_inc(&ioend->io_remaining);
+               xfs_finish_ioend(ioend);
+               /* ensure we don't spin on blocked ioends */
+               delay(1);
+       } else {
+               if (ioend->io_iocb)
+                       aio_complete(ioend->io_iocb, ioend->io_result, 0);
+               xfs_destroy_ioend(ioend);
+       }
+}
+
+/*
+ * Call IO completion handling in caller context on the final put of an ioend.
+ */
+STATIC void
+xfs_finish_ioend_sync(
+       struct xfs_ioend        *ioend)
+{
+       if (atomic_dec_and_test(&ioend->io_remaining))
+               xfs_end_io(&ioend->io_work);
+}
+
+/*
+ * Allocate and initialise an IO completion structure.
+ * We need to track unwritten extent write completion here initially.
+ * We'll need to extend this for updating the ondisk inode size later
+ * (vs. incore size).
+ */
+STATIC xfs_ioend_t *
+xfs_alloc_ioend(
+       struct inode            *inode,
+       unsigned int            type)
+{
+       xfs_ioend_t             *ioend;
+
+       ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
+
+       /*
+        * Set the count to 1 initially, which will prevent an I/O
+        * completion callback from happening before we have started
+        * all the I/O from calling the completion routine too early.
+        */
+       atomic_set(&ioend->io_remaining, 1);
+       ioend->io_error = 0;
+       ioend->io_list = NULL;
+       ioend->io_type = type;
+       ioend->io_inode = inode;
+       ioend->io_buffer_head = NULL;
+       ioend->io_buffer_tail = NULL;
+       atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
+       ioend->io_offset = 0;
+       ioend->io_size = 0;
+       ioend->io_iocb = NULL;
+       ioend->io_result = 0;
+
+       INIT_WORK(&ioend->io_work, xfs_end_io);
+       return ioend;
+}
+
+STATIC int
+xfs_map_blocks(
+       struct inode            *inode,
+       loff_t                  offset,
+       struct xfs_bmbt_irec    *imap,
+       int                     type,
+       int                     nonblocking)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       ssize_t                 count = 1 << inode->i_blkbits;
+       xfs_fileoff_t           offset_fsb, end_fsb;
+       int                     error = 0;
+       int                     bmapi_flags = XFS_BMAPI_ENTIRE;
+       int                     nimaps = 1;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+
+       if (type == IO_UNWRITTEN)
+               bmapi_flags |= XFS_BMAPI_IGSTATE;
+
+       if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
+               if (nonblocking)
+                       return -XFS_ERROR(EAGAIN);
+               xfs_ilock(ip, XFS_ILOCK_SHARED);
+       }
+
+       ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
+              (ip->i_df.if_flags & XFS_IFEXTENTS));
+       ASSERT(offset <= mp->m_maxioffset);
+
+       if (offset + count > mp->m_maxioffset)
+               count = mp->m_maxioffset - offset;
+       end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
+       offset_fsb = XFS_B_TO_FSBT(mp, offset);
+       error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
+                         bmapi_flags,  NULL, 0, imap, &nimaps, NULL);
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+       if (error)
+               return -XFS_ERROR(error);
+
+       if (type == IO_DELALLOC &&
+           (!nimaps || isnullstartblock(imap->br_startblock))) {
+               error = xfs_iomap_write_allocate(ip, offset, count, imap);
+               if (!error)
+                       trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
+               return -XFS_ERROR(error);
+       }
+
+#ifdef DEBUG
+       if (type == IO_UNWRITTEN) {
+               ASSERT(nimaps);
+               ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+               ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+       }
+#endif
+       if (nimaps)
+               trace_xfs_map_blocks_found(ip, offset, count, type, imap);
+       return 0;
+}
+
+STATIC int
+xfs_imap_valid(
+       struct inode            *inode,
+       struct xfs_bmbt_irec    *imap,
+       xfs_off_t               offset)
+{
+       offset >>= inode->i_blkbits;
+
+       return offset >= imap->br_startoff &&
+               offset < imap->br_startoff + imap->br_blockcount;
+}
+
+/*
+ * BIO completion handler for buffered IO.
+ */
+STATIC void
+xfs_end_bio(
+       struct bio              *bio,
+       int                     error)
+{
+       xfs_ioend_t             *ioend = bio->bi_private;
+
+       ASSERT(atomic_read(&bio->bi_cnt) >= 1);
+       ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
+
+       /* Toss bio and pass work off to an xfsdatad thread */
+       bio->bi_private = NULL;
+       bio->bi_end_io = NULL;
+       bio_put(bio);
+
+       xfs_finish_ioend(ioend);
+}
+
+STATIC void
+xfs_submit_ioend_bio(
+       struct writeback_control *wbc,
+       xfs_ioend_t             *ioend,
+       struct bio              *bio)
+{
+       atomic_inc(&ioend->io_remaining);
+       bio->bi_private = ioend;
+       bio->bi_end_io = xfs_end_bio;
+
+       /*
+        * If the I/O is beyond EOF we mark the inode dirty immediately
+        * but don't update the inode size until I/O completion.
+        */
+       if (xfs_ioend_new_eof(ioend))
+               xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
+
+       submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
+}
+
+STATIC struct bio *
+xfs_alloc_ioend_bio(
+       struct buffer_head      *bh)
+{
+       int                     nvecs = bio_get_nr_vecs(bh->b_bdev);
+       struct bio              *bio = bio_alloc(GFP_NOIO, nvecs);
+
+       ASSERT(bio->bi_private == NULL);
+       bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+       bio->bi_bdev = bh->b_bdev;
+       return bio;
+}
+
+STATIC void
+xfs_start_buffer_writeback(
+       struct buffer_head      *bh)
+{
+       ASSERT(buffer_mapped(bh));
+       ASSERT(buffer_locked(bh));
+       ASSERT(!buffer_delay(bh));
+       ASSERT(!buffer_unwritten(bh));
+
+       mark_buffer_async_write(bh);
+       set_buffer_uptodate(bh);
+       clear_buffer_dirty(bh);
+}
+
+STATIC void
+xfs_start_page_writeback(
+       struct page             *page,
+       int                     clear_dirty,
+       int                     buffers)
+{
+       ASSERT(PageLocked(page));
+       ASSERT(!PageWriteback(page));
+       if (clear_dirty)
+               clear_page_dirty_for_io(page);
+       set_page_writeback(page);
+       unlock_page(page);
+       /* If no buffers on the page are to be written, finish it here */
+       if (!buffers)
+               end_page_writeback(page);
+}
+
+static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
+{
+       return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+}
+
+/*
+ * Submit all of the bios for all of the ioends we have saved up, covering the
+ * initial writepage page and also any probed pages.
+ *
+ * Because we may have multiple ioends spanning a page, we need to start
+ * writeback on all the buffers before we submit them for I/O. If we mark the
+ * buffers as we got, then we can end up with a page that only has buffers
+ * marked async write and I/O complete on can occur before we mark the other
+ * buffers async write.
+ *
+ * The end result of this is that we trip a bug in end_page_writeback() because
+ * we call it twice for the one page as the code in end_buffer_async_write()
+ * assumes that all buffers on the page are started at the same time.
+ *
+ * The fix is two passes across the ioend list - one to start writeback on the
+ * buffer_heads, and then submit them for I/O on the second pass.
+ */
+STATIC void
+xfs_submit_ioend(
+       struct writeback_control *wbc,
+       xfs_ioend_t             *ioend)
+{
+       xfs_ioend_t             *head = ioend;
+       xfs_ioend_t             *next;
+       struct buffer_head      *bh;
+       struct bio              *bio;
+       sector_t                lastblock = 0;
+
+       /* Pass 1 - start writeback */
+       do {
+               next = ioend->io_list;
+               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
+                       xfs_start_buffer_writeback(bh);
+       } while ((ioend = next) != NULL);
+
+       /* Pass 2 - submit I/O */
+       ioend = head;
+       do {
+               next = ioend->io_list;
+               bio = NULL;
+
+               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+
+                       if (!bio) {
+ retry:
+                               bio = xfs_alloc_ioend_bio(bh);
+                       } else if (bh->b_blocknr != lastblock + 1) {
+                               xfs_submit_ioend_bio(wbc, ioend, bio);
+                               goto retry;
+                       }
+
+                       if (bio_add_buffer(bio, bh) != bh->b_size) {
+                               xfs_submit_ioend_bio(wbc, ioend, bio);
+                               goto retry;
+                       }
+
+                       lastblock = bh->b_blocknr;
+               }
+               if (bio)
+                       xfs_submit_ioend_bio(wbc, ioend, bio);
+               xfs_finish_ioend(ioend);
+       } while ((ioend = next) != NULL);
+}
+
+/*
+ * Cancel submission of all buffer_heads so far in this endio.
+ * Toss the endio too.  Only ever called for the initial page
+ * in a writepage request, so only ever one page.
+ */
+STATIC void
+xfs_cancel_ioend(
+       xfs_ioend_t             *ioend)
+{
+       xfs_ioend_t             *next;
+       struct buffer_head      *bh, *next_bh;
+
+       do {
+               next = ioend->io_list;
+               bh = ioend->io_buffer_head;
+               do {
+                       next_bh = bh->b_private;
+                       clear_buffer_async_write(bh);
+                       unlock_buffer(bh);
+               } while ((bh = next_bh) != NULL);
+
+               xfs_ioend_wake(XFS_I(ioend->io_inode));
+               mempool_free(ioend, xfs_ioend_pool);
+       } while ((ioend = next) != NULL);
+}
+
+/*
+ * Test to see if we've been building up a completion structure for
+ * earlier buffers -- if so, we try to append to this ioend if we
+ * can, otherwise we finish off any current ioend and start another.
+ * Return true if we've finished the given ioend.
+ */
+STATIC void
+xfs_add_to_ioend(
+       struct inode            *inode,
+       struct buffer_head      *bh,
+       xfs_off_t               offset,
+       unsigned int            type,
+       xfs_ioend_t             **result,
+       int                     need_ioend)
+{
+       xfs_ioend_t             *ioend = *result;
+
+       if (!ioend || need_ioend || type != ioend->io_type) {
+               xfs_ioend_t     *previous = *result;
+
+               ioend = xfs_alloc_ioend(inode, type);
+               ioend->io_offset = offset;
+               ioend->io_buffer_head = bh;
+               ioend->io_buffer_tail = bh;
+               if (previous)
+                       previous->io_list = ioend;
+               *result = ioend;
+       } else {
+               ioend->io_buffer_tail->b_private = bh;
+               ioend->io_buffer_tail = bh;
+       }
+
+       bh->b_private = NULL;
+       ioend->io_size += bh->b_size;
+}
+
+STATIC void
+xfs_map_buffer(
+       struct inode            *inode,
+       struct buffer_head      *bh,
+       struct xfs_bmbt_irec    *imap,
+       xfs_off_t               offset)
+{
+       sector_t                bn;
+       struct xfs_mount        *m = XFS_I(inode)->i_mount;
+       xfs_off_t               iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
+       xfs_daddr_t             iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
+
+       ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+       ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+
+       bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
+             ((offset - iomap_offset) >> inode->i_blkbits);
+
+       ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
+
+       bh->b_blocknr = bn;
+       set_buffer_mapped(bh);
+}
+
+STATIC void
+xfs_map_at_offset(
+       struct inode            *inode,
+       struct buffer_head      *bh,
+       struct xfs_bmbt_irec    *imap,
+       xfs_off_t               offset)
+{
+       ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+       ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
+
+       xfs_map_buffer(inode, bh, imap, offset);
+       set_buffer_mapped(bh);
+       clear_buffer_delay(bh);
+       clear_buffer_unwritten(bh);
+}
+
+/*
+ * Test if a given page is suitable for writing as part of an unwritten
+ * or delayed allocate extent.
+ */
+STATIC int
+xfs_is_delayed_page(
+       struct page             *page,
+       unsigned int            type)
+{
+       if (PageWriteback(page))
+               return 0;
+
+       if (page->mapping && page_has_buffers(page)) {
+               struct buffer_head      *bh, *head;
+               int                     acceptable = 0;
+
+               bh = head = page_buffers(page);
+               do {
+                       if (buffer_unwritten(bh))
+                               acceptable = (type == IO_UNWRITTEN);
+                       else if (buffer_delay(bh))
+                               acceptable = (type == IO_DELALLOC);
+                       else if (buffer_dirty(bh) && buffer_mapped(bh))
+                               acceptable = (type == IO_OVERWRITE);
+                       else
+                               break;
+               } while ((bh = bh->b_this_page) != head);
+
+               if (acceptable)
+                       return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Allocate & map buffers for page given the extent map. Write it out.
+ * except for the original page of a writepage, this is called on
+ * delalloc/unwritten pages only, for the original page it is possible
+ * that the page has no mapping at all.
+ */
+STATIC int
+xfs_convert_page(
+       struct inode            *inode,
+       struct page             *page,
+       loff_t                  tindex,
+       struct xfs_bmbt_irec    *imap,
+       xfs_ioend_t             **ioendp,
+       struct writeback_control *wbc)
+{
+       struct buffer_head      *bh, *head;
+       xfs_off_t               end_offset;
+       unsigned long           p_offset;
+       unsigned int            type;
+       int                     len, page_dirty;
+       int                     count = 0, done = 0, uptodate = 1;
+       xfs_off_t               offset = page_offset(page);
+
+       if (page->index != tindex)
+               goto fail;
+       if (!trylock_page(page))
+               goto fail;
+       if (PageWriteback(page))
+               goto fail_unlock_page;
+       if (page->mapping != inode->i_mapping)
+               goto fail_unlock_page;
+       if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
+               goto fail_unlock_page;
+
+       /*
+        * page_dirty is initially a count of buffers on the page before
+        * EOF and is decremented as we move each into a cleanable state.
+        *
+        * Derivation:
+        *
+        * End offset is the highest offset that this page should represent.
+        * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
+        * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
+        * hence give us the correct page_dirty count. On any other page,
+        * it will be zero and in that case we need page_dirty to be the
+        * count of buffers on the page.
+        */
+       end_offset = min_t(unsigned long long,
+                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+                       i_size_read(inode));
+
+       len = 1 << inode->i_blkbits;
+       p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
+                                       PAGE_CACHE_SIZE);
+       p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
+       page_dirty = p_offset / len;
+
+       bh = head = page_buffers(page);
+       do {
+               if (offset >= end_offset)
+                       break;
+               if (!buffer_uptodate(bh))
+                       uptodate = 0;
+               if (!(PageUptodate(page) || buffer_uptodate(bh))) {
+                       done = 1;
+                       continue;
+               }
+
+               if (buffer_unwritten(bh) || buffer_delay(bh) ||
+                   buffer_mapped(bh)) {
+                       if (buffer_unwritten(bh))
+                               type = IO_UNWRITTEN;
+                       else if (buffer_delay(bh))
+                               type = IO_DELALLOC;
+                       else
+                               type = IO_OVERWRITE;
+
+                       if (!xfs_imap_valid(inode, imap, offset)) {
+                               done = 1;
+                               continue;
+                       }
+
+                       lock_buffer(bh);
+                       if (type != IO_OVERWRITE)
+                               xfs_map_at_offset(inode, bh, imap, offset);
+                       xfs_add_to_ioend(inode, bh, offset, type,
+                                        ioendp, done);
+
+                       page_dirty--;
+                       count++;
+               } else {
+                       done = 1;
+               }
+       } while (offset += len, (bh = bh->b_this_page) != head);
+
+       if (uptodate && bh == head)
+               SetPageUptodate(page);
+
+       if (count) {
+               if (--wbc->nr_to_write <= 0 &&
+                   wbc->sync_mode == WB_SYNC_NONE)
+                       done = 1;
+       }
+       xfs_start_page_writeback(page, !page_dirty, count);
+
+       return done;
+ fail_unlock_page:
+       unlock_page(page);
+ fail:
+       return 1;
+}
+
+/*
+ * Convert & write out a cluster of pages in the same extent as defined
+ * by mp and following the start page.
+ */
+STATIC void
+xfs_cluster_write(
+       struct inode            *inode,
+       pgoff_t                 tindex,
+       struct xfs_bmbt_irec    *imap,
+       xfs_ioend_t             **ioendp,
+       struct writeback_control *wbc,
+       pgoff_t                 tlast)
+{
+       struct pagevec          pvec;
+       int                     done = 0, i;
+
+       pagevec_init(&pvec, 0);
+       while (!done && tindex <= tlast) {
+               unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
+
+               if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
+                       break;
+
+               for (i = 0; i < pagevec_count(&pvec); i++) {
+                       done = xfs_convert_page(inode, pvec.pages[i], tindex++,
+                                       imap, ioendp, wbc);
+                       if (done)
+                               break;
+               }
+
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+}
+
+STATIC void
+xfs_vm_invalidatepage(
+       struct page             *page,
+       unsigned long           offset)
+{
+       trace_xfs_invalidatepage(page->mapping->host, page, offset);
+       block_invalidatepage(page, offset);
+}
+
+/*
+ * If the page has delalloc buffers on it, we need to punch them out before we
+ * invalidate the page. If we don't, we leave a stale delalloc mapping on the
+ * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
+ * is done on that same region - the delalloc extent is returned when none is
+ * supposed to be there.
+ *
+ * We prevent this by truncating away the delalloc regions on the page before
+ * invalidating it. Because they are delalloc, we can do this without needing a
+ * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
+ * truncation without a transaction as there is no space left for block
+ * reservation (typically why we see a ENOSPC in writeback).
+ *
+ * This is not a performance critical path, so for now just do the punching a
+ * buffer head at a time.
+ */
+STATIC void
+xfs_aops_discard_page(
+       struct page             *page)
+{
+       struct inode            *inode = page->mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct buffer_head      *bh, *head;
+       loff_t                  offset = page_offset(page);
+
+       if (!xfs_is_delayed_page(page, IO_DELALLOC))
+               goto out_invalidate;
+
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               goto out_invalidate;
+
+       xfs_alert(ip->i_mount,
+               "page discard on page %p, inode 0x%llx, offset %llu.",
+                       page, ip->i_ino, offset);
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       bh = head = page_buffers(page);
+       do {
+               int             error;
+               xfs_fileoff_t   start_fsb;
+
+               if (!buffer_delay(bh))
+                       goto next_buffer;
+
+               start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+               error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
+               if (error) {
+                       /* something screwed, just bail */
+                       if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+                               xfs_alert(ip->i_mount,
+                       "page discard unable to remove delalloc mapping.");
+                       }
+                       break;
+               }
+next_buffer:
+               offset += 1 << inode->i_blkbits;
+
+       } while ((bh = bh->b_this_page) != head);
+
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_invalidate:
+       xfs_vm_invalidatepage(page, 0);
+       return;
+}
+
+/*
+ * Write out a dirty page.
+ *
+ * For delalloc space on the page we need to allocate space and flush it.
+ * For unwritten space on the page we need to start the conversion to
+ * regular allocated space.
+ * For any other dirty buffer heads on the page we should flush them.
+ */
+STATIC int
+xfs_vm_writepage(
+       struct page             *page,
+       struct writeback_control *wbc)
+{
+       struct inode            *inode = page->mapping->host;
+       struct buffer_head      *bh, *head;
+       struct xfs_bmbt_irec    imap;
+       xfs_ioend_t             *ioend = NULL, *iohead = NULL;
+       loff_t                  offset;
+       unsigned int            type;
+       __uint64_t              end_offset;
+       pgoff_t                 end_index, last_index;
+       ssize_t                 len;
+       int                     err, imap_valid = 0, uptodate = 1;
+       int                     count = 0;
+       int                     nonblocking = 0;
+
+       trace_xfs_writepage(inode, page, 0);
+
+       ASSERT(page_has_buffers(page));
+
+       /*
+        * Refuse to write the page out if we are called from reclaim context.
+        *
+        * This avoids stack overflows when called from deeply used stacks in
+        * random callers for direct reclaim or memcg reclaim.  We explicitly
+        * allow reclaim from kswapd as the stack usage there is relatively low.
+        *
+        * This should really be done by the core VM, but until that happens
+        * filesystems like XFS, btrfs and ext4 have to take care of this
+        * by themselves.
+        */
+       if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
+               goto redirty;
+
+       /*
+        * Given that we do not allow direct reclaim to call us, we should
+        * never be called while in a filesystem transaction.
+        */
+       if (WARN_ON(current->flags & PF_FSTRANS))
+               goto redirty;
+
+       /* Is this page beyond the end of the file? */
+       offset = i_size_read(inode);
+       end_index = offset >> PAGE_CACHE_SHIFT;
+       last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
+       if (page->index >= end_index) {
+               if ((page->index >= end_index + 1) ||
+                   !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
+                       unlock_page(page);
+                       return 0;
+               }
+       }
+
+       end_offset = min_t(unsigned long long,
+                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+                       offset);
+       len = 1 << inode->i_blkbits;
+
+       bh = head = page_buffers(page);
+       offset = page_offset(page);
+       type = IO_OVERWRITE;
+
+       if (wbc->sync_mode == WB_SYNC_NONE)
+               nonblocking = 1;
+
+       do {
+               int new_ioend = 0;
+
+               if (offset >= end_offset)
+                       break;
+               if (!buffer_uptodate(bh))
+                       uptodate = 0;
+
+               /*
+                * set_page_dirty dirties all buffers in a page, independent
+                * of their state.  The dirty state however is entirely
+                * meaningless for holes (!mapped && uptodate), so skip
+                * buffers covering holes here.
+                */
+               if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
+                       imap_valid = 0;
+                       continue;
+               }
+
+               if (buffer_unwritten(bh)) {
+                       if (type != IO_UNWRITTEN) {
+                               type = IO_UNWRITTEN;
+                               imap_valid = 0;
+                       }
+               } else if (buffer_delay(bh)) {
+                       if (type != IO_DELALLOC) {
+                               type = IO_DELALLOC;
+                               imap_valid = 0;
+                       }
+               } else if (buffer_uptodate(bh)) {
+                       if (type != IO_OVERWRITE) {
+                               type = IO_OVERWRITE;
+                               imap_valid = 0;
+                       }
+               } else {
+                       if (PageUptodate(page)) {
+                               ASSERT(buffer_mapped(bh));
+                               imap_valid = 0;
+                       }
+                       continue;
+               }
+
+               if (imap_valid)
+                       imap_valid = xfs_imap_valid(inode, &imap, offset);
+               if (!imap_valid) {
+                       /*
+                        * If we didn't have a valid mapping then we need to
+                        * put the new mapping into a separate ioend structure.
+                        * This ensures non-contiguous extents always have
+                        * separate ioends, which is particularly important
+                        * for unwritten extent conversion at I/O completion
+                        * time.
+                        */
+                       new_ioend = 1;
+                       err = xfs_map_blocks(inode, offset, &imap, type,
+                                            nonblocking);
+                       if (err)
+                               goto error;
+                       imap_valid = xfs_imap_valid(inode, &imap, offset);
+               }
+               if (imap_valid) {
+                       lock_buffer(bh);
+                       if (type != IO_OVERWRITE)
+                               xfs_map_at_offset(inode, bh, &imap, offset);
+                       xfs_add_to_ioend(inode, bh, offset, type, &ioend,
+                                        new_ioend);
+                       count++;
+               }
+
+               if (!iohead)
+                       iohead = ioend;
+
+       } while (offset += len, ((bh = bh->b_this_page) != head));
+
+       if (uptodate && bh == head)
+               SetPageUptodate(page);
+
+       xfs_start_page_writeback(page, 1, count);
+
+       if (ioend && imap_valid) {
+               xfs_off_t               end_index;
+
+               end_index = imap.br_startoff + imap.br_blockcount;
+
+               /* to bytes */
+               end_index <<= inode->i_blkbits;
+
+               /* to pages */
+               end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
+
+               /* check against file size */
+               if (end_index > last_index)
+                       end_index = last_index;
+
+               xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
+                                 wbc, end_index);
+       }
+
+       if (iohead)
+               xfs_submit_ioend(wbc, iohead);
+
+       return 0;
+
+error:
+       if (iohead)
+               xfs_cancel_ioend(iohead);
+
+       if (err == -EAGAIN)
+               goto redirty;
+
+       xfs_aops_discard_page(page);
+       ClearPageUptodate(page);
+       unlock_page(page);
+       return err;
+
+redirty:
+       redirty_page_for_writepage(wbc, page);
+       unlock_page(page);
+       return 0;
+}
+
+STATIC int
+xfs_vm_writepages(
+       struct address_space    *mapping,
+       struct writeback_control *wbc)
+{
+       xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
+       return generic_writepages(mapping, wbc);
+}
+
+/*
+ * Called to move a page into cleanable state - and from there
+ * to be released. The page should already be clean. We always
+ * have buffer heads in this call.
+ *
+ * Returns 1 if the page is ok to release, 0 otherwise.
+ */
+STATIC int
+xfs_vm_releasepage(
+       struct page             *page,
+       gfp_t                   gfp_mask)
+{
+       int                     delalloc, unwritten;
+
+       trace_xfs_releasepage(page->mapping->host, page, 0);
+
+       xfs_count_page_state(page, &delalloc, &unwritten);
+
+       if (WARN_ON(delalloc))
+               return 0;
+       if (WARN_ON(unwritten))
+               return 0;
+
+       return try_to_free_buffers(page);
+}
+
+STATIC int
+__xfs_get_blocks(
+       struct inode            *inode,
+       sector_t                iblock,
+       struct buffer_head      *bh_result,
+       int                     create,
+       int                     direct)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_fileoff_t           offset_fsb, end_fsb;
+       int                     error = 0;
+       int                     lockmode = 0;
+       struct xfs_bmbt_irec    imap;
+       int                     nimaps = 1;
+       xfs_off_t               offset;
+       ssize_t                 size;
+       int                     new = 0;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+
+       offset = (xfs_off_t)iblock << inode->i_blkbits;
+       ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
+       size = bh_result->b_size;
+
+       if (!create && direct && offset >= i_size_read(inode))
+               return 0;
+
+       if (create) {
+               lockmode = XFS_ILOCK_EXCL;
+               xfs_ilock(ip, lockmode);
+       } else {
+               lockmode = xfs_ilock_map_shared(ip);
+       }
+
+       ASSERT(offset <= mp->m_maxioffset);
+       if (offset + size > mp->m_maxioffset)
+               size = mp->m_maxioffset - offset;
+       end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
+       offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
+       error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
+                         XFS_BMAPI_ENTIRE,  NULL, 0, &imap, &nimaps, NULL);
+       if (error)
+               goto out_unlock;
+
+       if (create &&
+           (!nimaps ||
+            (imap.br_startblock == HOLESTARTBLOCK ||
+             imap.br_startblock == DELAYSTARTBLOCK))) {
+               if (direct) {
+                       error = xfs_iomap_write_direct(ip, offset, size,
+                                                      &imap, nimaps);
+               } else {
+                       error = xfs_iomap_write_delay(ip, offset, size, &imap);
+               }
+               if (error)
+                       goto out_unlock;
+
+               trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
+       } else if (nimaps) {
+               trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
+       } else {
+               trace_xfs_get_blocks_notfound(ip, offset, size);
+               goto out_unlock;
+       }
+       xfs_iunlock(ip, lockmode);
+
+       if (imap.br_startblock != HOLESTARTBLOCK &&
+           imap.br_startblock != DELAYSTARTBLOCK) {
+               /*
+                * For unwritten extents do not report a disk address on
+                * the read case (treat as if we're reading into a hole).
+                */
+               if (create || !ISUNWRITTEN(&imap))
+                       xfs_map_buffer(inode, bh_result, &imap, offset);
+               if (create && ISUNWRITTEN(&imap)) {
+                       if (direct)
+                               bh_result->b_private = inode;
+                       set_buffer_unwritten(bh_result);
+               }
+       }
+
+       /*
+        * If this is a realtime file, data may be on a different device.
+        * to that pointed to from the buffer_head b_bdev currently.
+        */
+       bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
+
+       /*
+        * If we previously allocated a block out beyond eof and we are now
+        * coming back to use it then we will need to flag it as new even if it
+        * has a disk address.
+        *
+        * With sub-block writes into unwritten extents we also need to mark
+        * the buffer as new so that the unwritten parts of the buffer gets
+        * correctly zeroed.
+        */
+       if (create &&
+           ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
+            (offset >= i_size_read(inode)) ||
+            (new || ISUNWRITTEN(&imap))))
+               set_buffer_new(bh_result);
+
+       if (imap.br_startblock == DELAYSTARTBLOCK) {
+               BUG_ON(direct);
+               if (create) {
+                       set_buffer_uptodate(bh_result);
+                       set_buffer_mapped(bh_result);
+                       set_buffer_delay(bh_result);
+               }
+       }
+
+       /*
+        * If this is O_DIRECT or the mpage code calling tell them how large
+        * the mapping is, so that we can avoid repeated get_blocks calls.
+        */
+       if (direct || size > (1 << inode->i_blkbits)) {
+               xfs_off_t               mapping_size;
+
+               mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
+               mapping_size <<= inode->i_blkbits;
+
+               ASSERT(mapping_size > 0);
+               if (mapping_size > size)
+                       mapping_size = size;
+               if (mapping_size > LONG_MAX)
+                       mapping_size = LONG_MAX;
+
+               bh_result->b_size = mapping_size;
+       }
+
+       return 0;
+
+out_unlock:
+       xfs_iunlock(ip, lockmode);
+       return -error;
+}
+
+int
+xfs_get_blocks(
+       struct inode            *inode,
+       sector_t                iblock,
+       struct buffer_head      *bh_result,
+       int                     create)
+{
+       return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
+}
+
+STATIC int
+xfs_get_blocks_direct(
+       struct inode            *inode,
+       sector_t                iblock,
+       struct buffer_head      *bh_result,
+       int                     create)
+{
+       return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
+}
+
+/*
+ * Complete a direct I/O write request.
+ *
+ * If the private argument is non-NULL __xfs_get_blocks signals us that we
+ * need to issue a transaction to convert the range from unwritten to written
+ * extents.  In case this is regular synchronous I/O we just call xfs_end_io
+ * to do this and we are done.  But in case this was a successful AIO
+ * request this handler is called from interrupt context, from which we
+ * can't start transactions.  In that case offload the I/O completion to
+ * the workqueues we also use for buffered I/O completion.
+ */
+STATIC void
+xfs_end_io_direct_write(
+       struct kiocb            *iocb,
+       loff_t                  offset,
+       ssize_t                 size,
+       void                    *private,
+       int                     ret,
+       bool                    is_async)
+{
+       struct xfs_ioend        *ioend = iocb->private;
+       struct inode            *inode = ioend->io_inode;
+
+       /*
+        * blockdev_direct_IO can return an error even after the I/O
+        * completion handler was called.  Thus we need to protect
+        * against double-freeing.
+        */
+       iocb->private = NULL;
+
+       ioend->io_offset = offset;
+       ioend->io_size = size;
+       if (private && size > 0)
+               ioend->io_type = IO_UNWRITTEN;
+
+       if (is_async) {
+               /*
+                * If we are converting an unwritten extent we need to delay
+                * the AIO completion until after the unwrittent extent
+                * conversion has completed, otherwise do it ASAP.
+                */
+               if (ioend->io_type == IO_UNWRITTEN) {
+                       ioend->io_iocb = iocb;
+                       ioend->io_result = ret;
+               } else {
+                       aio_complete(iocb, ret, 0);
+               }
+               xfs_finish_ioend(ioend);
+       } else {
+               xfs_finish_ioend_sync(ioend);
+       }
+
+       /* XXX: probably should move into the real I/O completion handler */
+       inode_dio_done(inode);
+}
+
+STATIC ssize_t
+xfs_vm_direct_IO(
+       int                     rw,
+       struct kiocb            *iocb,
+       const struct iovec      *iov,
+       loff_t                  offset,
+       unsigned long           nr_segs)
+{
+       struct inode            *inode = iocb->ki_filp->f_mapping->host;
+       struct block_device     *bdev = xfs_find_bdev_for_inode(inode);
+       ssize_t                 ret;
+
+       if (rw & WRITE) {
+               iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
+
+               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
+                                           offset, nr_segs,
+                                           xfs_get_blocks_direct,
+                                           xfs_end_io_direct_write, NULL, 0);
+               if (ret != -EIOCBQUEUED && iocb->private)
+                       xfs_destroy_ioend(iocb->private);
+       } else {
+               ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
+                                           offset, nr_segs,
+                                           xfs_get_blocks_direct,
+                                           NULL, NULL, 0);
+       }
+
+       return ret;
+}
+
+STATIC void
+xfs_vm_write_failed(
+       struct address_space    *mapping,
+       loff_t                  to)
+{
+       struct inode            *inode = mapping->host;
+
+       if (to > inode->i_size) {
+               /*
+                * punch out the delalloc blocks we have already allocated. We
+                * don't call xfs_setattr() to do this as we may be in the
+                * middle of a multi-iovec write and so the vfs inode->i_size
+                * will not match the xfs ip->i_size and so it will zero too
+                * much. Hence we jus truncate the page cache to zero what is
+                * necessary and punch the delalloc blocks directly.
+                */
+               struct xfs_inode        *ip = XFS_I(inode);
+               xfs_fileoff_t           start_fsb;
+               xfs_fileoff_t           end_fsb;
+               int                     error;
+
+               truncate_pagecache(inode, to, inode->i_size);
+
+               /*
+                * Check if there are any blocks that are outside of i_size
+                * that need to be trimmed back.
+                */
+               start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
+               end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
+               if (end_fsb <= start_fsb)
+                       return;
+
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+               error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+                                                       end_fsb - start_fsb);
+               if (error) {
+                       /* something screwed, just bail */
+                       if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+                               xfs_alert(ip->i_mount,
+                       "xfs_vm_write_failed: unable to clean up ino %lld",
+                                               ip->i_ino);
+                       }
+               }
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       }
+}
+
+STATIC int
+xfs_vm_write_begin(
+       struct file             *file,
+       struct address_space    *mapping,
+       loff_t                  pos,
+       unsigned                len,
+       unsigned                flags,
+       struct page             **pagep,
+       void                    **fsdata)
+{
+       int                     ret;
+
+       ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
+                               pagep, xfs_get_blocks);
+       if (unlikely(ret))
+               xfs_vm_write_failed(mapping, pos + len);
+       return ret;
+}
+
+STATIC int
+xfs_vm_write_end(
+       struct file             *file,
+       struct address_space    *mapping,
+       loff_t                  pos,
+       unsigned                len,
+       unsigned                copied,
+       struct page             *page,
+       void                    *fsdata)
+{
+       int                     ret;
+
+       ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+       if (unlikely(ret < len))
+               xfs_vm_write_failed(mapping, pos + len);
+       return ret;
+}
+
+STATIC sector_t
+xfs_vm_bmap(
+       struct address_space    *mapping,
+       sector_t                block)
+{
+       struct inode            *inode = (struct inode *)mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+
+       trace_xfs_vm_bmap(XFS_I(inode));
+       xfs_ilock(ip, XFS_IOLOCK_SHARED);
+       xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+       return generic_block_bmap(mapping, block, xfs_get_blocks);
+}
+
+STATIC int
+xfs_vm_readpage(
+       struct file             *unused,
+       struct page             *page)
+{
+       return mpage_readpage(page, xfs_get_blocks);
+}
+
+STATIC int
+xfs_vm_readpages(
+       struct file             *unused,
+       struct address_space    *mapping,
+       struct list_head        *pages,
+       unsigned                nr_pages)
+{
+       return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
+}
+
+const struct address_space_operations xfs_address_space_operations = {
+       .readpage               = xfs_vm_readpage,
+       .readpages              = xfs_vm_readpages,
+       .writepage              = xfs_vm_writepage,
+       .writepages             = xfs_vm_writepages,
+       .releasepage            = xfs_vm_releasepage,
+       .invalidatepage         = xfs_vm_invalidatepage,
+       .write_begin            = xfs_vm_write_begin,
+       .write_end              = xfs_vm_write_end,
+       .bmap                   = xfs_vm_bmap,
+       .direct_IO              = xfs_vm_direct_IO,
+       .migratepage            = buffer_migrate_page,
+       .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
+};
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h

new file mode 100644 (file)

index 0000000..71f721e
--- /dev/null
+++ b/fs/xfs/xfs_aops.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2005-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_AOPS_H__
+#define __XFS_AOPS_H__
+
+extern struct workqueue_struct *xfsdatad_workqueue;
+extern struct workqueue_struct *xfsconvertd_workqueue;
+extern mempool_t *xfs_ioend_pool;
+
+/*
+ * Types of I/O for bmap clustering and I/O completion tracking.
+ */
+enum {
+       IO_DIRECT = 0,  /* special case for direct I/O ioends */
+       IO_DELALLOC,    /* mapping covers delalloc region */
+       IO_UNWRITTEN,   /* mapping covers allocated but uninitialized data */
+       IO_OVERWRITE,   /* mapping covers already allocated extent */
+};
+
+#define XFS_IO_TYPES \
+       { 0,                    "" }, \
+       { IO_DELALLOC,          "delalloc" }, \
+       { IO_UNWRITTEN,         "unwritten" }, \
+       { IO_OVERWRITE,         "overwrite" }
+
+/*
+ * xfs_ioend struct manages large extent writes for XFS.
+ * It can manage several multi-page bio's at once.
+ */
+typedef struct xfs_ioend {
+       struct xfs_ioend        *io_list;       /* next ioend in chain */
+       unsigned int            io_type;        /* delalloc / unwritten */
+       int                     io_error;       /* I/O error code */
+       atomic_t                io_remaining;   /* hold count */
+       struct inode            *io_inode;      /* file being written to */
+       struct buffer_head      *io_buffer_head;/* buffer linked list head */
+       struct buffer_head      *io_buffer_tail;/* buffer linked list tail */
+       size_t                  io_size;        /* size of the extent */
+       xfs_off_t               io_offset;      /* offset in the file */
+       struct work_struct      io_work;        /* xfsdatad work queue */
+       struct kiocb            *io_iocb;
+       int                     io_result;
+} xfs_ioend_t;
+
+extern const struct address_space_operations xfs_address_space_operations;
+extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
+
+extern void xfs_ioend_init(void);
+extern void xfs_ioend_wait(struct xfs_inode *);
+
+extern void xfs_count_page_state(struct page *, int *, int *);
+
+#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c

index cbae424fe1ba205c1eda1674b885354089ef5562..160bcdc34a6e91d6ce77394089123b2a86abc3dc 100644 (file)
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -2121,8 +2121,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
  
                 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt,
                                  XBF_LOCK | XBF_DONT_BLOCK);
-               ASSERT(bp);
-               ASSERT(!XFS_BUF_GETERROR(bp));
+               ASSERT(!xfs_buf_geterror(bp));
  
                 tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
                                                         XFS_BUF_SIZE(bp);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c

index ab3e5c6c464252c737457af110c6f52cc1e75145..452a291383abaf72c16b138769fe5a2eba2b1f7c 100644 (file)
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -3383,8 +3383,7 @@ xfs_bmap_local_to_extents(
                 ASSERT(args.len == 1);
                 *firstblock = args.fsbno;
                 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
-               memcpy((char *)XFS_BUF_PTR(bp), ifp->if_u1.if_data,
-                       ifp->if_bytes);
+               memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
                 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
                 xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
                 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c

index cabf4b5604aa6dff065ff2ab428a6e6efdd97a00..2b9fd385e27dbd93e91b7e4d90cd1a81bc86f7af 100644 (file)
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -275,8 +275,7 @@ xfs_btree_dup_cursor(
                                 return error;
                         }
                         new->bc_bufs[i] = bp;
-                       ASSERT(bp);
-                       ASSERT(!XFS_BUF_GETERROR(bp));
+                       ASSERT(!xfs_buf_geterror(bp));
                 } else
                         new->bc_bufs[i] = NULL;
         }
@@ -467,8 +466,7 @@ xfs_btree_get_bufl(
         ASSERT(fsbno != NULLFSBLOCK);
         d = XFS_FSB_TO_DADDR(mp, fsbno);
         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
-       ASSERT(bp);
-       ASSERT(!XFS_BUF_GETERROR(bp));
+       ASSERT(!xfs_buf_geterror(bp));
         return bp;
  }
  
@@ -491,8 +489,7 @@ xfs_btree_get_bufs(
         ASSERT(agbno != NULLAGBLOCK);
         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
-       ASSERT(bp);
-       ASSERT(!XFS_BUF_GETERROR(bp));
+       ASSERT(!xfs_buf_geterror(bp));
         return bp;
  }
  
@@ -632,7 +629,7 @@ xfs_btree_read_bufl(
                         mp->m_bsize, lock, &bp))) {
                 return error;
         }
-       ASSERT(!bp || !XFS_BUF_GETERROR(bp));
+       ASSERT(!xfs_buf_geterror(bp));
         if (bp)
                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
         *bpp = bp;
@@ -973,8 +970,7 @@ xfs_btree_get_buf_block(
         *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
                                  mp->m_bsize, flags);
  
-       ASSERT(*bpp);
-       ASSERT(!XFS_BUF_GETERROR(*bpp));
+       ASSERT(!xfs_buf_geterror(*bpp));
  
         *block = XFS_BUF_TO_BLOCK(*bpp);
         return 0;
@@ -1006,8 +1002,7 @@ xfs_btree_read_buf_block(
         if (error)
                 return error;
  
-       ASSERT(*bpp != NULL);
-       ASSERT(!XFS_BUF_GETERROR(*bpp));
+       ASSERT(!xfs_buf_geterror(*bpp));
  
         xfs_btree_set_refs(cur, *bpp);
         *block = XFS_BUF_TO_BLOCK(*bpp);
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h

index 8d05a6a46ce312bccf4bcdbbc384cf83722e0628..5b240de104c0a39955bac019f384990a1e6fb457 100644 (file)
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -262,7 +262,7 @@ typedef struct xfs_btree_cur
  /*
   * Convert from buffer to btree block header.
   */
-#define        XFS_BUF_TO_BLOCK(bp)    ((struct xfs_btree_block *)XFS_BUF_PTR(bp))
+#define        XFS_BUF_TO_BLOCK(bp)    ((struct xfs_btree_block *)((bp)->b_addr))
  
  
  /*
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c

new file mode 100644 (file)

index 0000000..c57836d
--- /dev/null
+++ b/fs/xfs/xfs_buf.c
@@ -0,0 +1,1876 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include <linux/stddef.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <linux/bio.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/workqueue.h>
+#include <linux/percpu.h>
+#include <linux/blkdev.h>
+#include <linux/hash.h>
+#include <linux/kthread.h>
+#include <linux/migrate.h>
+#include <linux/backing-dev.h>
+#include <linux/freezer.h>
+
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_trace.h"
+
+static kmem_zone_t *xfs_buf_zone;
+STATIC int xfsbufd(void *);
+STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
+
+static struct workqueue_struct *xfslogd_workqueue;
+struct workqueue_struct *xfsdatad_workqueue;
+struct workqueue_struct *xfsconvertd_workqueue;
+
+#ifdef XFS_BUF_LOCK_TRACKING
+# define XB_SET_OWNER(bp)      ((bp)->b_last_holder = current->pid)
+# define XB_CLEAR_OWNER(bp)    ((bp)->b_last_holder = -1)
+# define XB_GET_OWNER(bp)      ((bp)->b_last_holder)
+#else
+# define XB_SET_OWNER(bp)      do { } while (0)
+# define XB_CLEAR_OWNER(bp)    do { } while (0)
+# define XB_GET_OWNER(bp)      do { } while (0)
+#endif
+
+#define xb_to_gfp(flags) \
+       ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
+         ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
+
+#define xb_to_km(flags) \
+        (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
+
+#define xfs_buf_allocate(flags) \
+       kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
+#define xfs_buf_deallocate(bp) \
+       kmem_zone_free(xfs_buf_zone, (bp));
+
+static inline int
+xfs_buf_is_vmapped(
+       struct xfs_buf  *bp)
+{
+       /*
+        * Return true if the buffer is vmapped.
+        *
+        * The XBF_MAPPED flag is set if the buffer should be mapped, but the
+        * code is clever enough to know it doesn't have to map a single page,
+        * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1.
+        */
+       return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1;
+}
+
+static inline int
+xfs_buf_vmap_len(
+       struct xfs_buf  *bp)
+{
+       return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
+}
+
+/*
+ * xfs_buf_lru_add - add a buffer to the LRU.
+ *
+ * The LRU takes a new reference to the buffer so that it will only be freed
+ * once the shrinker takes the buffer off the LRU.
+ */
+STATIC void
+xfs_buf_lru_add(
+       struct xfs_buf  *bp)
+{
+       struct xfs_buftarg *btp = bp->b_target;
+
+       spin_lock(&btp->bt_lru_lock);
+       if (list_empty(&bp->b_lru)) {
+               atomic_inc(&bp->b_hold);
+               list_add_tail(&bp->b_lru, &btp->bt_lru);
+               btp->bt_lru_nr++;
+       }
+       spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * xfs_buf_lru_del - remove a buffer from the LRU
+ *
+ * The unlocked check is safe here because it only occurs when there are not
+ * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
+ * to optimise the shrinker removing the buffer from the LRU and calling
+ * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
+ * bt_lru_lock.
+ */
+STATIC void
+xfs_buf_lru_del(
+       struct xfs_buf  *bp)
+{
+       struct xfs_buftarg *btp = bp->b_target;
+
+       if (list_empty(&bp->b_lru))
+               return;
+
+       spin_lock(&btp->bt_lru_lock);
+       if (!list_empty(&bp->b_lru)) {
+               list_del_init(&bp->b_lru);
+               btp->bt_lru_nr--;
+       }
+       spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * When we mark a buffer stale, we remove the buffer from the LRU and clear the
+ * b_lru_ref count so that the buffer is freed immediately when the buffer
+ * reference count falls to zero. If the buffer is already on the LRU, we need
+ * to remove the reference that LRU holds on the buffer.
+ *
+ * This prevents build-up of stale buffers on the LRU.
+ */
+void
+xfs_buf_stale(
+       struct xfs_buf  *bp)
+{
+       bp->b_flags |= XBF_STALE;
+       atomic_set(&(bp)->b_lru_ref, 0);
+       if (!list_empty(&bp->b_lru)) {
+               struct xfs_buftarg *btp = bp->b_target;
+
+               spin_lock(&btp->bt_lru_lock);
+               if (!list_empty(&bp->b_lru)) {
+                       list_del_init(&bp->b_lru);
+                       btp->bt_lru_nr--;
+                       atomic_dec(&bp->b_hold);
+               }
+               spin_unlock(&btp->bt_lru_lock);
+       }
+       ASSERT(atomic_read(&bp->b_hold) >= 1);
+}
+
+STATIC void
+_xfs_buf_initialize(
+       xfs_buf_t               *bp,
+       xfs_buftarg_t           *target,
+       xfs_off_t               range_base,
+       size_t                  range_length,
+       xfs_buf_flags_t         flags)
+{
+       /*
+        * We don't want certain flags to appear in b_flags.
+        */
+       flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
+
+       memset(bp, 0, sizeof(xfs_buf_t));
+       atomic_set(&bp->b_hold, 1);
+       atomic_set(&bp->b_lru_ref, 1);
+       init_completion(&bp->b_iowait);
+       INIT_LIST_HEAD(&bp->b_lru);
+       INIT_LIST_HEAD(&bp->b_list);
+       RB_CLEAR_NODE(&bp->b_rbnode);
+       sema_init(&bp->b_sema, 0); /* held, no waiters */
+       XB_SET_OWNER(bp);
+       bp->b_target = target;
+       bp->b_file_offset = range_base;
+       /*
+        * Set buffer_length and count_desired to the same value initially.
+        * I/O routines should use count_desired, which will be the same in
+        * most cases but may be reset (e.g. XFS recovery).
+        */
+       bp->b_buffer_length = bp->b_count_desired = range_length;
+       bp->b_flags = flags;
+       bp->b_bn = XFS_BUF_DADDR_NULL;
+       atomic_set(&bp->b_pin_count, 0);
+       init_waitqueue_head(&bp->b_waiters);
+
+       XFS_STATS_INC(xb_create);
+
+       trace_xfs_buf_init(bp, _RET_IP_);
+}
+
+/*
+ *     Allocate a page array capable of holding a specified number
+ *     of pages, and point the page buf at it.
+ */
+STATIC int
+_xfs_buf_get_pages(
+       xfs_buf_t               *bp,
+       int                     page_count,
+       xfs_buf_flags_t         flags)
+{
+       /* Make sure that we have a page list */
+       if (bp->b_pages == NULL) {
+               bp->b_offset = xfs_buf_poff(bp->b_file_offset);
+               bp->b_page_count = page_count;
+               if (page_count <= XB_PAGES) {
+                       bp->b_pages = bp->b_page_array;
+               } else {
+                       bp->b_pages = kmem_alloc(sizeof(struct page *) *
+                                       page_count, xb_to_km(flags));
+                       if (bp->b_pages == NULL)
+                               return -ENOMEM;
+               }
+               memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
+       }
+       return 0;
+}
+
+/*
+ *     Frees b_pages if it was allocated.
+ */
+STATIC void
+_xfs_buf_free_pages(
+       xfs_buf_t       *bp)
+{
+       if (bp->b_pages != bp->b_page_array) {
+               kmem_free(bp->b_pages);
+               bp->b_pages = NULL;
+       }
+}
+
+/*
+ *     Releases the specified buffer.
+ *
+ *     The modification state of any associated pages is left unchanged.
+ *     The buffer most not be on any hash - use xfs_buf_rele instead for
+ *     hashed and refcounted buffers
+ */
+void
+xfs_buf_free(
+       xfs_buf_t               *bp)
+{
+       trace_xfs_buf_free(bp, _RET_IP_);
+
+       ASSERT(list_empty(&bp->b_lru));
+
+       if (bp->b_flags & _XBF_PAGES) {
+               uint            i;
+
+               if (xfs_buf_is_vmapped(bp))
+                       vm_unmap_ram(bp->b_addr - bp->b_offset,
+                                       bp->b_page_count);
+
+               for (i = 0; i < bp->b_page_count; i++) {
+                       struct page     *page = bp->b_pages[i];
+
+                       __free_page(page);
+               }
+       } else if (bp->b_flags & _XBF_KMEM)
+               kmem_free(bp->b_addr);
+       _xfs_buf_free_pages(bp);
+       xfs_buf_deallocate(bp);
+}
+
+/*
+ * Allocates all the pages for buffer in question and builds it's page list.
+ */
+STATIC int
+xfs_buf_allocate_memory(
+       xfs_buf_t               *bp,
+       uint                    flags)
+{
+       size_t                  size = bp->b_count_desired;
+       size_t                  nbytes, offset;
+       gfp_t                   gfp_mask = xb_to_gfp(flags);
+       unsigned short          page_count, i;
+       xfs_off_t               end;
+       int                     error;
+
+       /*
+        * for buffers that are contained within a single page, just allocate
+        * the memory from the heap - there's no need for the complexity of
+        * page arrays to keep allocation down to order 0.
+        */
+       if (bp->b_buffer_length < PAGE_SIZE) {
+               bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
+               if (!bp->b_addr) {
+                       /* low memory - use alloc_page loop instead */
+                       goto use_alloc_page;
+               }
+
+               if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
+                                                               PAGE_MASK) !=
+                   ((unsigned long)bp->b_addr & PAGE_MASK)) {
+                       /* b_addr spans two pages - use alloc_page instead */
+                       kmem_free(bp->b_addr);
+                       bp->b_addr = NULL;
+                       goto use_alloc_page;
+               }
+               bp->b_offset = offset_in_page(bp->b_addr);
+               bp->b_pages = bp->b_page_array;
+               bp->b_pages[0] = virt_to_page(bp->b_addr);
+               bp->b_page_count = 1;
+               bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
+               return 0;
+       }
+
+use_alloc_page:
+       end = bp->b_file_offset + bp->b_buffer_length;
+       page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
+       error = _xfs_buf_get_pages(bp, page_count, flags);
+       if (unlikely(error))
+               return error;
+
+       offset = bp->b_offset;
+       bp->b_flags |= _XBF_PAGES;
+
+       for (i = 0; i < bp->b_page_count; i++) {
+               struct page     *page;
+               uint            retries = 0;
+retry:
+               page = alloc_page(gfp_mask);
+               if (unlikely(page == NULL)) {
+                       if (flags & XBF_READ_AHEAD) {
+                               bp->b_page_count = i;
+                               error = ENOMEM;
+                               goto out_free_pages;
+                       }
+
+                       /*
+                        * This could deadlock.
+                        *
+                        * But until all the XFS lowlevel code is revamped to
+                        * handle buffer allocation failures we can't do much.
+                        */
+                       if (!(++retries % 100))
+                               xfs_err(NULL,
+               "possible memory allocation deadlock in %s (mode:0x%x)",
+                                       __func__, gfp_mask);
+
+                       XFS_STATS_INC(xb_page_retries);
+                       congestion_wait(BLK_RW_ASYNC, HZ/50);
+                       goto retry;
+               }
+
+               XFS_STATS_INC(xb_page_found);
+
+               nbytes = min_t(size_t, size, PAGE_SIZE - offset);
+               size -= nbytes;
+               bp->b_pages[i] = page;
+               offset = 0;
+       }
+       return 0;
+
+out_free_pages:
+       for (i = 0; i < bp->b_page_count; i++)
+               __free_page(bp->b_pages[i]);
+       return error;
+}
+
+/*
+ *     Map buffer into kernel address-space if necessary.
+ */
+STATIC int
+_xfs_buf_map_pages(
+       xfs_buf_t               *bp,
+       uint                    flags)
+{
+       ASSERT(bp->b_flags & _XBF_PAGES);
+       if (bp->b_page_count == 1) {
+               /* A single page buffer is always mappable */
+               bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
+               bp->b_flags |= XBF_MAPPED;
+       } else if (flags & XBF_MAPPED) {
+               int retried = 0;
+
+               do {
+                       bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
+                                               -1, PAGE_KERNEL);
+                       if (bp->b_addr)
+                               break;
+                       vm_unmap_aliases();
+               } while (retried++ <= 1);
+
+               if (!bp->b_addr)
+                       return -ENOMEM;
+               bp->b_addr += bp->b_offset;
+               bp->b_flags |= XBF_MAPPED;
+       }
+
+       return 0;
+}
+
+/*
+ *     Finding and Reading Buffers
+ */
+
+/*
+ *     Look up, and creates if absent, a lockable buffer for
+ *     a given range of an inode.  The buffer is returned
+ *     locked.  If other overlapping buffers exist, they are
+ *     released before the new buffer is created and locked,
+ *     which may imply that this call will block until those buffers
+ *     are unlocked.  No I/O is implied by this call.
+ */
+xfs_buf_t *
+_xfs_buf_find(
+       xfs_buftarg_t           *btp,   /* block device target          */
+       xfs_off_t               ioff,   /* starting offset of range     */
+       size_t                  isize,  /* length of range              */
+       xfs_buf_flags_t         flags,
+       xfs_buf_t               *new_bp)
+{
+       xfs_off_t               range_base;
+       size_t                  range_length;
+       struct xfs_perag        *pag;
+       struct rb_node          **rbp;
+       struct rb_node          *parent;
+       xfs_buf_t               *bp;
+
+       range_base = (ioff << BBSHIFT);
+       range_length = (isize << BBSHIFT);
+
+       /* Check for IOs smaller than the sector size / not sector aligned */
+       ASSERT(!(range_length < (1 << btp->bt_sshift)));
+       ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
+
+       /* get tree root */
+       pag = xfs_perag_get(btp->bt_mount,
+                               xfs_daddr_to_agno(btp->bt_mount, ioff));
+
+       /* walk tree */
+       spin_lock(&pag->pag_buf_lock);
+       rbp = &pag->pag_buf_tree.rb_node;
+       parent = NULL;
+       bp = NULL;
+       while (*rbp) {
+               parent = *rbp;
+               bp = rb_entry(parent, struct xfs_buf, b_rbnode);
+
+               if (range_base < bp->b_file_offset)
+                       rbp = &(*rbp)->rb_left;
+               else if (range_base > bp->b_file_offset)
+                       rbp = &(*rbp)->rb_right;
+               else {
+                       /*
+                        * found a block offset match. If the range doesn't
+                        * match, the only way this is allowed is if the buffer
+                        * in the cache is stale and the transaction that made
+                        * it stale has not yet committed. i.e. we are
+                        * reallocating a busy extent. Skip this buffer and
+                        * continue searching to the right for an exact match.
+                        */
+                       if (bp->b_buffer_length != range_length) {
+                               ASSERT(bp->b_flags & XBF_STALE);
+                               rbp = &(*rbp)->rb_right;
+                               continue;
+                       }
+                       atomic_inc(&bp->b_hold);
+                       goto found;
+               }
+       }
+
+       /* No match found */
+       if (new_bp) {
+               _xfs_buf_initialize(new_bp, btp, range_base,
+                               range_length, flags);
+               rb_link_node(&new_bp->b_rbnode, parent, rbp);
+               rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
+               /* the buffer keeps the perag reference until it is freed */
+               new_bp->b_pag = pag;
+               spin_unlock(&pag->pag_buf_lock);
+       } else {
+               XFS_STATS_INC(xb_miss_locked);
+               spin_unlock(&pag->pag_buf_lock);
+               xfs_perag_put(pag);
+       }
+       return new_bp;
+
+found:
+       spin_unlock(&pag->pag_buf_lock);
+       xfs_perag_put(pag);
+
+       if (!xfs_buf_trylock(bp)) {
+               if (flags & XBF_TRYLOCK) {
+                       xfs_buf_rele(bp);
+                       XFS_STATS_INC(xb_busy_locked);
+                       return NULL;
+               }
+               xfs_buf_lock(bp);
+               XFS_STATS_INC(xb_get_locked_waited);
+       }
+
+       /*
+        * if the buffer is stale, clear all the external state associated with
+        * it. We need to keep flags such as how we allocated the buffer memory
+        * intact here.
+        */
+       if (bp->b_flags & XBF_STALE) {
+               ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
+               bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
+       }
+
+       trace_xfs_buf_find(bp, flags, _RET_IP_);
+       XFS_STATS_INC(xb_get_locked);
+       return bp;
+}
+
+/*
+ *     Assembles a buffer covering the specified range.
+ *     Storage in memory for all portions of the buffer will be allocated,
+ *     although backing storage may not be.
+ */
+xfs_buf_t *
+xfs_buf_get(
+       xfs_buftarg_t           *target,/* target for buffer            */
+       xfs_off_t               ioff,   /* starting offset of range     */
+       size_t                  isize,  /* length of range              */
+       xfs_buf_flags_t         flags)
+{
+       xfs_buf_t               *bp, *new_bp;
+       int                     error = 0;
+
+       new_bp = xfs_buf_allocate(flags);
+       if (unlikely(!new_bp))
+               return NULL;
+
+       bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
+       if (bp == new_bp) {
+               error = xfs_buf_allocate_memory(bp, flags);
+               if (error)
+                       goto no_buffer;
+       } else {
+               xfs_buf_deallocate(new_bp);
+               if (unlikely(bp == NULL))
+                       return NULL;
+       }
+
+       if (!(bp->b_flags & XBF_MAPPED)) {
+               error = _xfs_buf_map_pages(bp, flags);
+               if (unlikely(error)) {
+                       xfs_warn(target->bt_mount,
+                               "%s: failed to map pages\n", __func__);
+                       goto no_buffer;
+               }
+       }
+
+       XFS_STATS_INC(xb_get);
+
+       /*
+        * Always fill in the block number now, the mapped cases can do
+        * their own overlay of this later.
+        */
+       bp->b_bn = ioff;
+       bp->b_count_desired = bp->b_buffer_length;
+
+       trace_xfs_buf_get(bp, flags, _RET_IP_);
+       return bp;
+
+ no_buffer:
+       if (flags & (XBF_LOCK | XBF_TRYLOCK))
+               xfs_buf_unlock(bp);
+       xfs_buf_rele(bp);
+       return NULL;
+}
+
+STATIC int
+_xfs_buf_read(
+       xfs_buf_t               *bp,
+       xfs_buf_flags_t         flags)
+{
+       int                     status;
+
+       ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
+       ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+
+       bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD);
+       bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
+
+       status = xfs_buf_iorequest(bp);
+       if (status || bp->b_error || (flags & XBF_ASYNC))
+               return status;
+       return xfs_buf_iowait(bp);
+}
+
+xfs_buf_t *
+xfs_buf_read(
+       xfs_buftarg_t           *target,
+       xfs_off_t               ioff,
+       size_t                  isize,
+       xfs_buf_flags_t         flags)
+{
+       xfs_buf_t               *bp;
+
+       flags |= XBF_READ;
+
+       bp = xfs_buf_get(target, ioff, isize, flags);
+       if (bp) {
+               trace_xfs_buf_read(bp, flags, _RET_IP_);
+
+               if (!XFS_BUF_ISDONE(bp)) {
+                       XFS_STATS_INC(xb_get_read);
+                       _xfs_buf_read(bp, flags);
+               } else if (flags & XBF_ASYNC) {
+                       /*
+                        * Read ahead call which is already satisfied,
+                        * drop the buffer
+                        */
+                       goto no_buffer;
+               } else {
+                       /* We do not want read in the flags */
+                       bp->b_flags &= ~XBF_READ;
+               }
+       }
+
+       return bp;
+
+ no_buffer:
+       if (flags & (XBF_LOCK | XBF_TRYLOCK))
+               xfs_buf_unlock(bp);
+       xfs_buf_rele(bp);
+       return NULL;
+}
+
+/*
+ *     If we are not low on memory then do the readahead in a deadlock
+ *     safe manner.
+ */
+void
+xfs_buf_readahead(
+       xfs_buftarg_t           *target,
+       xfs_off_t               ioff,
+       size_t                  isize)
+{
+       if (bdi_read_congested(target->bt_bdi))
+               return;
+
+       xfs_buf_read(target, ioff, isize,
+                    XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
+}
+
+/*
+ * Read an uncached buffer from disk. Allocates and returns a locked
+ * buffer containing the disk contents or nothing.
+ */
+struct xfs_buf *
+xfs_buf_read_uncached(
+       struct xfs_mount        *mp,
+       struct xfs_buftarg      *target,
+       xfs_daddr_t             daddr,
+       size_t                  length,
+       int                     flags)
+{
+       xfs_buf_t               *bp;
+       int                     error;
+
+       bp = xfs_buf_get_uncached(target, length, flags);
+       if (!bp)
+               return NULL;
+
+       /* set up the buffer for a read IO */
+       XFS_BUF_SET_ADDR(bp, daddr);
+       XFS_BUF_READ(bp);
+
+       xfsbdstrat(mp, bp);
+       error = xfs_buf_iowait(bp);
+       if (error || bp->b_error) {
+               xfs_buf_relse(bp);
+               return NULL;
+       }
+       return bp;
+}
+
+xfs_buf_t *
+xfs_buf_get_empty(
+       size_t                  len,
+       xfs_buftarg_t           *target)
+{
+       xfs_buf_t               *bp;
+
+       bp = xfs_buf_allocate(0);
+       if (bp)
+               _xfs_buf_initialize(bp, target, 0, len, 0);
+       return bp;
+}
+
+/*
+ * Return a buffer allocated as an empty buffer and associated to external
+ * memory via xfs_buf_associate_memory() back to it's empty state.
+ */
+void
+xfs_buf_set_empty(
+       struct xfs_buf          *bp,
+       size_t                  len)
+{
+       if (bp->b_pages)
+               _xfs_buf_free_pages(bp);
+
+       bp->b_pages = NULL;
+       bp->b_page_count = 0;
+       bp->b_addr = NULL;
+       bp->b_file_offset = 0;
+       bp->b_buffer_length = bp->b_count_desired = len;
+       bp->b_bn = XFS_BUF_DADDR_NULL;
+       bp->b_flags &= ~XBF_MAPPED;
+}
+
+static inline struct page *
+mem_to_page(
+       void                    *addr)
+{
+       if ((!is_vmalloc_addr(addr))) {
+               return virt_to_page(addr);
+       } else {
+               return vmalloc_to_page(addr);
+       }
+}
+
+int
+xfs_buf_associate_memory(
+       xfs_buf_t               *bp,
+       void                    *mem,
+       size_t                  len)
+{
+       int                     rval;
+       int                     i = 0;
+       unsigned long           pageaddr;
+       unsigned long           offset;
+       size_t                  buflen;
+       int                     page_count;
+
+       pageaddr = (unsigned long)mem & PAGE_MASK;
+       offset = (unsigned long)mem - pageaddr;
+       buflen = PAGE_ALIGN(len + offset);
+       page_count = buflen >> PAGE_SHIFT;
+
+       /* Free any previous set of page pointers */
+       if (bp->b_pages)
+               _xfs_buf_free_pages(bp);
+
+       bp->b_pages = NULL;
+       bp->b_addr = mem;
+
+       rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
+       if (rval)
+               return rval;
+
+       bp->b_offset = offset;
+
+       for (i = 0; i < bp->b_page_count; i++) {
+               bp->b_pages[i] = mem_to_page((void *)pageaddr);
+               pageaddr += PAGE_SIZE;
+       }
+
+       bp->b_count_desired = len;
+       bp->b_buffer_length = buflen;
+       bp->b_flags |= XBF_MAPPED;
+
+       return 0;
+}
+
+xfs_buf_t *
+xfs_buf_get_uncached(
+       struct xfs_buftarg      *target,
+       size_t                  len,
+       int                     flags)
+{
+       unsigned long           page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
+       int                     error, i;
+       xfs_buf_t               *bp;
+
+       bp = xfs_buf_allocate(0);
+       if (unlikely(bp == NULL))
+               goto fail;
+       _xfs_buf_initialize(bp, target, 0, len, 0);
+
+       error = _xfs_buf_get_pages(bp, page_count, 0);
+       if (error)
+               goto fail_free_buf;
+
+       for (i = 0; i < page_count; i++) {
+               bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
+               if (!bp->b_pages[i])
+                       goto fail_free_mem;
+       }
+       bp->b_flags |= _XBF_PAGES;
+
+       error = _xfs_buf_map_pages(bp, XBF_MAPPED);
+       if (unlikely(error)) {
+               xfs_warn(target->bt_mount,
+                       "%s: failed to map pages\n", __func__);
+               goto fail_free_mem;
+       }
+
+       trace_xfs_buf_get_uncached(bp, _RET_IP_);
+       return bp;
+
+ fail_free_mem:
+       while (--i >= 0)
+               __free_page(bp->b_pages[i]);
+       _xfs_buf_free_pages(bp);
+ fail_free_buf:
+       xfs_buf_deallocate(bp);
+ fail:
+       return NULL;
+}
+
+/*
+ *     Increment reference count on buffer, to hold the buffer concurrently
+ *     with another thread which may release (free) the buffer asynchronously.
+ *     Must hold the buffer already to call this function.
+ */
+void
+xfs_buf_hold(
+       xfs_buf_t               *bp)
+{
+       trace_xfs_buf_hold(bp, _RET_IP_);
+       atomic_inc(&bp->b_hold);
+}
+
+/*
+ *     Releases a hold on the specified buffer.  If the
+ *     the hold count is 1, calls xfs_buf_free.
+ */
+void
+xfs_buf_rele(
+       xfs_buf_t               *bp)
+{
+       struct xfs_perag        *pag = bp->b_pag;
+
+       trace_xfs_buf_rele(bp, _RET_IP_);
+
+       if (!pag) {
+               ASSERT(list_empty(&bp->b_lru));
+               ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
+               if (atomic_dec_and_test(&bp->b_hold))
+                       xfs_buf_free(bp);
+               return;
+       }
+
+       ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
+
+       ASSERT(atomic_read(&bp->b_hold) > 0);
+       if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
+               if (!(bp->b_flags & XBF_STALE) &&
+                          atomic_read(&bp->b_lru_ref)) {
+                       xfs_buf_lru_add(bp);
+                       spin_unlock(&pag->pag_buf_lock);
+               } else {
+                       xfs_buf_lru_del(bp);
+                       ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
+                       rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+                       spin_unlock(&pag->pag_buf_lock);
+                       xfs_perag_put(pag);
+                       xfs_buf_free(bp);
+               }
+       }
+}
+
+
+/*
+ *     Lock a buffer object, if it is not already locked.
+ *
+ *     If we come across a stale, pinned, locked buffer, we know that we are
+ *     being asked to lock a buffer that has been reallocated. Because it is
+ *     pinned, we know that the log has not been pushed to disk and hence it
+ *     will still be locked.  Rather than continuing to have trylock attempts
+ *     fail until someone else pushes the log, push it ourselves before
+ *     returning.  This means that the xfsaild will not get stuck trying
+ *     to push on stale inode buffers.
+ */
+int
+xfs_buf_trylock(
+       struct xfs_buf          *bp)
+{
+       int                     locked;
+
+       locked = down_trylock(&bp->b_sema) == 0;
+       if (locked)
+               XB_SET_OWNER(bp);
+       else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
+               xfs_log_force(bp->b_target->bt_mount, 0);
+
+       trace_xfs_buf_trylock(bp, _RET_IP_);
+       return locked;
+}
+
+/*
+ *     Lock a buffer object.
+ *
+ *     If we come across a stale, pinned, locked buffer, we know that we
+ *     are being asked to lock a buffer that has been reallocated. Because
+ *     it is pinned, we know that the log has not been pushed to disk and
+ *     hence it will still be locked. Rather than sleeping until someone
+ *     else pushes the log, push it ourselves before trying to get the lock.
+ */
+void
+xfs_buf_lock(
+       struct xfs_buf          *bp)
+{
+       trace_xfs_buf_lock(bp, _RET_IP_);
+
+       if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
+               xfs_log_force(bp->b_target->bt_mount, 0);
+       down(&bp->b_sema);
+       XB_SET_OWNER(bp);
+
+       trace_xfs_buf_lock_done(bp, _RET_IP_);
+}
+
+/*
+ *     Releases the lock on the buffer object.
+ *     If the buffer is marked delwri but is not queued, do so before we
+ *     unlock the buffer as we need to set flags correctly.  We also need to
+ *     take a reference for the delwri queue because the unlocker is going to
+ *     drop their's and they don't know we just queued it.
+ */
+void
+xfs_buf_unlock(
+       struct xfs_buf          *bp)
+{
+       if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
+               atomic_inc(&bp->b_hold);
+               bp->b_flags |= XBF_ASYNC;
+               xfs_buf_delwri_queue(bp, 0);
+       }
+
+       XB_CLEAR_OWNER(bp);
+       up(&bp->b_sema);
+
+       trace_xfs_buf_unlock(bp, _RET_IP_);
+}
+
+STATIC void
+xfs_buf_wait_unpin(
+       xfs_buf_t               *bp)
+{
+       DECLARE_WAITQUEUE       (wait, current);
+
+       if (atomic_read(&bp->b_pin_count) == 0)
+               return;
+
+       add_wait_queue(&bp->b_waiters, &wait);
+       for (;;) {
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               if (atomic_read(&bp->b_pin_count) == 0)
+                       break;
+               io_schedule();
+       }
+       remove_wait_queue(&bp->b_waiters, &wait);
+       set_current_state(TASK_RUNNING);
+}
+
+/*
+ *     Buffer Utility Routines
+ */
+
+STATIC void
+xfs_buf_iodone_work(
+       struct work_struct      *work)
+{
+       xfs_buf_t               *bp =
+               container_of(work, xfs_buf_t, b_iodone_work);
+
+       if (bp->b_iodone)
+               (*(bp->b_iodone))(bp);
+       else if (bp->b_flags & XBF_ASYNC)
+               xfs_buf_relse(bp);
+}
+
+void
+xfs_buf_ioend(
+       xfs_buf_t               *bp,
+       int                     schedule)
+{
+       trace_xfs_buf_iodone(bp, _RET_IP_);
+
+       bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
+       if (bp->b_error == 0)
+               bp->b_flags |= XBF_DONE;
+
+       if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
+               if (schedule) {
+                       INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
+                       queue_work(xfslogd_workqueue, &bp->b_iodone_work);
+               } else {
+                       xfs_buf_iodone_work(&bp->b_iodone_work);
+               }
+       } else {
+               complete(&bp->b_iowait);
+       }
+}
+
+void
+xfs_buf_ioerror(
+       xfs_buf_t               *bp,
+       int                     error)
+{
+       ASSERT(error >= 0 && error <= 0xffff);
+       bp->b_error = (unsigned short)error;
+       trace_xfs_buf_ioerror(bp, error, _RET_IP_);
+}
+
+int
+xfs_bwrite(
+       struct xfs_mount        *mp,
+       struct xfs_buf          *bp)
+{
+       int                     error;
+
+       bp->b_flags |= XBF_WRITE;
+       bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
+
+       xfs_buf_delwri_dequeue(bp);
+       xfs_bdstrat_cb(bp);
+
+       error = xfs_buf_iowait(bp);
+       if (error)
+               xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+       xfs_buf_relse(bp);
+       return error;
+}
+
+void
+xfs_bdwrite(
+       void                    *mp,
+       struct xfs_buf          *bp)
+{
+       trace_xfs_buf_bdwrite(bp, _RET_IP_);
+
+       bp->b_flags &= ~XBF_READ;
+       bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
+
+       xfs_buf_delwri_queue(bp, 1);
+}
+
+/*
+ * Called when we want to stop a buffer from getting written or read.
+ * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
+ * so that the proper iodone callbacks get called.
+ */
+STATIC int
+xfs_bioerror(
+       xfs_buf_t *bp)
+{
+#ifdef XFSERRORDEBUG
+       ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
+#endif
+
+       /*
+        * No need to wait until the buffer is unpinned, we aren't flushing it.
+        */
+       xfs_buf_ioerror(bp, EIO);
+
+       /*
+        * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
+        */
+       XFS_BUF_UNREAD(bp);
+       XFS_BUF_UNDELAYWRITE(bp);
+       XFS_BUF_UNDONE(bp);
+       XFS_BUF_STALE(bp);
+
+       xfs_buf_ioend(bp, 0);
+
+       return EIO;
+}
+
+/*
+ * Same as xfs_bioerror, except that we are releasing the buffer
+ * here ourselves, and avoiding the xfs_buf_ioend call.
+ * This is meant for userdata errors; metadata bufs come with
+ * iodone functions attached, so that we can track down errors.
+ */
+STATIC int
+xfs_bioerror_relse(
+       struct xfs_buf  *bp)
+{
+       int64_t         fl = bp->b_flags;
+       /*
+        * No need to wait until the buffer is unpinned.
+        * We aren't flushing it.
+        *
+        * chunkhold expects B_DONE to be set, whether
+        * we actually finish the I/O or not. We don't want to
+        * change that interface.
+        */
+       XFS_BUF_UNREAD(bp);
+       XFS_BUF_UNDELAYWRITE(bp);
+       XFS_BUF_DONE(bp);
+       XFS_BUF_STALE(bp);
+       bp->b_iodone = NULL;
+       if (!(fl & XBF_ASYNC)) {
+               /*
+                * Mark b_error and B_ERROR _both_.
+                * Lot's of chunkcache code assumes that.
+                * There's no reason to mark error for
+                * ASYNC buffers.
+                */
+               xfs_buf_ioerror(bp, EIO);
+               XFS_BUF_FINISH_IOWAIT(bp);
+       } else {
+               xfs_buf_relse(bp);
+       }
+
+       return EIO;
+}
+
+
+/*
+ * All xfs metadata buffers except log state machine buffers
+ * get this attached as their b_bdstrat callback function.
+ * This is so that we can catch a buffer
+ * after prematurely unpinning it to forcibly shutdown the filesystem.
+ */
+int
+xfs_bdstrat_cb(
+       struct xfs_buf  *bp)
+{
+       if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+               trace_xfs_bdstrat_shut(bp, _RET_IP_);
+               /*
+                * Metadata write that didn't get logged but
+                * written delayed anyway. These aren't associated
+                * with a transaction, and can be ignored.
+                */
+               if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
+                       return xfs_bioerror_relse(bp);
+               else
+                       return xfs_bioerror(bp);
+       }
+
+       xfs_buf_iorequest(bp);
+       return 0;
+}
+
+/*
+ * Wrapper around bdstrat so that we can stop data from going to disk in case
+ * we are shutting down the filesystem.  Typically user data goes thru this
+ * path; one of the exceptions is the superblock.
+ */
+void
+xfsbdstrat(
+       struct xfs_mount        *mp,
+       struct xfs_buf          *bp)
+{
+       if (XFS_FORCED_SHUTDOWN(mp)) {
+               trace_xfs_bdstrat_shut(bp, _RET_IP_);
+               xfs_bioerror_relse(bp);
+               return;
+       }
+
+       xfs_buf_iorequest(bp);
+}
+
+STATIC void
+_xfs_buf_ioend(
+       xfs_buf_t               *bp,
+       int                     schedule)
+{
+       if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
+               xfs_buf_ioend(bp, schedule);
+}
+
+STATIC void
+xfs_buf_bio_end_io(
+       struct bio              *bio,
+       int                     error)
+{
+       xfs_buf_t               *bp = (xfs_buf_t *)bio->bi_private;
+
+       xfs_buf_ioerror(bp, -error);
+
+       if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
+               invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
+
+       _xfs_buf_ioend(bp, 1);
+       bio_put(bio);
+}
+
+STATIC void
+_xfs_buf_ioapply(
+       xfs_buf_t               *bp)
+{
+       int                     rw, map_i, total_nr_pages, nr_pages;
+       struct bio              *bio;
+       int                     offset = bp->b_offset;
+       int                     size = bp->b_count_desired;
+       sector_t                sector = bp->b_bn;
+
+       total_nr_pages = bp->b_page_count;
+       map_i = 0;
+
+       if (bp->b_flags & XBF_WRITE) {
+               if (bp->b_flags & XBF_SYNCIO)
+                       rw = WRITE_SYNC;
+               else
+                       rw = WRITE;
+               if (bp->b_flags & XBF_FUA)
+                       rw |= REQ_FUA;
+               if (bp->b_flags & XBF_FLUSH)
+                       rw |= REQ_FLUSH;
+       } else if (bp->b_flags & XBF_READ_AHEAD) {
+               rw = READA;
+       } else {
+               rw = READ;
+       }
+
+       /* we only use the buffer cache for meta-data */
+       rw |= REQ_META;
+
+next_chunk:
+       atomic_inc(&bp->b_io_remaining);
+       nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
+       if (nr_pages > total_nr_pages)
+               nr_pages = total_nr_pages;
+
+       bio = bio_alloc(GFP_NOIO, nr_pages);
+       bio->bi_bdev = bp->b_target->bt_bdev;
+       bio->bi_sector = sector;
+       bio->bi_end_io = xfs_buf_bio_end_io;
+       bio->bi_private = bp;
+
+
+       for (; size && nr_pages; nr_pages--, map_i++) {
+               int     rbytes, nbytes = PAGE_SIZE - offset;
+
+               if (nbytes > size)
+                       nbytes = size;
+
+               rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
+               if (rbytes < nbytes)
+                       break;
+
+               offset = 0;
+               sector += nbytes >> BBSHIFT;
+               size -= nbytes;
+               total_nr_pages--;
+       }
+
+       if (likely(bio->bi_size)) {
+               if (xfs_buf_is_vmapped(bp)) {
+                       flush_kernel_vmap_range(bp->b_addr,
+                                               xfs_buf_vmap_len(bp));
+               }
+               submit_bio(rw, bio);
+               if (size)
+                       goto next_chunk;
+       } else {
+               xfs_buf_ioerror(bp, EIO);
+               bio_put(bio);
+       }
+}
+
+int
+xfs_buf_iorequest(
+       xfs_buf_t               *bp)
+{
+       trace_xfs_buf_iorequest(bp, _RET_IP_);
+
+       if (bp->b_flags & XBF_DELWRI) {
+               xfs_buf_delwri_queue(bp, 1);
+               return 0;
+       }
+
+       if (bp->b_flags & XBF_WRITE) {
+               xfs_buf_wait_unpin(bp);
+       }
+
+       xfs_buf_hold(bp);
+
+       /* Set the count to 1 initially, this will stop an I/O
+        * completion callout which happens before we have started
+        * all the I/O from calling xfs_buf_ioend too early.
+        */
+       atomic_set(&bp->b_io_remaining, 1);
+       _xfs_buf_ioapply(bp);
+       _xfs_buf_ioend(bp, 0);
+
+       xfs_buf_rele(bp);
+       return 0;
+}
+
+/*
+ *     Waits for I/O to complete on the buffer supplied.
+ *     It returns immediately if no I/O is pending.
+ *     It returns the I/O error code, if any, or 0 if there was no error.
+ */
+int
+xfs_buf_iowait(
+       xfs_buf_t               *bp)
+{
+       trace_xfs_buf_iowait(bp, _RET_IP_);
+
+       wait_for_completion(&bp->b_iowait);
+
+       trace_xfs_buf_iowait_done(bp, _RET_IP_);
+       return bp->b_error;
+}
+
+xfs_caddr_t
+xfs_buf_offset(
+       xfs_buf_t               *bp,
+       size_t                  offset)
+{
+       struct page             *page;
+
+       if (bp->b_flags & XBF_MAPPED)
+               return bp->b_addr + offset;
+
+       offset += bp->b_offset;
+       page = bp->b_pages[offset >> PAGE_SHIFT];
+       return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
+}
+
+/*
+ *     Move data into or out of a buffer.
+ */
+void
+xfs_buf_iomove(
+       xfs_buf_t               *bp,    /* buffer to process            */
+       size_t                  boff,   /* starting buffer offset       */
+       size_t                  bsize,  /* length to copy               */
+       void                    *data,  /* data address                 */
+       xfs_buf_rw_t            mode)   /* read/write/zero flag         */
+{
+       size_t                  bend, cpoff, csize;
+       struct page             *page;
+
+       bend = boff + bsize;
+       while (boff < bend) {
+               page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
+               cpoff = xfs_buf_poff(boff + bp->b_offset);
+               csize = min_t(size_t,
+                             PAGE_SIZE-cpoff, bp->b_count_desired-boff);
+
+               ASSERT(((csize + cpoff) <= PAGE_SIZE));
+
+               switch (mode) {
+               case XBRW_ZERO:
+                       memset(page_address(page) + cpoff, 0, csize);
+                       break;
+               case XBRW_READ:
+                       memcpy(data, page_address(page) + cpoff, csize);
+                       break;
+               case XBRW_WRITE:
+                       memcpy(page_address(page) + cpoff, data, csize);
+               }
+
+               boff += csize;
+               data += csize;
+       }
+}
+
+/*
+ *     Handling of buffer targets (buftargs).
+ */
+
+/*
+ * Wait for any bufs with callbacks that have been submitted but have not yet
+ * returned. These buffers will have an elevated hold count, so wait on those
+ * while freeing all the buffers only held by the LRU.
+ */
+void
+xfs_wait_buftarg(
+       struct xfs_buftarg      *btp)
+{
+       struct xfs_buf          *bp;
+
+restart:
+       spin_lock(&btp->bt_lru_lock);
+       while (!list_empty(&btp->bt_lru)) {
+               bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+               if (atomic_read(&bp->b_hold) > 1) {
+                       spin_unlock(&btp->bt_lru_lock);
+                       delay(100);
+                       goto restart;
+               }
+               /*
+                * clear the LRU reference count so the bufer doesn't get
+                * ignored in xfs_buf_rele().
+                */
+               atomic_set(&bp->b_lru_ref, 0);
+               spin_unlock(&btp->bt_lru_lock);
+               xfs_buf_rele(bp);
+               spin_lock(&btp->bt_lru_lock);
+       }
+       spin_unlock(&btp->bt_lru_lock);
+}
+
+int
+xfs_buftarg_shrink(
+       struct shrinker         *shrink,
+       struct shrink_control   *sc)
+{
+       struct xfs_buftarg      *btp = container_of(shrink,
+                                       struct xfs_buftarg, bt_shrinker);
+       struct xfs_buf          *bp;
+       int nr_to_scan = sc->nr_to_scan;
+       LIST_HEAD(dispose);
+
+       if (!nr_to_scan)
+               return btp->bt_lru_nr;
+
+       spin_lock(&btp->bt_lru_lock);
+       while (!list_empty(&btp->bt_lru)) {
+               if (nr_to_scan-- <= 0)
+                       break;
+
+               bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+
+               /*
+                * Decrement the b_lru_ref count unless the value is already
+                * zero. If the value is already zero, we need to reclaim the
+                * buffer, otherwise it gets another trip through the LRU.
+                */
+               if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
+                       list_move_tail(&bp->b_lru, &btp->bt_lru);
+                       continue;
+               }
+
+               /*
+                * remove the buffer from the LRU now to avoid needing another
+                * lock round trip inside xfs_buf_rele().
+                */
+               list_move(&bp->b_lru, &dispose);
+               btp->bt_lru_nr--;
+       }
+       spin_unlock(&btp->bt_lru_lock);
+
+       while (!list_empty(&dispose)) {
+               bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
+               list_del_init(&bp->b_lru);
+               xfs_buf_rele(bp);
+       }
+
+       return btp->bt_lru_nr;
+}
+
+void
+xfs_free_buftarg(
+       struct xfs_mount        *mp,
+       struct xfs_buftarg      *btp)
+{
+       unregister_shrinker(&btp->bt_shrinker);
+
+       xfs_flush_buftarg(btp, 1);
+       if (mp->m_flags & XFS_MOUNT_BARRIER)
+               xfs_blkdev_issue_flush(btp);
+
+       kthread_stop(btp->bt_task);
+       kmem_free(btp);
+}
+
+STATIC int
+xfs_setsize_buftarg_flags(
+       xfs_buftarg_t           *btp,
+       unsigned int            blocksize,
+       unsigned int            sectorsize,
+       int                     verbose)
+{
+       btp->bt_bsize = blocksize;
+       btp->bt_sshift = ffs(sectorsize) - 1;
+       btp->bt_smask = sectorsize - 1;
+
+       if (set_blocksize(btp->bt_bdev, sectorsize)) {
+               xfs_warn(btp->bt_mount,
+                       "Cannot set_blocksize to %u on device %s\n",
+                       sectorsize, xfs_buf_target_name(btp));
+               return EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ *     When allocating the initial buffer target we have not yet
+ *     read in the superblock, so don't know what sized sectors
+ *     are being used is at this early stage.  Play safe.
+ */
+STATIC int
+xfs_setsize_buftarg_early(
+       xfs_buftarg_t           *btp,
+       struct block_device     *bdev)
+{
+       return xfs_setsize_buftarg_flags(btp,
+                       PAGE_SIZE, bdev_logical_block_size(bdev), 0);
+}
+
+int
+xfs_setsize_buftarg(
+       xfs_buftarg_t           *btp,
+       unsigned int            blocksize,
+       unsigned int            sectorsize)
+{
+       return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
+}
+
+STATIC int
+xfs_alloc_delwrite_queue(
+       xfs_buftarg_t           *btp,
+       const char              *fsname)
+{
+       INIT_LIST_HEAD(&btp->bt_delwrite_queue);
+       spin_lock_init(&btp->bt_delwrite_lock);
+       btp->bt_flags = 0;
+       btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
+       if (IS_ERR(btp->bt_task))
+               return PTR_ERR(btp->bt_task);
+       return 0;
+}
+
+xfs_buftarg_t *
+xfs_alloc_buftarg(
+       struct xfs_mount        *mp,
+       struct block_device     *bdev,
+       int                     external,
+       const char              *fsname)
+{
+       xfs_buftarg_t           *btp;
+
+       btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
+
+       btp->bt_mount = mp;
+       btp->bt_dev =  bdev->bd_dev;
+       btp->bt_bdev = bdev;
+       btp->bt_bdi = blk_get_backing_dev_info(bdev);
+       if (!btp->bt_bdi)
+               goto error;
+
+       INIT_LIST_HEAD(&btp->bt_lru);
+       spin_lock_init(&btp->bt_lru_lock);
+       if (xfs_setsize_buftarg_early(btp, bdev))
+               goto error;
+       if (xfs_alloc_delwrite_queue(btp, fsname))
+               goto error;
+       btp->bt_shrinker.shrink = xfs_buftarg_shrink;
+       btp->bt_shrinker.seeks = DEFAULT_SEEKS;
+       register_shrinker(&btp->bt_shrinker);
+       return btp;
+
+error:
+       kmem_free(btp);
+       return NULL;
+}
+
+
+/*
+ *     Delayed write buffer handling
+ */
+STATIC void
+xfs_buf_delwri_queue(
+       xfs_buf_t               *bp,
+       int                     unlock)
+{
+       struct list_head        *dwq = &bp->b_target->bt_delwrite_queue;
+       spinlock_t              *dwlk = &bp->b_target->bt_delwrite_lock;
+
+       trace_xfs_buf_delwri_queue(bp, _RET_IP_);
+
+       ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
+
+       spin_lock(dwlk);
+       /* If already in the queue, dequeue and place at tail */
+       if (!list_empty(&bp->b_list)) {
+               ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+               if (unlock)
+                       atomic_dec(&bp->b_hold);
+               list_del(&bp->b_list);
+       }
+
+       if (list_empty(dwq)) {
+               /* start xfsbufd as it is about to have something to do */
+               wake_up_process(bp->b_target->bt_task);
+       }
+
+       bp->b_flags |= _XBF_DELWRI_Q;
+       list_add_tail(&bp->b_list, dwq);
+       bp->b_queuetime = jiffies;
+       spin_unlock(dwlk);
+
+       if (unlock)
+               xfs_buf_unlock(bp);
+}
+
+void
+xfs_buf_delwri_dequeue(
+       xfs_buf_t               *bp)
+{
+       spinlock_t              *dwlk = &bp->b_target->bt_delwrite_lock;
+       int                     dequeued = 0;
+
+       spin_lock(dwlk);
+       if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
+               ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+               list_del_init(&bp->b_list);
+               dequeued = 1;
+       }
+       bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
+       spin_unlock(dwlk);
+
+       if (dequeued)
+               xfs_buf_rele(bp);
+
+       trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
+}
+
+/*
+ * If a delwri buffer needs to be pushed before it has aged out, then promote
+ * it to the head of the delwri queue so that it will be flushed on the next
+ * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
+ * than the age currently needed to flush the buffer. Hence the next time the
+ * xfsbufd sees it is guaranteed to be considered old enough to flush.
+ */
+void
+xfs_buf_delwri_promote(
+       struct xfs_buf  *bp)
+{
+       struct xfs_buftarg *btp = bp->b_target;
+       long            age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
+
+       ASSERT(bp->b_flags & XBF_DELWRI);
+       ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+
+       /*
+        * Check the buffer age before locking the delayed write queue as we
+        * don't need to promote buffers that are already past the flush age.
+        */
+       if (bp->b_queuetime < jiffies - age)
+               return;
+       bp->b_queuetime = jiffies - age;
+       spin_lock(&btp->bt_delwrite_lock);
+       list_move(&bp->b_list, &btp->bt_delwrite_queue);
+       spin_unlock(&btp->bt_delwrite_lock);
+}
+
+STATIC void
+xfs_buf_runall_queues(
+       struct workqueue_struct *queue)
+{
+       flush_workqueue(queue);
+}
+
+/*
+ * Move as many buffers as specified to the supplied list
+ * idicating if we skipped any buffers to prevent deadlocks.
+ */
+STATIC int
+xfs_buf_delwri_split(
+       xfs_buftarg_t   *target,
+       struct list_head *list,
+       unsigned long   age)
+{
+       xfs_buf_t       *bp, *n;
+       struct list_head *dwq = &target->bt_delwrite_queue;
+       spinlock_t      *dwlk = &target->bt_delwrite_lock;
+       int             skipped = 0;
+       int             force;
+
+       force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
+       INIT_LIST_HEAD(list);
+       spin_lock(dwlk);
+       list_for_each_entry_safe(bp, n, dwq, b_list) {
+               ASSERT(bp->b_flags & XBF_DELWRI);
+
+               if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) {
+                       if (!force &&
+                           time_before(jiffies, bp->b_queuetime + age)) {
+                               xfs_buf_unlock(bp);
+                               break;
+                       }
+
+                       bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q);
+                       bp->b_flags |= XBF_WRITE;
+                       list_move_tail(&bp->b_list, list);
+                       trace_xfs_buf_delwri_split(bp, _RET_IP_);
+               } else
+                       skipped++;
+       }
+       spin_unlock(dwlk);
+
+       return skipped;
+
+}
+
+/*
+ * Compare function is more complex than it needs to be because
+ * the return value is only 32 bits and we are doing comparisons
+ * on 64 bit values
+ */
+static int
+xfs_buf_cmp(
+       void            *priv,
+       struct list_head *a,
+       struct list_head *b)
+{
+       struct xfs_buf  *ap = container_of(a, struct xfs_buf, b_list);
+       struct xfs_buf  *bp = container_of(b, struct xfs_buf, b_list);
+       xfs_daddr_t             diff;
+
+       diff = ap->b_bn - bp->b_bn;
+       if (diff < 0)
+               return -1;
+       if (diff > 0)
+               return 1;
+       return 0;
+}
+
+STATIC int
+xfsbufd(
+       void            *data)
+{
+       xfs_buftarg_t   *target = (xfs_buftarg_t *)data;
+
+       current->flags |= PF_MEMALLOC;
+
+       set_freezable();
+
+       do {
+               long    age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
+               long    tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
+               struct list_head tmp;
+               struct blk_plug plug;
+
+               if (unlikely(freezing(current))) {
+                       set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
+                       refrigerator();
+               } else {
+                       clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
+               }
+
+               /* sleep for a long time if there is nothing to do. */
+               if (list_empty(&target->bt_delwrite_queue))
+                       tout = MAX_SCHEDULE_TIMEOUT;
+               schedule_timeout_interruptible(tout);
+
+               xfs_buf_delwri_split(target, &tmp, age);
+               list_sort(NULL, &tmp, xfs_buf_cmp);
+
+               blk_start_plug(&plug);
+               while (!list_empty(&tmp)) {
+                       struct xfs_buf *bp;
+                       bp = list_first_entry(&tmp, struct xfs_buf, b_list);
+                       list_del_init(&bp->b_list);
+                       xfs_bdstrat_cb(bp);
+               }
+               blk_finish_plug(&plug);
+       } while (!kthread_should_stop());
+
+       return 0;
+}
+
+/*
+ *     Go through all incore buffers, and release buffers if they belong to
+ *     the given device. This is used in filesystem error handling to
+ *     preserve the consistency of its metadata.
+ */
+int
+xfs_flush_buftarg(
+       xfs_buftarg_t   *target,
+       int             wait)
+{
+       xfs_buf_t       *bp;
+       int             pincount = 0;
+       LIST_HEAD(tmp_list);
+       LIST_HEAD(wait_list);
+       struct blk_plug plug;
+
+       xfs_buf_runall_queues(xfsconvertd_workqueue);
+       xfs_buf_runall_queues(xfsdatad_workqueue);
+       xfs_buf_runall_queues(xfslogd_workqueue);
+
+       set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
+       pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
+
+       /*
+        * Dropped the delayed write list lock, now walk the temporary list.
+        * All I/O is issued async and then if we need to wait for completion
+        * we do that after issuing all the IO.
+        */
+       list_sort(NULL, &tmp_list, xfs_buf_cmp);
+
+       blk_start_plug(&plug);
+       while (!list_empty(&tmp_list)) {
+               bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
+               ASSERT(target == bp->b_target);
+               list_del_init(&bp->b_list);
+               if (wait) {
+                       bp->b_flags &= ~XBF_ASYNC;
+                       list_add(&bp->b_list, &wait_list);
+               }
+               xfs_bdstrat_cb(bp);
+       }
+       blk_finish_plug(&plug);
+
+       if (wait) {
+               /* Wait for IO to complete. */
+               while (!list_empty(&wait_list)) {
+                       bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
+
+                       list_del_init(&bp->b_list);
+                       xfs_buf_iowait(bp);
+                       xfs_buf_relse(bp);
+               }
+       }
+
+       return pincount;
+}
+
+int __init
+xfs_buf_init(void)
+{
+       xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
+                                               KM_ZONE_HWALIGN, NULL);
+       if (!xfs_buf_zone)
+               goto out;
+
+       xfslogd_workqueue = alloc_workqueue("xfslogd",
+                                       WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
+       if (!xfslogd_workqueue)
+               goto out_free_buf_zone;
+
+       xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
+       if (!xfsdatad_workqueue)
+               goto out_destroy_xfslogd_workqueue;
+
+       xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
+                                               WQ_MEM_RECLAIM, 1);
+       if (!xfsconvertd_workqueue)
+               goto out_destroy_xfsdatad_workqueue;
+
+       return 0;
+
+ out_destroy_xfsdatad_workqueue:
+       destroy_workqueue(xfsdatad_workqueue);
+ out_destroy_xfslogd_workqueue:
+       destroy_workqueue(xfslogd_workqueue);
+ out_free_buf_zone:
+       kmem_zone_destroy(xfs_buf_zone);
+ out:
+       return -ENOMEM;
+}
+
+void
+xfs_buf_terminate(void)
+{
+       destroy_workqueue(xfsconvertd_workqueue);
+       destroy_workqueue(xfsdatad_workqueue);
+       destroy_workqueue(xfslogd_workqueue);
+       kmem_zone_destroy(xfs_buf_zone);
+}
+
+#ifdef CONFIG_KDB_MODULES
+struct list_head *
+xfs_get_buftarg_list(void)
+{
+       return &xfs_buftarg_list;
+}
+#endif
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h

new file mode 100644 (file)

index 0000000..620972b
--- /dev/null
+++ b/fs/xfs/xfs_buf.h
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_BUF_H__
+#define __XFS_BUF_H__
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <asm/system.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/uio.h>
+
+/*
+ *     Base types
+ */
+
+#define XFS_BUF_DADDR_NULL     ((xfs_daddr_t) (-1LL))
+
+#define xfs_buf_ctob(pp)       ((pp) * PAGE_CACHE_SIZE)
+#define xfs_buf_btoc(dd)       (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
+#define xfs_buf_btoct(dd)      ((dd) >> PAGE_CACHE_SHIFT)
+#define xfs_buf_poff(aa)       ((aa) & ~PAGE_CACHE_MASK)
+
+typedef enum {
+       XBRW_READ = 1,                  /* transfer into target memory */
+       XBRW_WRITE = 2,                 /* transfer from target memory */
+       XBRW_ZERO = 3,                  /* Zero target memory */
+} xfs_buf_rw_t;
+
+#define XBF_READ       (1 << 0) /* buffer intended for reading from device */
+#define XBF_WRITE      (1 << 1) /* buffer intended for writing to device */
+#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
+#define XBF_MAPPED     (1 << 3) /* buffer mapped (b_addr valid) */
+#define XBF_ASYNC      (1 << 4) /* initiator will not wait for completion */
+#define XBF_DONE       (1 << 5) /* all pages in the buffer uptodate */
+#define XBF_DELWRI     (1 << 6) /* buffer has dirty pages */
+#define XBF_STALE      (1 << 7) /* buffer has been staled, do not find it */
+
+/* I/O hints for the BIO layer */
+#define XBF_SYNCIO     (1 << 10)/* treat this buffer as synchronous I/O */
+#define XBF_FUA                (1 << 11)/* force cache write through mode */
+#define XBF_FLUSH      (1 << 12)/* flush the disk cache before a write */
+
+/* flags used only as arguments to access routines */
+#define XBF_LOCK       (1 << 15)/* lock requested */
+#define XBF_TRYLOCK    (1 << 16)/* lock requested, but do not wait */
+#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */
+
+/* flags used only internally */
+#define _XBF_PAGES     (1 << 20)/* backed by refcounted pages */
+#define _XBF_KMEM      (1 << 21)/* backed by heap memory */
+#define _XBF_DELWRI_Q  (1 << 22)/* buffer on delwri queue */
+
+typedef unsigned int xfs_buf_flags_t;
+
+#define XFS_BUF_FLAGS \
+       { XBF_READ,             "READ" }, \
+       { XBF_WRITE,            "WRITE" }, \
+       { XBF_READ_AHEAD,       "READ_AHEAD" }, \
+       { XBF_MAPPED,           "MAPPED" }, \
+       { XBF_ASYNC,            "ASYNC" }, \
+       { XBF_DONE,             "DONE" }, \
+       { XBF_DELWRI,           "DELWRI" }, \
+       { XBF_STALE,            "STALE" }, \
+       { XBF_SYNCIO,           "SYNCIO" }, \
+       { XBF_FUA,              "FUA" }, \
+       { XBF_FLUSH,            "FLUSH" }, \
+       { XBF_LOCK,             "LOCK" },       /* should never be set */\
+       { XBF_TRYLOCK,          "TRYLOCK" },    /* ditto */\
+       { XBF_DONT_BLOCK,       "DONT_BLOCK" }, /* ditto */\
+       { _XBF_PAGES,           "PAGES" }, \
+       { _XBF_KMEM,            "KMEM" }, \
+       { _XBF_DELWRI_Q,        "DELWRI_Q" }
+
+typedef enum {
+       XBT_FORCE_SLEEP = 0,
+       XBT_FORCE_FLUSH = 1,
+} xfs_buftarg_flags_t;
+
+typedef struct xfs_buftarg {
+       dev_t                   bt_dev;
+       struct block_device     *bt_bdev;
+       struct backing_dev_info *bt_bdi;
+       struct xfs_mount        *bt_mount;
+       unsigned int            bt_bsize;
+       unsigned int            bt_sshift;
+       size_t                  bt_smask;
+
+       /* per device delwri queue */
+       struct task_struct      *bt_task;
+       struct list_head        bt_delwrite_queue;
+       spinlock_t              bt_delwrite_lock;
+       unsigned long           bt_flags;
+
+       /* LRU control structures */
+       struct shrinker         bt_shrinker;
+       struct list_head        bt_lru;
+       spinlock_t              bt_lru_lock;
+       unsigned int            bt_lru_nr;
+} xfs_buftarg_t;
+
+struct xfs_buf;
+typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
+
+#define XB_PAGES       2
+
+typedef struct xfs_buf {
+       /*
+        * first cacheline holds all the fields needed for an uncontended cache
+        * hit to be fully processed. The semaphore straddles the cacheline
+        * boundary, but the counter and lock sits on the first cacheline,
+        * which is the only bit that is touched if we hit the semaphore
+        * fast-path on locking.
+        */
+       struct rb_node          b_rbnode;       /* rbtree node */
+       xfs_off_t               b_file_offset;  /* offset in file */
+       size_t                  b_buffer_length;/* size of buffer in bytes */
+       atomic_t                b_hold;         /* reference count */
+       atomic_t                b_lru_ref;      /* lru reclaim ref count */
+       xfs_buf_flags_t         b_flags;        /* status flags */
+       struct semaphore        b_sema;         /* semaphore for lockables */
+
+       struct list_head        b_lru;          /* lru list */
+       wait_queue_head_t       b_waiters;      /* unpin waiters */
+       struct list_head        b_list;
+       struct xfs_perag        *b_pag;         /* contains rbtree root */
+       xfs_buftarg_t           *b_target;      /* buffer target (device) */
+       xfs_daddr_t             b_bn;           /* block number for I/O */
+       size_t                  b_count_desired;/* desired transfer size */
+       void                    *b_addr;        /* virtual address of buffer */
+       struct work_struct      b_iodone_work;
+       xfs_buf_iodone_t        b_iodone;       /* I/O completion function */
+       struct completion       b_iowait;       /* queue for I/O waiters */
+       void                    *b_fspriv;
+       struct xfs_trans        *b_transp;
+       struct page             **b_pages;      /* array of page pointers */
+       struct page             *b_page_array[XB_PAGES]; /* inline pages */
+       unsigned long           b_queuetime;    /* time buffer was queued */
+       atomic_t                b_pin_count;    /* pin count */
+       atomic_t                b_io_remaining; /* #outstanding I/O requests */
+       unsigned int            b_page_count;   /* size of page array */
+       unsigned int            b_offset;       /* page offset in first page */
+       unsigned short          b_error;        /* error code on I/O */
+#ifdef XFS_BUF_LOCK_TRACKING
+       int                     b_last_holder;
+#endif
+} xfs_buf_t;
+
+
+/* Finding and Reading Buffers */
+extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
+                               xfs_buf_flags_t, xfs_buf_t *);
+#define xfs_incore(buftarg,blkno,len,lockit) \
+       _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
+
+extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
+                               xfs_buf_flags_t);
+extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
+                               xfs_buf_flags_t);
+
+extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
+extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
+extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
+extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
+extern void xfs_buf_hold(xfs_buf_t *);
+extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
+struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
+                               struct xfs_buftarg *target,
+                               xfs_daddr_t daddr, size_t length, int flags);
+
+/* Releasing Buffers */
+extern void xfs_buf_free(xfs_buf_t *);
+extern void xfs_buf_rele(xfs_buf_t *);
+
+/* Locking and Unlocking Buffers */
+extern int xfs_buf_trylock(xfs_buf_t *);
+extern void xfs_buf_lock(xfs_buf_t *);
+extern void xfs_buf_unlock(xfs_buf_t *);
+#define xfs_buf_islocked(bp) \
+       ((bp)->b_sema.count <= 0)
+
+/* Buffer Read and Write Routines */
+extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
+extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
+
+extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
+extern int xfs_bdstrat_cb(struct xfs_buf *);
+
+extern void xfs_buf_ioend(xfs_buf_t *, int);
+extern void xfs_buf_ioerror(xfs_buf_t *, int);
+extern int xfs_buf_iorequest(xfs_buf_t *);
+extern int xfs_buf_iowait(xfs_buf_t *);
+extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
+                               xfs_buf_rw_t);
+#define xfs_buf_zero(bp, off, len) \
+           xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
+
+static inline int xfs_buf_geterror(xfs_buf_t *bp)
+{
+       return bp ? bp->b_error : ENOMEM;
+}
+
+/* Buffer Utility Routines */
+extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
+
+/* Delayed Write Buffer Routines */
+extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
+extern void xfs_buf_delwri_promote(xfs_buf_t *);
+
+/* Buffer Daemon Setup Routines */
+extern int xfs_buf_init(void);
+extern void xfs_buf_terminate(void);
+
+static inline const char *
+xfs_buf_target_name(struct xfs_buftarg *target)
+{
+       static char __b[BDEVNAME_SIZE];
+
+       return bdevname(target->bt_bdev, __b);
+}
+
+
+#define XFS_BUF_ZEROFLAGS(bp) \
+       ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
+                           XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
+
+void xfs_buf_stale(struct xfs_buf *bp);
+#define XFS_BUF_STALE(bp)      xfs_buf_stale(bp);
+#define XFS_BUF_UNSTALE(bp)    ((bp)->b_flags &= ~XBF_STALE)
+#define XFS_BUF_ISSTALE(bp)    ((bp)->b_flags & XBF_STALE)
+#define XFS_BUF_SUPER_STALE(bp)        do {                            \
+                                       XFS_BUF_STALE(bp);      \
+                                       xfs_buf_delwri_dequeue(bp);     \
+                                       XFS_BUF_DONE(bp);       \
+                               } while (0)
+
+#define XFS_BUF_DELAYWRITE(bp)         ((bp)->b_flags |= XBF_DELWRI)
+#define XFS_BUF_UNDELAYWRITE(bp)       xfs_buf_delwri_dequeue(bp)
+#define XFS_BUF_ISDELAYWRITE(bp)       ((bp)->b_flags & XBF_DELWRI)
+
+#define XFS_BUF_DONE(bp)       ((bp)->b_flags |= XBF_DONE)
+#define XFS_BUF_UNDONE(bp)     ((bp)->b_flags &= ~XBF_DONE)
+#define XFS_BUF_ISDONE(bp)     ((bp)->b_flags & XBF_DONE)
+
+#define XFS_BUF_ASYNC(bp)      ((bp)->b_flags |= XBF_ASYNC)
+#define XFS_BUF_UNASYNC(bp)    ((bp)->b_flags &= ~XBF_ASYNC)
+#define XFS_BUF_ISASYNC(bp)    ((bp)->b_flags & XBF_ASYNC)
+
+#define XFS_BUF_READ(bp)       ((bp)->b_flags |= XBF_READ)
+#define XFS_BUF_UNREAD(bp)     ((bp)->b_flags &= ~XBF_READ)
+#define XFS_BUF_ISREAD(bp)     ((bp)->b_flags & XBF_READ)
+
+#define XFS_BUF_WRITE(bp)      ((bp)->b_flags |= XBF_WRITE)
+#define XFS_BUF_UNWRITE(bp)    ((bp)->b_flags &= ~XBF_WRITE)
+#define XFS_BUF_ISWRITE(bp)    ((bp)->b_flags & XBF_WRITE)
+
+#define XFS_BUF_ADDR(bp)               ((bp)->b_bn)
+#define XFS_BUF_SET_ADDR(bp, bno)      ((bp)->b_bn = (xfs_daddr_t)(bno))
+#define XFS_BUF_OFFSET(bp)             ((bp)->b_file_offset)
+#define XFS_BUF_SET_OFFSET(bp, off)    ((bp)->b_file_offset = (off))
+#define XFS_BUF_COUNT(bp)              ((bp)->b_count_desired)
+#define XFS_BUF_SET_COUNT(bp, cnt)     ((bp)->b_count_desired = (cnt))
+#define XFS_BUF_SIZE(bp)               ((bp)->b_buffer_length)
+#define XFS_BUF_SET_SIZE(bp, cnt)      ((bp)->b_buffer_length = (cnt))
+
+static inline void
+xfs_buf_set_ref(
+       struct xfs_buf  *bp,
+       int             lru_ref)
+{
+       atomic_set(&bp->b_lru_ref, lru_ref);
+}
+#define XFS_BUF_SET_VTYPE_REF(bp, type, ref)   xfs_buf_set_ref(bp, ref)
+#define XFS_BUF_SET_VTYPE(bp, type)            do { } while (0)
+
+static inline int xfs_buf_ispinned(struct xfs_buf *bp)
+{
+       return atomic_read(&bp->b_pin_count);
+}
+
+#define XFS_BUF_FINISH_IOWAIT(bp)      complete(&bp->b_iowait);
+
+static inline void xfs_buf_relse(xfs_buf_t *bp)
+{
+       xfs_buf_unlock(bp);
+       xfs_buf_rele(bp);
+}
+
+/*
+ *     Handling of buftargs.
+ */
+extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
+                       struct block_device *, int, const char *);
+extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
+extern void xfs_wait_buftarg(xfs_buftarg_t *);
+extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
+extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
+
+#ifdef CONFIG_KDB_MODULES
+extern struct list_head *xfs_get_buftarg_list(void);
+#endif
+
+#define xfs_getsize_buftarg(buftarg)   block_size((buftarg)->bt_bdev)
+#define xfs_readonly_buftarg(buftarg)  bdev_read_only((buftarg)->bt_bdev)
+
+#define xfs_binval(buftarg)            xfs_flush_buftarg(buftarg, 1)
+#define XFS_bflush(buftarg)            xfs_flush_buftarg(buftarg, 1)
+
+#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c

index 88492916c3dc0ed9b3bdd1aead549c8b00d87bbc..cac2ecfa674684762cce50d5acd323f79b8b4d5a 100644 (file)
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -124,9 +124,9 @@ xfs_buf_item_log_check(
  
         bp = bip->bli_buf;
         ASSERT(XFS_BUF_COUNT(bp) > 0);
-       ASSERT(XFS_BUF_PTR(bp) != NULL);
+       ASSERT(bp->b_addr != NULL);
         orig = bip->bli_orig;
-       buffer = XFS_BUF_PTR(bp);
+       buffer = bp->b_addr;
         for (x = 0; x < XFS_BUF_COUNT(bp); x++) {
                 if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) {
                         xfs_emerg(bp->b_mount,
@@ -371,7 +371,6 @@ xfs_buf_item_pin(
  {
         struct xfs_buf_log_item *bip = BUF_ITEM(lip);
  
-       ASSERT(XFS_BUF_ISBUSY(bip->bli_buf));
         ASSERT(atomic_read(&bip->bli_refcount) > 0);
         ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
                (bip->bli_flags & XFS_BLI_STALE));
@@ -479,13 +478,13 @@ xfs_buf_item_trylock(
         struct xfs_buf_log_item *bip = BUF_ITEM(lip);
         struct xfs_buf          *bp = bip->bli_buf;
  
-       if (XFS_BUF_ISPINNED(bp))
+       if (xfs_buf_ispinned(bp))
                 return XFS_ITEM_PINNED;
         if (!xfs_buf_trylock(bp))
                 return XFS_ITEM_LOCKED;
  
         /* take a reference to the buffer.  */
-       XFS_BUF_HOLD(bp);
+       xfs_buf_hold(bp);
  
         ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
         trace_xfs_buf_item_trylock(bip);
@@ -726,7 +725,7 @@ xfs_buf_item_init(
          * to have logged.
          */
         bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP);
-       memcpy(bip->bli_orig, XFS_BUF_PTR(bp), XFS_BUF_COUNT(bp));
+       memcpy(bip->bli_orig, bp->b_addr, XFS_BUF_COUNT(bp));
         bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP);
  #endif
  
@@ -895,7 +894,6 @@ xfs_buf_attach_iodone(
  {
         xfs_log_item_t  *head_lip;
  
-       ASSERT(XFS_BUF_ISBUSY(bp));
         ASSERT(xfs_buf_islocked(bp));
  
         lip->li_cb = cb;
@@ -960,7 +958,7 @@ xfs_buf_iodone_callbacks(
         static ulong            lasttime;
         static xfs_buftarg_t    *lasttarg;
  
-       if (likely(!XFS_BUF_GETERROR(bp)))
+       if (likely(!xfs_buf_geterror(bp)))
                 goto do_callbacks;
  
         /*
@@ -973,14 +971,14 @@ xfs_buf_iodone_callbacks(
                 goto do_callbacks;
         }
  
-       if (XFS_BUF_TARGET(bp) != lasttarg ||
+       if (bp->b_target != lasttarg ||
             time_after(jiffies, (lasttime + 5*HZ))) {
                 lasttime = jiffies;
                 xfs_alert(mp, "Device %s: metadata write error block 0x%llx",
-                       XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
+                       xfs_buf_target_name(bp->b_target),
                       (__uint64_t)XFS_BUF_ADDR(bp));
         }
-       lasttarg = XFS_BUF_TARGET(bp);
+       lasttarg = bp->b_target;
  
         /*
          * If the write was asynchronous then no one will be looking for the
@@ -991,12 +989,11 @@ xfs_buf_iodone_callbacks(
          * around.
          */
         if (XFS_BUF_ISASYNC(bp)) {
-               XFS_BUF_ERROR(bp, 0); /* errno of 0 unsets the flag */
+               xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
  
                 if (!XFS_BUF_ISSTALE(bp)) {
                         XFS_BUF_DELAYWRITE(bp);
                         XFS_BUF_DONE(bp);
-                       XFS_BUF_SET_START(bp);
                 }
                 ASSERT(bp->b_iodone != NULL);
                 trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
@@ -1013,7 +1010,6 @@ xfs_buf_iodone_callbacks(
         XFS_BUF_UNDELAYWRITE(bp);
  
         trace_xfs_buf_error_relse(bp, _RET_IP_);
-       xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
  
  do_callbacks:
         xfs_buf_do_callbacks(bp);
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c

index 5bfcb8779f9f63c93907aa45aa1a83bcfbf186a4..ee9d5427fcd4c2892577824417a186a59d8e9d88 100644 (file)
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2050,7 +2050,7 @@ xfs_da_do_buf(
                 case 0:
                         bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
                                 mappedbno, nmapped, 0);
-                       error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO);
+                       error = bp ? bp->b_error : XFS_ERROR(EIO);
                         break;
                 case 1:
                 case 2:
@@ -2268,7 +2268,7 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps)
                 dabuf->nbuf = 1;
                 bp = bps[0];
                 dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
-               dabuf->data = XFS_BUF_PTR(bp);
+               dabuf->data = bp->b_addr;
                 dabuf->bps[0] = bp;
         } else {
                 dabuf->nbuf = nbuf;
@@ -2279,7 +2279,7 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps)
                 dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
                 for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
                         bp = bps[i];
-                       memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
+                       memcpy((char *)dabuf->data + off, bp->b_addr,
                                 XFS_BUF_COUNT(bp));
                 }
         }
@@ -2302,8 +2302,8 @@ xfs_da_buf_clean(xfs_dabuf_t *dabuf)
                 for (i = off = 0; i < dabuf->nbuf;
                                 i++, off += XFS_BUF_COUNT(bp)) {
                         bp = dabuf->bps[i];
-                       memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
-                               XFS_BUF_COUNT(bp));
+                       memcpy(bp->b_addr, dabuf->data + off,
+                                               XFS_BUF_COUNT(bp));
                 }
         }
  }
@@ -2340,7 +2340,7 @@ xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
  
         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
         if (dabuf->nbuf == 1) {
-               ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0]));
+               ASSERT(dabuf->data == dabuf->bps[0]->b_addr);
                 xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
                 return;
         }
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h

index dffba9ba0db634c29b1bc012b4f54923cda76c55..a3721633abc8744a1d0187de3382cdd973efdb15 100644 (file)
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -148,7 +148,7 @@ typedef enum xfs_dinode_fmt {
                 be32_to_cpu((dip)->di_nextents) : \
                 be16_to_cpu((dip)->di_anextents))
  
-#define        XFS_BUF_TO_DINODE(bp)   ((xfs_dinode_t *)XFS_BUF_PTR(bp))
+#define        XFS_BUF_TO_DINODE(bp)   ((xfs_dinode_t *)((bp)->b_addr))
  
  /*
   * For block and character special files the 32bit dev_t is stored at the
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c

new file mode 100644 (file)

index 0000000..244e797
--- /dev/null
+++ b/fs/xfs/xfs_discard.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2010 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_quota.h"
+#include "xfs_trans.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_btree.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_error.h"
+#include "xfs_discard.h"
+#include "xfs_trace.h"
+
+STATIC int
+xfs_trim_extents(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          agno,
+       xfs_fsblock_t           start,
+       xfs_fsblock_t           len,
+       xfs_fsblock_t           minlen,
+       __uint64_t              *blocks_trimmed)
+{
+       struct block_device     *bdev = mp->m_ddev_targp->bt_bdev;
+       struct xfs_btree_cur    *cur;
+       struct xfs_buf          *agbp;
+       struct xfs_perag        *pag;
+       int                     error;
+       int                     i;
+
+       pag = xfs_perag_get(mp, agno);
+
+       error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+       if (error || !agbp)
+               goto out_put_perag;
+
+       cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
+
+       /*
+        * Force out the log.  This means any transactions that might have freed
+        * space before we took the AGF buffer lock are now on disk, and the
+        * volatile disk cache is flushed.
+        */
+       xfs_log_force(mp, XFS_LOG_SYNC);
+
+       /*
+        * Look up the longest btree in the AGF and start with it.
+        */
+       error = xfs_alloc_lookup_le(cur, 0,
+                                   XFS_BUF_TO_AGF(agbp)->agf_longest, &i);
+       if (error)
+               goto out_del_cursor;
+
+       /*
+        * Loop until we are done with all extents that are large
+        * enough to be worth discarding.
+        */
+       while (i) {
+               xfs_agblock_t fbno;
+               xfs_extlen_t flen;
+
+               error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
+               if (error)
+                       goto out_del_cursor;
+               XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
+               ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest);
+
+               /*
+                * Too small?  Give up.
+                */
+               if (flen < minlen) {
+                       trace_xfs_discard_toosmall(mp, agno, fbno, flen);
+                       goto out_del_cursor;
+               }
+
+               /*
+                * If the extent is entirely outside of the range we are
+                * supposed to discard skip it.  Do not bother to trim
+                * down partially overlapping ranges for now.
+                */
+               if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
+                   XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) {
+                       trace_xfs_discard_exclude(mp, agno, fbno, flen);
+                       goto next_extent;
+               }
+
+               /*
+                * If any blocks in the range are still busy, skip the
+                * discard and try again the next time.
+                */
+               if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
+                       trace_xfs_discard_busy(mp, agno, fbno, flen);
+                       goto next_extent;
+               }
+
+               trace_xfs_discard_extent(mp, agno, fbno, flen);
+               error = -blkdev_issue_discard(bdev,
+                               XFS_AGB_TO_DADDR(mp, agno, fbno),
+                               XFS_FSB_TO_BB(mp, flen),
+                               GFP_NOFS, 0);
+               if (error)
+                       goto out_del_cursor;
+               *blocks_trimmed += flen;
+
+next_extent:
+               error = xfs_btree_decrement(cur, 0, &i);
+               if (error)
+                       goto out_del_cursor;
+       }
+
+out_del_cursor:
+       xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       xfs_buf_relse(agbp);
+out_put_perag:
+       xfs_perag_put(pag);
+       return error;
+}
+
+int
+xfs_ioc_trim(
+       struct xfs_mount                *mp,
+       struct fstrim_range __user      *urange)
+{
+       struct request_queue    *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
+       unsigned int            granularity = q->limits.discard_granularity;
+       struct fstrim_range     range;
+       xfs_fsblock_t           start, len, minlen;
+       xfs_agnumber_t          start_agno, end_agno, agno;
+       __uint64_t              blocks_trimmed = 0;
+       int                     error, last_error = 0;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+       if (!blk_queue_discard(q))
+               return -XFS_ERROR(EOPNOTSUPP);
+       if (copy_from_user(&range, urange, sizeof(range)))
+               return -XFS_ERROR(EFAULT);
+
+       /*
+        * Truncating down the len isn't actually quite correct, but using
+        * XFS_B_TO_FSB would mean we trivially get overflows for values
+        * of ULLONG_MAX or slightly lower.  And ULLONG_MAX is the default
+        * used by the fstrim application.  In the end it really doesn't
+        * matter as trimming blocks is an advisory interface.
+        */
+       start = XFS_B_TO_FSBT(mp, range.start);
+       len = XFS_B_TO_FSBT(mp, range.len);
+       minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));
+
+       start_agno = XFS_FSB_TO_AGNO(mp, start);
+       if (start_agno >= mp->m_sb.sb_agcount)
+               return -XFS_ERROR(EINVAL);
+
+       end_agno = XFS_FSB_TO_AGNO(mp, start + len);
+       if (end_agno >= mp->m_sb.sb_agcount)
+               end_agno = mp->m_sb.sb_agcount - 1;
+
+       for (agno = start_agno; agno <= end_agno; agno++) {
+               error = -xfs_trim_extents(mp, agno, start, len, minlen,
+                                         &blocks_trimmed);
+               if (error)
+                       last_error = error;
+       }
+
+       if (last_error)
+               return last_error;
+
+       range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
+       if (copy_to_user(urange, &range, sizeof(range)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+int
+xfs_discard_extents(
+       struct xfs_mount        *mp,
+       struct list_head        *list)
+{
+       struct xfs_busy_extent  *busyp;
+       int                     error = 0;
+
+       list_for_each_entry(busyp, list, list) {
+               trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
+                                        busyp->length);
+
+               error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
+                               XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
+                               XFS_FSB_TO_BB(mp, busyp->length),
+                               GFP_NOFS, 0);
+               if (error && error != EOPNOTSUPP) {
+                       xfs_info(mp,
+        "discard failed for extent [0x%llu,%u], error %d",
+                                (unsigned long long)busyp->bno,
+                                busyp->length,
+                                error);
+                       return error;
+               }
+       }
+
+       return 0;
+}
diff --git a/fs/xfs/xfs_discard.h b/fs/xfs/xfs_discard.h

new file mode 100644 (file)

index 0000000..344879a
--- /dev/null
+++ b/fs/xfs/xfs_discard.h
@@ -0,0 +1,10 @@
+#ifndef XFS_DISCARD_H
+#define XFS_DISCARD_H 1
+
+struct fstrim_range;
+struct list_head;
+
+extern int     xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
+extern int     xfs_discard_extents(struct xfs_mount *, struct list_head *);
+
+#endif /* XFS_DISCARD_H */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c

new file mode 100644 (file)

index 0000000..db62959
--- /dev/null
+++ b/fs/xfs/xfs_dquot.c
@@ -0,0 +1,1454 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_space.h"
+#include "xfs_trans_priv.h"
+#include "xfs_qm.h"
+#include "xfs_trace.h"
+
+
+/*
+   LOCK ORDER
+
+   inode lock              (ilock)
+   dquot hash-chain lock    (hashlock)
+   xqm dquot freelist lock  (freelistlock
+   mount's dquot list lock  (mplistlock)
+   user dquot lock - lock ordering among dquots is based on the uid or gid
+   group dquot lock - similar to udquots. Between the two dquots, the udquot
+                     has to be locked first.
+   pin lock - the dquot lock must be held to take this lock.
+   flush lock - ditto.
+*/
+
+#ifdef DEBUG
+xfs_buftarg_t *xfs_dqerror_target;
+int xfs_do_dqerror;
+int xfs_dqreq_num;
+int xfs_dqerror_mod = 33;
+#endif
+
+static struct lock_class_key xfs_dquot_other_class;
+
+/*
+ * Allocate and initialize a dquot. We don't always allocate fresh memory;
+ * we try to reclaim a free dquot if the number of incore dquots are above
+ * a threshold.
+ * The only field inside the core that gets initialized at this point
+ * is the d_id field. The idea is to fill in the entire q_core
+ * when we read in the on disk dquot.
+ */
+STATIC xfs_dquot_t *
+xfs_qm_dqinit(
+       xfs_mount_t  *mp,
+       xfs_dqid_t   id,
+       uint         type)
+{
+       xfs_dquot_t     *dqp;
+       boolean_t       brandnewdquot;
+
+       brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
+       dqp->dq_flags = type;
+       dqp->q_core.d_id = cpu_to_be32(id);
+       dqp->q_mount = mp;
+
+       /*
+        * No need to re-initialize these if this is a reclaimed dquot.
+        */
+       if (brandnewdquot) {
+               INIT_LIST_HEAD(&dqp->q_freelist);
+               mutex_init(&dqp->q_qlock);
+               init_waitqueue_head(&dqp->q_pinwait);
+
+               /*
+                * Because we want to use a counting completion, complete
+                * the flush completion once to allow a single access to
+                * the flush completion without blocking.
+                */
+               init_completion(&dqp->q_flush);
+               complete(&dqp->q_flush);
+
+               trace_xfs_dqinit(dqp);
+       } else {
+               /*
+                * Only the q_core portion was zeroed in dqreclaim_one().
+                * So, we need to reset others.
+                */
+               dqp->q_nrefs = 0;
+               dqp->q_blkno = 0;
+               INIT_LIST_HEAD(&dqp->q_mplist);
+               INIT_LIST_HEAD(&dqp->q_hashlist);
+               dqp->q_bufoffset = 0;
+               dqp->q_fileoffset = 0;
+               dqp->q_transp = NULL;
+               dqp->q_gdquot = NULL;
+               dqp->q_res_bcount = 0;
+               dqp->q_res_icount = 0;
+               dqp->q_res_rtbcount = 0;
+               atomic_set(&dqp->q_pincount, 0);
+               dqp->q_hash = NULL;
+               ASSERT(list_empty(&dqp->q_freelist));
+
+               trace_xfs_dqreuse(dqp);
+       }
+
+       /*
+        * In either case we need to make sure group quotas have a different
+        * lock class than user quotas, to make sure lockdep knows we can
+        * locks of one of each at the same time.
+        */
+       if (!(type & XFS_DQ_USER))
+               lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
+
+       /*
+        * log item gets initialized later
+        */
+       return (dqp);
+}
+
+/*
+ * This is called to free all the memory associated with a dquot
+ */
+void
+xfs_qm_dqdestroy(
+       xfs_dquot_t     *dqp)
+{
+       ASSERT(list_empty(&dqp->q_freelist));
+
+       mutex_destroy(&dqp->q_qlock);
+       kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
+
+       atomic_dec(&xfs_Gqm->qm_totaldquots);
+}
+
+/*
+ * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
+ */
+STATIC void
+xfs_qm_dqinit_core(
+       xfs_dqid_t      id,
+       uint            type,
+       xfs_dqblk_t     *d)
+{
+       /*
+        * Caller has zero'd the entire dquot 'chunk' already.
+        */
+       d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
+       d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
+       d->dd_diskdq.d_id = cpu_to_be32(id);
+       d->dd_diskdq.d_flags = type;
+}
+
+/*
+ * If default limits are in force, push them into the dquot now.
+ * We overwrite the dquot limits only if they are zero and this
+ * is not the root dquot.
+ */
+void
+xfs_qm_adjust_dqlimits(
+       xfs_mount_t             *mp,
+       xfs_disk_dquot_t        *d)
+{
+       xfs_quotainfo_t         *q = mp->m_quotainfo;
+
+       ASSERT(d->d_id);
+
+       if (q->qi_bsoftlimit && !d->d_blk_softlimit)
+               d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
+       if (q->qi_bhardlimit && !d->d_blk_hardlimit)
+               d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
+       if (q->qi_isoftlimit && !d->d_ino_softlimit)
+               d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
+       if (q->qi_ihardlimit && !d->d_ino_hardlimit)
+               d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
+       if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
+               d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
+       if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
+               d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
+}
+
+/*
+ * Check the limits and timers of a dquot and start or reset timers
+ * if necessary.
+ * This gets called even when quota enforcement is OFF, which makes our
+ * life a little less complicated. (We just don't reject any quota
+ * reservations in that case, when enforcement is off).
+ * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
+ * enforcement's off.
+ * In contrast, warnings are a little different in that they don't
+ * 'automatically' get started when limits get exceeded.  They do
+ * get reset to zero, however, when we find the count to be under
+ * the soft limit (they are only ever set non-zero via userspace).
+ */
+void
+xfs_qm_adjust_dqtimers(
+       xfs_mount_t             *mp,
+       xfs_disk_dquot_t        *d)
+{
+       ASSERT(d->d_id);
+
+#ifdef DEBUG
+       if (d->d_blk_hardlimit)
+               ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
+                      be64_to_cpu(d->d_blk_hardlimit));
+       if (d->d_ino_hardlimit)
+               ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
+                      be64_to_cpu(d->d_ino_hardlimit));
+       if (d->d_rtb_hardlimit)
+               ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
+                      be64_to_cpu(d->d_rtb_hardlimit));
+#endif
+
+       if (!d->d_btimer) {
+               if ((d->d_blk_softlimit &&
+                    (be64_to_cpu(d->d_bcount) >=
+                     be64_to_cpu(d->d_blk_softlimit))) ||
+                   (d->d_blk_hardlimit &&
+                    (be64_to_cpu(d->d_bcount) >=
+                     be64_to_cpu(d->d_blk_hardlimit)))) {
+                       d->d_btimer = cpu_to_be32(get_seconds() +
+                                       mp->m_quotainfo->qi_btimelimit);
+               } else {
+                       d->d_bwarns = 0;
+               }
+       } else {
+               if ((!d->d_blk_softlimit ||
+                    (be64_to_cpu(d->d_bcount) <
+                     be64_to_cpu(d->d_blk_softlimit))) &&
+                   (!d->d_blk_hardlimit ||
+                   (be64_to_cpu(d->d_bcount) <
+                    be64_to_cpu(d->d_blk_hardlimit)))) {
+                       d->d_btimer = 0;
+               }
+       }
+
+       if (!d->d_itimer) {
+               if ((d->d_ino_softlimit &&
+                    (be64_to_cpu(d->d_icount) >=
+                     be64_to_cpu(d->d_ino_softlimit))) ||
+                   (d->d_ino_hardlimit &&
+                    (be64_to_cpu(d->d_icount) >=
+                     be64_to_cpu(d->d_ino_hardlimit)))) {
+                       d->d_itimer = cpu_to_be32(get_seconds() +
+                                       mp->m_quotainfo->qi_itimelimit);
+               } else {
+                       d->d_iwarns = 0;
+               }
+       } else {
+               if ((!d->d_ino_softlimit ||
+                    (be64_to_cpu(d->d_icount) <
+                     be64_to_cpu(d->d_ino_softlimit)))  &&
+                   (!d->d_ino_hardlimit ||
+                    (be64_to_cpu(d->d_icount) <
+                     be64_to_cpu(d->d_ino_hardlimit)))) {
+                       d->d_itimer = 0;
+               }
+       }
+
+       if (!d->d_rtbtimer) {
+               if ((d->d_rtb_softlimit &&
+                    (be64_to_cpu(d->d_rtbcount) >=
+                     be64_to_cpu(d->d_rtb_softlimit))) ||
+                   (d->d_rtb_hardlimit &&
+                    (be64_to_cpu(d->d_rtbcount) >=
+                     be64_to_cpu(d->d_rtb_hardlimit)))) {
+                       d->d_rtbtimer = cpu_to_be32(get_seconds() +
+                                       mp->m_quotainfo->qi_rtbtimelimit);
+               } else {
+                       d->d_rtbwarns = 0;
+               }
+       } else {
+               if ((!d->d_rtb_softlimit ||
+                    (be64_to_cpu(d->d_rtbcount) <
+                     be64_to_cpu(d->d_rtb_softlimit))) &&
+                   (!d->d_rtb_hardlimit ||
+                    (be64_to_cpu(d->d_rtbcount) <
+                     be64_to_cpu(d->d_rtb_hardlimit)))) {
+                       d->d_rtbtimer = 0;
+               }
+       }
+}
+
+/*
+ * initialize a buffer full of dquots and log the whole thing
+ */
+STATIC void
+xfs_qm_init_dquot_blk(
+       xfs_trans_t     *tp,
+       xfs_mount_t     *mp,
+       xfs_dqid_t      id,
+       uint            type,
+       xfs_buf_t       *bp)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       xfs_dqblk_t     *d;
+       int             curid, i;
+
+       ASSERT(tp);
+       ASSERT(xfs_buf_islocked(bp));
+
+       d = bp->b_addr;
+
+       /*
+        * ID of the first dquot in the block - id's are zero based.
+        */
+       curid = id - (id % q->qi_dqperchunk);
+       ASSERT(curid >= 0);
+       memset(d, 0, BBTOB(q->qi_dqchunklen));
+       for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
+               xfs_qm_dqinit_core(curid, type, d);
+       xfs_trans_dquot_buf(tp, bp,
+                           (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
+                           ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
+                            XFS_BLF_GDQUOT_BUF)));
+       xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
+}
+
+
+
+/*
+ * Allocate a block and fill it with dquots.
+ * This is called when the bmapi finds a hole.
+ */
+STATIC int
+xfs_qm_dqalloc(
+       xfs_trans_t     **tpp,
+       xfs_mount_t     *mp,
+       xfs_dquot_t     *dqp,
+       xfs_inode_t     *quotip,
+       xfs_fileoff_t   offset_fsb,
+       xfs_buf_t       **O_bpp)
+{
+       xfs_fsblock_t   firstblock;
+       xfs_bmap_free_t flist;
+       xfs_bmbt_irec_t map;
+       int             nmaps, error, committed;
+       xfs_buf_t       *bp;
+       xfs_trans_t     *tp = *tpp;
+
+       ASSERT(tp != NULL);
+
+       trace_xfs_dqalloc(dqp);
+
+       /*
+        * Initialize the bmap freelist prior to calling bmapi code.
+        */
+       xfs_bmap_init(&flist, &firstblock);
+       xfs_ilock(quotip, XFS_ILOCK_EXCL);
+       /*
+        * Return if this type of quotas is turned off while we didn't
+        * have an inode lock
+        */
+       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+               xfs_iunlock(quotip, XFS_ILOCK_EXCL);
+               return (ESRCH);
+       }
+
+       xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL);
+       nmaps = 1;
+       if ((error = xfs_bmapi(tp, quotip,
+                             offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
+                             XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
+                             &firstblock,
+                             XFS_QM_DQALLOC_SPACE_RES(mp),
+                             &map, &nmaps, &flist))) {
+               goto error0;
+       }
+       ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
+       ASSERT(nmaps == 1);
+       ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
+              (map.br_startblock != HOLESTARTBLOCK));
+
+       /*
+        * Keep track of the blkno to save a lookup later
+        */
+       dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+
+       /* now we can just get the buffer (there's nothing to read yet) */
+       bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+                              dqp->q_blkno,
+                              mp->m_quotainfo->qi_dqchunklen,
+                              0);
+       if (!bp || (error = xfs_buf_geterror(bp)))
+               goto error1;
+       /*
+        * Make a chunk of dquots out of this buffer and log
+        * the entire thing.
+        */
+       xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
+                             dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
+
+       /*
+        * xfs_bmap_finish() may commit the current transaction and
+        * start a second transaction if the freelist is not empty.
+        *
+        * Since we still want to modify this buffer, we need to
+        * ensure that the buffer is not released on commit of
+        * the first transaction and ensure the buffer is added to the
+        * second transaction.
+        *
+        * If there is only one transaction then don't stop the buffer
+        * from being released when it commits later on.
+        */
+
+       xfs_trans_bhold(tp, bp);
+
+       if ((error = xfs_bmap_finish(tpp, &flist, &committed))) {
+               goto error1;
+       }
+
+       if (committed) {
+               tp = *tpp;
+               xfs_trans_bjoin(tp, bp);
+       } else {
+               xfs_trans_bhold_release(tp, bp);
+       }
+
+       *O_bpp = bp;
+       return 0;
+
+      error1:
+       xfs_bmap_cancel(&flist);
+      error0:
+       xfs_iunlock(quotip, XFS_ILOCK_EXCL);
+
+       return (error);
+}
+
+/*
+ * Maps a dquot to the buffer containing its on-disk version.
+ * This returns a ptr to the buffer containing the on-disk dquot
+ * in the bpp param, and a ptr to the on-disk dquot within that buffer
+ */
+STATIC int
+xfs_qm_dqtobp(
+       xfs_trans_t             **tpp,
+       xfs_dquot_t             *dqp,
+       xfs_disk_dquot_t        **O_ddpp,
+       xfs_buf_t               **O_bpp,
+       uint                    flags)
+{
+       xfs_bmbt_irec_t map;
+       int             nmaps = 1, error;
+       xfs_buf_t       *bp;
+       xfs_inode_t     *quotip = XFS_DQ_TO_QIP(dqp);
+       xfs_mount_t     *mp = dqp->q_mount;
+       xfs_disk_dquot_t *ddq;
+       xfs_dqid_t      id = be32_to_cpu(dqp->q_core.d_id);
+       xfs_trans_t     *tp = (tpp ? *tpp : NULL);
+
+       dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
+
+       xfs_ilock(quotip, XFS_ILOCK_SHARED);
+       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+               /*
+                * Return if this type of quotas is turned off while we
+                * didn't have the quota inode lock.
+                */
+               xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+               return ESRCH;
+       }
+
+       /*
+        * Find the block map; no allocations yet
+        */
+       error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
+                         XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
+                         NULL, 0, &map, &nmaps, NULL);
+
+       xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+       if (error)
+               return error;
+
+       ASSERT(nmaps == 1);
+       ASSERT(map.br_blockcount == 1);
+
+       /*
+        * Offset of dquot in the (fixed sized) dquot chunk.
+        */
+       dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
+               sizeof(xfs_dqblk_t);
+
+       ASSERT(map.br_startblock != DELAYSTARTBLOCK);
+       if (map.br_startblock == HOLESTARTBLOCK) {
+               /*
+                * We don't allocate unless we're asked to
+                */
+               if (!(flags & XFS_QMOPT_DQALLOC))
+                       return ENOENT;
+
+               ASSERT(tp);
+               error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
+                                       dqp->q_fileoffset, &bp);
+               if (error)
+                       return error;
+               tp = *tpp;
+       } else {
+               trace_xfs_dqtobp_read(dqp);
+
+               /*
+                * store the blkno etc so that we don't have to do the
+                * mapping all the time
+                */
+               dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+
+               error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+                                          dqp->q_blkno,
+                                          mp->m_quotainfo->qi_dqchunklen,
+                                          0, &bp);
+               if (error || !bp)
+                       return XFS_ERROR(error);
+       }
+
+       ASSERT(xfs_buf_islocked(bp));
+
+       /*
+        * calculate the location of the dquot inside the buffer.
+        */
+       ddq = bp->b_addr + dqp->q_bufoffset;
+
+       /*
+        * A simple sanity check in case we got a corrupted dquot...
+        */
+       error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
+                          flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
+                          "dqtobp");
+       if (error) {
+               if (!(flags & XFS_QMOPT_DQREPAIR)) {
+                       xfs_trans_brelse(tp, bp);
+                       return XFS_ERROR(EIO);
+               }
+       }
+
+       *O_bpp = bp;
+       *O_ddpp = ddq;
+
+       return (0);
+}
+
+
+/*
+ * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
+ * and release the buffer immediately.
+ *
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_dqread(
+       xfs_trans_t     **tpp,
+       xfs_dqid_t      id,
+       xfs_dquot_t     *dqp,   /* dquot to get filled in */
+       uint            flags)
+{
+       xfs_disk_dquot_t *ddqp;
+       xfs_buf_t        *bp;
+       int              error;
+       xfs_trans_t      *tp;
+
+       ASSERT(tpp);
+
+       trace_xfs_dqread(dqp);
+
+       /*
+        * get a pointer to the on-disk dquot and the buffer containing it
+        * dqp already knows its own type (GROUP/USER).
+        */
+       if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
+               return (error);
+       }
+       tp = *tpp;
+
+       /* copy everything from disk dquot to the incore dquot */
+       memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
+       ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
+       xfs_qm_dquot_logitem_init(dqp);
+
+       /*
+        * Reservation counters are defined as reservation plus current usage
+        * to avoid having to add every time.
+        */
+       dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
+       dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
+       dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
+
+       /* Mark the buf so that this will stay incore a little longer */
+       XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
+
+       /*
+        * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
+        * So we need to release with xfs_trans_brelse().
+        * The strategy here is identical to that of inodes; we lock
+        * the dquot in xfs_qm_dqget() before making it accessible to
+        * others. This is because dquots, like inodes, need a good level of
+        * concurrency, and we don't want to take locks on the entire buffers
+        * for dquot accesses.
+        * Note also that the dquot buffer may even be dirty at this point, if
+        * this particular dquot was repaired. We still aren't afraid to
+        * brelse it because we have the changes incore.
+        */
+       ASSERT(xfs_buf_islocked(bp));
+       xfs_trans_brelse(tp, bp);
+
+       return (error);
+}
+
+
+/*
+ * allocate an incore dquot from the kernel heap,
+ * and fill its core with quota information kept on disk.
+ * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
+ * if it wasn't already allocated.
+ */
+STATIC int
+xfs_qm_idtodq(
+       xfs_mount_t     *mp,
+       xfs_dqid_t      id,      /* gid or uid, depending on type */
+       uint            type,    /* UDQUOT or GDQUOT */
+       uint            flags,   /* DQALLOC, DQREPAIR */
+       xfs_dquot_t     **O_dqpp)/* OUT : incore dquot, not locked */
+{
+       xfs_dquot_t     *dqp;
+       int             error;
+       xfs_trans_t     *tp;
+       int             cancelflags=0;
+
+       dqp = xfs_qm_dqinit(mp, id, type);
+       tp = NULL;
+       if (flags & XFS_QMOPT_DQALLOC) {
+               tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
+               error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
+                               XFS_WRITE_LOG_RES(mp) +
+                               BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
+                               128,
+                               0,
+                               XFS_TRANS_PERM_LOG_RES,
+                               XFS_WRITE_LOG_COUNT);
+               if (error) {
+                       cancelflags = 0;
+                       goto error0;
+               }
+               cancelflags = XFS_TRANS_RELEASE_LOG_RES;
+       }
+
+       /*
+        * Read it from disk; xfs_dqread() takes care of
+        * all the necessary initialization of dquot's fields (locks, etc)
+        */
+       if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
+               /*
+                * This can happen if quotas got turned off (ESRCH),
+                * or if the dquot didn't exist on disk and we ask to
+                * allocate (ENOENT).
+                */
+               trace_xfs_dqread_fail(dqp);
+               cancelflags |= XFS_TRANS_ABORT;
+               goto error0;
+       }
+       if (tp) {
+               if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES)))
+                       goto error1;
+       }
+
+       *O_dqpp = dqp;
+       return (0);
+
+ error0:
+       ASSERT(error);
+       if (tp)
+               xfs_trans_cancel(tp, cancelflags);
+ error1:
+       xfs_qm_dqdestroy(dqp);
+       *O_dqpp = NULL;
+       return (error);
+}
+
+/*
+ * Lookup a dquot in the incore dquot hashtable. We keep two separate
+ * hashtables for user and group dquots; and, these are global tables
+ * inside the XQM, not per-filesystem tables.
+ * The hash chain must be locked by caller, and it is left locked
+ * on return. Returning dquot is locked.
+ */
+STATIC int
+xfs_qm_dqlookup(
+       xfs_mount_t             *mp,
+       xfs_dqid_t              id,
+       xfs_dqhash_t            *qh,
+       xfs_dquot_t             **O_dqpp)
+{
+       xfs_dquot_t             *dqp;
+       uint                    flist_locked;
+
+       ASSERT(mutex_is_locked(&qh->qh_lock));
+
+       flist_locked = B_FALSE;
+
+       /*
+        * Traverse the hashchain looking for a match
+        */
+       list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
+               /*
+                * We already have the hashlock. We don't need the
+                * dqlock to look at the id field of the dquot, since the
+                * id can't be modified without the hashlock anyway.
+                */
+               if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
+                       trace_xfs_dqlookup_found(dqp);
+
+                       /*
+                        * All in core dquots must be on the dqlist of mp
+                        */
+                       ASSERT(!list_empty(&dqp->q_mplist));
+
+                       xfs_dqlock(dqp);
+                       if (dqp->q_nrefs == 0) {
+                               ASSERT(!list_empty(&dqp->q_freelist));
+                               if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
+                                       trace_xfs_dqlookup_want(dqp);
+
+                                       /*
+                                        * We may have raced with dqreclaim_one()
+                                        * (and lost). So, flag that we don't
+                                        * want the dquot to be reclaimed.
+                                        */
+                                       dqp->dq_flags |= XFS_DQ_WANT;
+                                       xfs_dqunlock(dqp);
+                                       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+                                       xfs_dqlock(dqp);
+                                       dqp->dq_flags &= ~(XFS_DQ_WANT);
+                               }
+                               flist_locked = B_TRUE;
+                       }
+
+                       /*
+                        * id couldn't have changed; we had the hashlock all
+                        * along
+                        */
+                       ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
+
+                       if (flist_locked) {
+                               if (dqp->q_nrefs != 0) {
+                                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+                                       flist_locked = B_FALSE;
+                               } else {
+                                       /* take it off the freelist */
+                                       trace_xfs_dqlookup_freelist(dqp);
+                                       list_del_init(&dqp->q_freelist);
+                                       xfs_Gqm->qm_dqfrlist_cnt--;
+                               }
+                       }
+
+                       XFS_DQHOLD(dqp);
+
+                       if (flist_locked)
+                               mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+                       /*
+                        * move the dquot to the front of the hashchain
+                        */
+                       ASSERT(mutex_is_locked(&qh->qh_lock));
+                       list_move(&dqp->q_hashlist, &qh->qh_list);
+                       trace_xfs_dqlookup_done(dqp);
+                       *O_dqpp = dqp;
+                       return 0;
+               }
+       }
+
+       *O_dqpp = NULL;
+       ASSERT(mutex_is_locked(&qh->qh_lock));
+       return (1);
+}
+
+/*
+ * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
+ * a locked dquot, doing an allocation (if requested) as needed.
+ * When both an inode and an id are given, the inode's id takes precedence.
+ * That is, if the id changes while we don't hold the ilock inside this
+ * function, the new dquot is returned, not necessarily the one requested
+ * in the id argument.
+ */
+int
+xfs_qm_dqget(
+       xfs_mount_t     *mp,
+       xfs_inode_t     *ip,      /* locked inode (optional) */
+       xfs_dqid_t      id,       /* uid/projid/gid depending on type */
+       uint            type,     /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
+       uint            flags,    /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
+       xfs_dquot_t     **O_dqpp) /* OUT : locked incore dquot */
+{
+       xfs_dquot_t     *dqp;
+       xfs_dqhash_t    *h;
+       uint            version;
+       int             error;
+
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+       if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
+           (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
+           (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
+               return (ESRCH);
+       }
+       h = XFS_DQ_HASH(mp, id, type);
+
+#ifdef DEBUG
+       if (xfs_do_dqerror) {
+               if ((xfs_dqerror_target == mp->m_ddev_targp) &&
+                   (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
+                       xfs_debug(mp, "Returning error in dqget");
+                       return (EIO);
+               }
+       }
+#endif
+
+ again:
+
+#ifdef DEBUG
+       ASSERT(type == XFS_DQ_USER ||
+              type == XFS_DQ_PROJ ||
+              type == XFS_DQ_GROUP);
+       if (ip) {
+               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+               if (type == XFS_DQ_USER)
+                       ASSERT(ip->i_udquot == NULL);
+               else
+                       ASSERT(ip->i_gdquot == NULL);
+       }
+#endif
+       mutex_lock(&h->qh_lock);
+
+       /*
+        * Look in the cache (hashtable).
+        * The chain is kept locked during lookup.
+        */
+       if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
+               XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
+               /*
+                * The dquot was found, moved to the front of the chain,
+                * taken off the freelist if it was on it, and locked
+                * at this point. Just unlock the hashchain and return.
+                */
+               ASSERT(*O_dqpp);
+               ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
+               mutex_unlock(&h->qh_lock);
+               trace_xfs_dqget_hit(*O_dqpp);
+               return (0);     /* success */
+       }
+       XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
+
+       /*
+        * Dquot cache miss. We don't want to keep the inode lock across
+        * a (potential) disk read. Also we don't want to deal with the lock
+        * ordering between quotainode and this inode. OTOH, dropping the inode
+        * lock here means dealing with a chown that can happen before
+        * we re-acquire the lock.
+        */
+       if (ip)
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       /*
+        * Save the hashchain version stamp, and unlock the chain, so that
+        * we don't keep the lock across a disk read
+        */
+       version = h->qh_version;
+       mutex_unlock(&h->qh_lock);
+
+       /*
+        * Allocate the dquot on the kernel heap, and read the ondisk
+        * portion off the disk. Also, do all the necessary initialization
+        * This can return ENOENT if dquot didn't exist on disk and we didn't
+        * ask it to allocate; ESRCH if quotas got turned off suddenly.
+        */
+       if ((error = xfs_qm_idtodq(mp, id, type,
+                                 flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
+                                          XFS_QMOPT_DOWARN),
+                                 &dqp))) {
+               if (ip)
+                       xfs_ilock(ip, XFS_ILOCK_EXCL);
+               return (error);
+       }
+
+       /*
+        * See if this is mount code calling to look at the overall quota limits
+        * which are stored in the id == 0 user or group's dquot.
+        * Since we may not have done a quotacheck by this point, just return
+        * the dquot without attaching it to any hashtables, lists, etc, or even
+        * taking a reference.
+        * The caller must dqdestroy this once done.
+        */
+       if (flags & XFS_QMOPT_DQSUSER) {
+               ASSERT(id == 0);
+               ASSERT(! ip);
+               goto dqret;
+       }
+
+       /*
+        * Dquot lock comes after hashlock in the lock ordering
+        */
+       if (ip) {
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+               /*
+                * A dquot could be attached to this inode by now, since
+                * we had dropped the ilock.
+                */
+               if (type == XFS_DQ_USER) {
+                       if (!XFS_IS_UQUOTA_ON(mp)) {
+                               /* inode stays locked on return */
+                               xfs_qm_dqdestroy(dqp);
+                               return XFS_ERROR(ESRCH);
+                       }
+                       if (ip->i_udquot) {
+                               xfs_qm_dqdestroy(dqp);
+                               dqp = ip->i_udquot;
+                               xfs_dqlock(dqp);
+                               goto dqret;
+                       }
+               } else {
+                       if (!XFS_IS_OQUOTA_ON(mp)) {
+                               /* inode stays locked on return */
+                               xfs_qm_dqdestroy(dqp);
+                               return XFS_ERROR(ESRCH);
+                       }
+                       if (ip->i_gdquot) {
+                               xfs_qm_dqdestroy(dqp);
+                               dqp = ip->i_gdquot;
+                               xfs_dqlock(dqp);
+                               goto dqret;
+                       }
+               }
+       }
+
+       /*
+        * Hashlock comes after ilock in lock order
+        */
+       mutex_lock(&h->qh_lock);
+       if (version != h->qh_version) {
+               xfs_dquot_t *tmpdqp;
+               /*
+                * Now, see if somebody else put the dquot in the
+                * hashtable before us. This can happen because we didn't
+                * keep the hashchain lock. We don't have to worry about
+                * lock order between the two dquots here since dqp isn't
+                * on any findable lists yet.
+                */
+               if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
+                       /*
+                        * Duplicate found. Just throw away the new dquot
+                        * and start over.
+                        */
+                       xfs_qm_dqput(tmpdqp);
+                       mutex_unlock(&h->qh_lock);
+                       xfs_qm_dqdestroy(dqp);
+                       XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
+                       goto again;
+               }
+       }
+
+       /*
+        * Put the dquot at the beginning of the hash-chain and mp's list
+        * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
+        */
+       ASSERT(mutex_is_locked(&h->qh_lock));
+       dqp->q_hash = h;
+       list_add(&dqp->q_hashlist, &h->qh_list);
+       h->qh_version++;
+
+       /*
+        * Attach this dquot to this filesystem's list of all dquots,
+        * kept inside the mount structure in m_quotainfo field
+        */
+       mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
+
+       /*
+        * We return a locked dquot to the caller, with a reference taken
+        */
+       xfs_dqlock(dqp);
+       dqp->q_nrefs = 1;
+
+       list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
+       mp->m_quotainfo->qi_dquots++;
+       mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+       mutex_unlock(&h->qh_lock);
+ dqret:
+       ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       trace_xfs_dqget_miss(dqp);
+       *O_dqpp = dqp;
+       return (0);
+}
+
+
+/*
+ * Release a reference to the dquot (decrement ref-count)
+ * and unlock it. If there is a group quota attached to this
+ * dquot, carefully release that too without tripping over
+ * deadlocks'n'stuff.
+ */
+void
+xfs_qm_dqput(
+       xfs_dquot_t     *dqp)
+{
+       xfs_dquot_t     *gdqp;
+
+       ASSERT(dqp->q_nrefs > 0);
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+       trace_xfs_dqput(dqp);
+
+       if (dqp->q_nrefs != 1) {
+               dqp->q_nrefs--;
+               xfs_dqunlock(dqp);
+               return;
+       }
+
+       /*
+        * drop the dqlock and acquire the freelist and dqlock
+        * in the right order; but try to get it out-of-order first
+        */
+       if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
+               trace_xfs_dqput_wait(dqp);
+               xfs_dqunlock(dqp);
+               mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+               xfs_dqlock(dqp);
+       }
+
+       while (1) {
+               gdqp = NULL;
+
+               /* We can't depend on nrefs being == 1 here */
+               if (--dqp->q_nrefs == 0) {
+                       trace_xfs_dqput_free(dqp);
+
+                       list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
+                       xfs_Gqm->qm_dqfrlist_cnt++;
+
+                       /*
+                        * If we just added a udquot to the freelist, then
+                        * we want to release the gdquot reference that
+                        * it (probably) has. Otherwise it'll keep the
+                        * gdquot from getting reclaimed.
+                        */
+                       if ((gdqp = dqp->q_gdquot)) {
+                               /*
+                                * Avoid a recursive dqput call
+                                */
+                               xfs_dqlock(gdqp);
+                               dqp->q_gdquot = NULL;
+                       }
+               }
+               xfs_dqunlock(dqp);
+
+               /*
+                * If we had a group quota inside the user quota as a hint,
+                * release it now.
+                */
+               if (! gdqp)
+                       break;
+               dqp = gdqp;
+       }
+       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+}
+
+/*
+ * Release a dquot. Flush it if dirty, then dqput() it.
+ * dquot must not be locked.
+ */
+void
+xfs_qm_dqrele(
+       xfs_dquot_t     *dqp)
+{
+       if (!dqp)
+               return;
+
+       trace_xfs_dqrele(dqp);
+
+       xfs_dqlock(dqp);
+       /*
+        * We don't care to flush it if the dquot is dirty here.
+        * That will create stutters that we want to avoid.
+        * Instead we do a delayed write when we try to reclaim
+        * a dirty dquot. Also xfs_sync will take part of the burden...
+        */
+       xfs_qm_dqput(dqp);
+}
+
+/*
+ * This is the dquot flushing I/O completion routine.  It is called
+ * from interrupt level when the buffer containing the dquot is
+ * flushed to disk.  It is responsible for removing the dquot logitem
+ * from the AIL if it has not been re-logged, and unlocking the dquot's
+ * flush lock. This behavior is very similar to that of inodes..
+ */
+STATIC void
+xfs_qm_dqflush_done(
+       struct xfs_buf          *bp,
+       struct xfs_log_item     *lip)
+{
+       xfs_dq_logitem_t        *qip = (struct xfs_dq_logitem *)lip;
+       xfs_dquot_t             *dqp = qip->qli_dquot;
+       struct xfs_ail          *ailp = lip->li_ailp;
+
+       /*
+        * We only want to pull the item from the AIL if its
+        * location in the log has not changed since we started the flush.
+        * Thus, we only bother if the dquot's lsn has
+        * not changed. First we check the lsn outside the lock
+        * since it's cheaper, and then we recheck while
+        * holding the lock before removing the dquot from the AIL.
+        */
+       if ((lip->li_flags & XFS_LI_IN_AIL) &&
+           lip->li_lsn == qip->qli_flush_lsn) {
+
+               /* xfs_trans_ail_delete() drops the AIL lock. */
+               spin_lock(&ailp->xa_lock);
+               if (lip->li_lsn == qip->qli_flush_lsn)
+                       xfs_trans_ail_delete(ailp, lip);
+               else
+                       spin_unlock(&ailp->xa_lock);
+       }
+
+       /*
+        * Release the dq's flush lock since we're done with it.
+        */
+       xfs_dqfunlock(dqp);
+}
+
+/*
+ * Write a modified dquot to disk.
+ * The dquot must be locked and the flush lock too taken by caller.
+ * The flush lock will not be unlocked until the dquot reaches the disk,
+ * but the dquot is free to be unlocked and modified by the caller
+ * in the interim. Dquot is still locked on return. This behavior is
+ * identical to that of inodes.
+ */
+int
+xfs_qm_dqflush(
+       xfs_dquot_t             *dqp,
+       uint                    flags)
+{
+       struct xfs_mount        *mp = dqp->q_mount;
+       struct xfs_buf          *bp;
+       struct xfs_disk_dquot   *ddqp;
+       int                     error;
+
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       ASSERT(!completion_done(&dqp->q_flush));
+
+       trace_xfs_dqflush(dqp);
+
+       /*
+        * If not dirty, or it's pinned and we are not supposed to block, nada.
+        */
+       if (!XFS_DQ_IS_DIRTY(dqp) ||
+           (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
+               xfs_dqfunlock(dqp);
+               return 0;
+       }
+       xfs_qm_dqunpin_wait(dqp);
+
+       /*
+        * This may have been unpinned because the filesystem is shutting
+        * down forcibly. If that's the case we must not write this dquot
+        * to disk, because the log record didn't make it to disk!
+        */
+       if (XFS_FORCED_SHUTDOWN(mp)) {
+               dqp->dq_flags &= ~XFS_DQ_DIRTY;
+               xfs_dqfunlock(dqp);
+               return XFS_ERROR(EIO);
+       }
+
+       /*
+        * Get the buffer containing the on-disk dquot
+        */
+       error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
+                                  mp->m_quotainfo->qi_dqchunklen, 0, &bp);
+       if (error) {
+               ASSERT(error != ENOENT);
+               xfs_dqfunlock(dqp);
+               return error;
+       }
+
+       /*
+        * Calculate the location of the dquot inside the buffer.
+        */
+       ddqp = bp->b_addr + dqp->q_bufoffset;
+
+       /*
+        * A simple sanity check in case we got a corrupted dquot..
+        */
+       error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
+                          XFS_QMOPT_DOWARN, "dqflush (incore copy)");
+       if (error) {
+               xfs_buf_relse(bp);
+               xfs_dqfunlock(dqp);
+               xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+               return XFS_ERROR(EIO);
+       }
+
+       /* This is the only portion of data that needs to persist */
+       memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
+
+       /*
+        * Clear the dirty field and remember the flush lsn for later use.
+        */
+       dqp->dq_flags &= ~XFS_DQ_DIRTY;
+
+       xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
+                                       &dqp->q_logitem.qli_item.li_lsn);
+
+       /*
+        * Attach an iodone routine so that we can remove this dquot from the
+        * AIL and release the flush lock once the dquot is synced to disk.
+        */
+       xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
+                                 &dqp->q_logitem.qli_item);
+
+       /*
+        * If the buffer is pinned then push on the log so we won't
+        * get stuck waiting in the write for too long.
+        */
+       if (xfs_buf_ispinned(bp)) {
+               trace_xfs_dqflush_force(dqp);
+               xfs_log_force(mp, 0);
+       }
+
+       if (flags & SYNC_WAIT)
+               error = xfs_bwrite(mp, bp);
+       else
+               xfs_bdwrite(mp, bp);
+
+       trace_xfs_dqflush_done(dqp);
+
+       /*
+        * dqp is still locked, but caller is free to unlock it now.
+        */
+       return error;
+
+}
+
+int
+xfs_qm_dqlock_nowait(
+       xfs_dquot_t *dqp)
+{
+       return mutex_trylock(&dqp->q_qlock);
+}
+
+void
+xfs_dqlock(
+       xfs_dquot_t *dqp)
+{
+       mutex_lock(&dqp->q_qlock);
+}
+
+void
+xfs_dqunlock(
+       xfs_dquot_t *dqp)
+{
+       mutex_unlock(&(dqp->q_qlock));
+       if (dqp->q_logitem.qli_dquot == dqp) {
+               /* Once was dqp->q_mount, but might just have been cleared */
+               xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
+                                       (xfs_log_item_t*)&(dqp->q_logitem));
+       }
+}
+
+
+void
+xfs_dqunlock_nonotify(
+       xfs_dquot_t *dqp)
+{
+       mutex_unlock(&(dqp->q_qlock));
+}
+
+/*
+ * Lock two xfs_dquot structures.
+ *
+ * To avoid deadlocks we always lock the quota structure with
+ * the lowerd id first.
+ */
+void
+xfs_dqlock2(
+       xfs_dquot_t     *d1,
+       xfs_dquot_t     *d2)
+{
+       if (d1 && d2) {
+               ASSERT(d1 != d2);
+               if (be32_to_cpu(d1->q_core.d_id) >
+                   be32_to_cpu(d2->q_core.d_id)) {
+                       mutex_lock(&d2->q_qlock);
+                       mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
+               } else {
+                       mutex_lock(&d1->q_qlock);
+                       mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
+               }
+       } else if (d1) {
+               mutex_lock(&d1->q_qlock);
+       } else if (d2) {
+               mutex_lock(&d2->q_qlock);
+       }
+}
+
+
+/*
+ * Take a dquot out of the mount's dqlist as well as the hashlist.
+ * This is called via unmount as well as quotaoff, and the purge
+ * will always succeed unless there are soft (temp) references
+ * outstanding.
+ *
+ * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
+ * that we're returning! XXXsup - not cool.
+ */
+/* ARGSUSED */
+int
+xfs_qm_dqpurge(
+       xfs_dquot_t     *dqp)
+{
+       xfs_dqhash_t    *qh = dqp->q_hash;
+       xfs_mount_t     *mp = dqp->q_mount;
+
+       ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
+       ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
+
+       xfs_dqlock(dqp);
+       /*
+        * We really can't afford to purge a dquot that is
+        * referenced, because these are hard refs.
+        * It shouldn't happen in general because we went thru _all_ inodes in
+        * dqrele_all_inodes before calling this and didn't let the mountlock go.
+        * However it is possible that we have dquots with temporary
+        * references that are not attached to an inode. e.g. see xfs_setattr().
+        */
+       if (dqp->q_nrefs != 0) {
+               xfs_dqunlock(dqp);
+               mutex_unlock(&dqp->q_hash->qh_lock);
+               return (1);
+       }
+
+       ASSERT(!list_empty(&dqp->q_freelist));
+
+       /*
+        * If we're turning off quotas, we have to make sure that, for
+        * example, we don't delete quota disk blocks while dquots are
+        * in the process of getting written to those disk blocks.
+        * This dquot might well be on AIL, and we can't leave it there
+        * if we're turning off quotas. Basically, we need this flush
+        * lock, and are willing to block on it.
+        */
+       if (!xfs_dqflock_nowait(dqp)) {
+               /*
+                * Block on the flush lock after nudging dquot buffer,
+                * if it is incore.
+                */
+               xfs_qm_dqflock_pushbuf_wait(dqp);
+       }
+
+       /*
+        * XXXIf we're turning this type of quotas off, we don't care
+        * about the dirty metadata sitting in this dquot. OTOH, if
+        * we're unmounting, we do care, so we flush it and wait.
+        */
+       if (XFS_DQ_IS_DIRTY(dqp)) {
+               int     error;
+
+               /* dqflush unlocks dqflock */
+               /*
+                * Given that dqpurge is a very rare occurrence, it is OK
+                * that we're holding the hashlist and mplist locks
+                * across the disk write. But, ... XXXsup
+                *
+                * We don't care about getting disk errors here. We need
+                * to purge this dquot anyway, so we go ahead regardless.
+                */
+               error = xfs_qm_dqflush(dqp, SYNC_WAIT);
+               if (error)
+                       xfs_warn(mp, "%s: dquot %p flush failed",
+                               __func__, dqp);
+               xfs_dqflock(dqp);
+       }
+       ASSERT(atomic_read(&dqp->q_pincount) == 0);
+       ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
+              !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
+
+       list_del_init(&dqp->q_hashlist);
+       qh->qh_version++;
+       list_del_init(&dqp->q_mplist);
+       mp->m_quotainfo->qi_dqreclaims++;
+       mp->m_quotainfo->qi_dquots--;
+       /*
+        * XXX Move this to the front of the freelist, if we can get the
+        * freelist lock.
+        */
+       ASSERT(!list_empty(&dqp->q_freelist));
+
+       dqp->q_mount = NULL;
+       dqp->q_hash = NULL;
+       dqp->dq_flags = XFS_DQ_INACTIVE;
+       memset(&dqp->q_core, 0, sizeof(dqp->q_core));
+       xfs_dqfunlock(dqp);
+       xfs_dqunlock(dqp);
+       mutex_unlock(&qh->qh_lock);
+       return (0);
+}
+
+
+/*
+ * Give the buffer a little push if it is incore and
+ * wait on the flush lock.
+ */
+void
+xfs_qm_dqflock_pushbuf_wait(
+       xfs_dquot_t     *dqp)
+{
+       xfs_mount_t     *mp = dqp->q_mount;
+       xfs_buf_t       *bp;
+
+       /*
+        * Check to see if the dquot has been flushed delayed
+        * write.  If so, grab its buffer and send it
+        * out immediately.  We'll be able to acquire
+        * the flush lock when the I/O completes.
+        */
+       bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
+                       mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
+       if (!bp)
+               goto out_lock;
+
+       if (XFS_BUF_ISDELAYWRITE(bp)) {
+               if (xfs_buf_ispinned(bp))
+                       xfs_log_force(mp, 0);
+               xfs_buf_delwri_promote(bp);
+               wake_up_process(bp->b_target->bt_task);
+       }
+       xfs_buf_relse(bp);
+out_lock:
+       xfs_dqflock(dqp);
+}
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h

new file mode 100644 (file)

index 0000000..34b7e94
--- /dev/null
+++ b/fs/xfs/xfs_dquot.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DQUOT_H__
+#define __XFS_DQUOT_H__
+
+/*
+ * Dquots are structures that hold quota information about a user or a group,
+ * much like inodes are for files. In fact, dquots share many characteristics
+ * with inodes. However, dquots can also be a centralized resource, relative
+ * to a collection of inodes. In this respect, dquots share some characteristics
+ * of the superblock.
+ * XFS dquots exploit both those in its algorithms. They make every attempt
+ * to not be a bottleneck when quotas are on and have minimal impact, if any,
+ * when quotas are off.
+ */
+
+/*
+ * The hash chain headers (hash buckets)
+ */
+typedef struct xfs_dqhash {
+       struct list_head  qh_list;
+       struct mutex      qh_lock;
+       uint              qh_version;   /* ever increasing version */
+       uint              qh_nelems;    /* number of dquots on the list */
+} xfs_dqhash_t;
+
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * The incore dquot structure
+ */
+typedef struct xfs_dquot {
+       uint             dq_flags;      /* various flags (XFS_DQ_*) */
+       struct list_head q_freelist;    /* global free list of dquots */
+       struct list_head q_mplist;      /* mount's list of dquots */
+       struct list_head q_hashlist;    /* gloabl hash list of dquots */
+       xfs_dqhash_t    *q_hash;        /* the hashchain header */
+       struct xfs_mount*q_mount;       /* filesystem this relates to */
+       struct xfs_trans*q_transp;      /* trans this belongs to currently */
+       uint             q_nrefs;       /* # active refs from inodes */
+       xfs_daddr_t      q_blkno;       /* blkno of dquot buffer */
+       int              q_bufoffset;   /* off of dq in buffer (# dquots) */
+       xfs_fileoff_t    q_fileoffset;  /* offset in quotas file */
+
+       struct xfs_dquot*q_gdquot;      /* group dquot, hint only */
+       xfs_disk_dquot_t q_core;        /* actual usage & quotas */
+       xfs_dq_logitem_t q_logitem;     /* dquot log item */
+       xfs_qcnt_t       q_res_bcount;  /* total regular nblks used+reserved */
+       xfs_qcnt_t       q_res_icount;  /* total inos allocd+reserved */
+       xfs_qcnt_t       q_res_rtbcount;/* total realtime blks used+reserved */
+       struct mutex     q_qlock;       /* quota lock */
+       struct completion q_flush;      /* flush completion queue */
+       atomic_t          q_pincount;   /* dquot pin count */
+       wait_queue_head_t q_pinwait;    /* dquot pinning wait queue */
+} xfs_dquot_t;
+
+/*
+ * Lock hierarchy for q_qlock:
+ *     XFS_QLOCK_NORMAL is the implicit default,
+ *     XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
+ */
+enum {
+       XFS_QLOCK_NORMAL = 0,
+       XFS_QLOCK_NESTED,
+};
+
+#define XFS_DQHOLD(dqp)                ((dqp)->q_nrefs++)
+
+/*
+ * Manage the q_flush completion queue embedded in the dquot.  This completion
+ * queue synchronizes processes attempting to flush the in-core dquot back to
+ * disk.
+ */
+static inline void xfs_dqflock(xfs_dquot_t *dqp)
+{
+       wait_for_completion(&dqp->q_flush);
+}
+
+static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp)
+{
+       return try_wait_for_completion(&dqp->q_flush);
+}
+
+static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
+{
+       complete(&dqp->q_flush);
+}
+
+#define XFS_DQ_IS_LOCKED(dqp)  (mutex_is_locked(&((dqp)->q_qlock)))
+#define XFS_DQ_IS_DIRTY(dqp)   ((dqp)->dq_flags & XFS_DQ_DIRTY)
+#define XFS_QM_ISUDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_USER)
+#define XFS_QM_ISPDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_PROJ)
+#define XFS_QM_ISGDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_GROUP)
+#define XFS_DQ_TO_QINF(dqp)    ((dqp)->q_mount->m_quotainfo)
+#define XFS_DQ_TO_QIP(dqp)     (XFS_QM_ISUDQ(dqp) ? \
+                                XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
+                                XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
+
+#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
+                                    (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
+                                    (XFS_IS_OQUOTA_ON((d)->q_mount))))
+
+extern void            xfs_qm_dqdestroy(xfs_dquot_t *);
+extern int             xfs_qm_dqflush(xfs_dquot_t *, uint);
+extern int             xfs_qm_dqpurge(xfs_dquot_t *);
+extern void            xfs_qm_dqunpin_wait(xfs_dquot_t *);
+extern int             xfs_qm_dqlock_nowait(xfs_dquot_t *);
+extern void            xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
+extern void            xfs_qm_adjust_dqtimers(xfs_mount_t *,
+                                       xfs_disk_dquot_t *);
+extern void            xfs_qm_adjust_dqlimits(xfs_mount_t *,
+                                       xfs_disk_dquot_t *);
+extern int             xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
+                                       xfs_dqid_t, uint, uint, xfs_dquot_t **);
+extern void            xfs_qm_dqput(xfs_dquot_t *);
+extern void            xfs_dqlock(xfs_dquot_t *);
+extern void            xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
+extern void            xfs_dqunlock(xfs_dquot_t *);
+extern void            xfs_dqunlock_nonotify(xfs_dquot_t *);
+
+#endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c

new file mode 100644 (file)

index 0000000..9e0e2fa
--- /dev/null
+++ b/fs/xfs/xfs_dquot_item.c
@@ -0,0 +1,529 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_priv.h"
+#include "xfs_qm.h"
+
+static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
+{
+       return container_of(lip, struct xfs_dq_logitem, qli_item);
+}
+
+/*
+ * returns the number of iovecs needed to log the given dquot item.
+ */
+STATIC uint
+xfs_qm_dquot_logitem_size(
+       struct xfs_log_item     *lip)
+{
+       /*
+        * we need only two iovecs, one for the format, one for the real thing
+        */
+       return 2;
+}
+
+/*
+ * fills in the vector of log iovecs for the given dquot log item.
+ */
+STATIC void
+xfs_qm_dquot_logitem_format(
+       struct xfs_log_item     *lip,
+       struct xfs_log_iovec    *logvec)
+{
+       struct xfs_dq_logitem   *qlip = DQUOT_ITEM(lip);
+
+       logvec->i_addr = &qlip->qli_format;
+       logvec->i_len  = sizeof(xfs_dq_logformat_t);
+       logvec->i_type = XLOG_REG_TYPE_QFORMAT;
+       logvec++;
+       logvec->i_addr = &qlip->qli_dquot->q_core;
+       logvec->i_len  = sizeof(xfs_disk_dquot_t);
+       logvec->i_type = XLOG_REG_TYPE_DQUOT;
+
+       ASSERT(2 == lip->li_desc->lid_size);
+       qlip->qli_format.qlf_size = 2;
+
+}
+
+/*
+ * Increment the pin count of the given dquot.
+ */
+STATIC void
+xfs_qm_dquot_logitem_pin(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
+
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       atomic_inc(&dqp->q_pincount);
+}
+
+/*
+ * Decrement the pin count of the given dquot, and wake up
+ * anyone in xfs_dqwait_unpin() if the count goes to 0.         The
+ * dquot must have been previously pinned with a call to
+ * xfs_qm_dquot_logitem_pin().
+ */
+STATIC void
+xfs_qm_dquot_logitem_unpin(
+       struct xfs_log_item     *lip,
+       int                     remove)
+{
+       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
+
+       ASSERT(atomic_read(&dqp->q_pincount) > 0);
+       if (atomic_dec_and_test(&dqp->q_pincount))
+               wake_up(&dqp->q_pinwait);
+}
+
+/*
+ * Given the logitem, this writes the corresponding dquot entry to disk
+ * asynchronously. This is called with the dquot entry securely locked;
+ * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot
+ * at the end.
+ */
+STATIC void
+xfs_qm_dquot_logitem_push(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
+       int                     error;
+
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       ASSERT(!completion_done(&dqp->q_flush));
+
+       /*
+        * Since we were able to lock the dquot's flush lock and
+        * we found it on the AIL, the dquot must be dirty.  This
+        * is because the dquot is removed from the AIL while still
+        * holding the flush lock in xfs_dqflush_done().  Thus, if
+        * we found it in the AIL and were able to obtain the flush
+        * lock without sleeping, then there must not have been
+        * anyone in the process of flushing the dquot.
+        */
+       error = xfs_qm_dqflush(dqp, 0);
+       if (error)
+               xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
+                       __func__, error, dqp);
+       xfs_dqunlock(dqp);
+}
+
+STATIC xfs_lsn_t
+xfs_qm_dquot_logitem_committed(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               lsn)
+{
+       /*
+        * We always re-log the entire dquot when it becomes dirty,
+        * so, the latest copy _is_ the only one that matters.
+        */
+       return lsn;
+}
+
+/*
+ * This is called to wait for the given dquot to be unpinned.
+ * Most of these pin/unpin routines are plagiarized from inode code.
+ */
+void
+xfs_qm_dqunpin_wait(
+       struct xfs_dquot        *dqp)
+{
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       if (atomic_read(&dqp->q_pincount) == 0)
+               return;
+
+       /*
+        * Give the log a push so we don't wait here too long.
+        */
+       xfs_log_force(dqp->q_mount, 0);
+       wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
+}
+
+/*
+ * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that
+ * the dquot is locked by us, but the flush lock isn't. So, here we are
+ * going to see if the relevant dquot buffer is incore, waiting on DELWRI.
+ * If so, we want to push it out to help us take this item off the AIL as soon
+ * as possible.
+ *
+ * We must not be holding the AIL lock at this point. Calling incore() to
+ * search the buffer cache can be a time consuming thing, and AIL lock is a
+ * spinlock.
+ */
+STATIC void
+xfs_qm_dquot_logitem_pushbuf(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_dq_logitem   *qlip = DQUOT_ITEM(lip);
+       struct xfs_dquot        *dqp = qlip->qli_dquot;
+       struct xfs_buf          *bp;
+
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+       /*
+        * If flushlock isn't locked anymore, chances are that the
+        * inode flush completed and the inode was taken off the AIL.
+        * So, just get out.
+        */
+       if (completion_done(&dqp->q_flush) ||
+           !(lip->li_flags & XFS_LI_IN_AIL)) {
+               xfs_dqunlock(dqp);
+               return;
+       }
+
+       bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
+                       dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
+       xfs_dqunlock(dqp);
+       if (!bp)
+               return;
+       if (XFS_BUF_ISDELAYWRITE(bp))
+               xfs_buf_delwri_promote(bp);
+       xfs_buf_relse(bp);
+}
+
+/*
+ * This is called to attempt to lock the dquot associated with this
+ * dquot log item.  Don't sleep on the dquot lock or the flush lock.
+ * If the flush lock is already held, indicating that the dquot has
+ * been or is in the process of being flushed, then see if we can
+ * find the dquot's buffer in the buffer cache without sleeping.  If
+ * we can and it is marked delayed write, then we want to send it out.
+ * We delay doing so until the push routine, though, to avoid sleeping
+ * in any device strategy routines.
+ */
+STATIC uint
+xfs_qm_dquot_logitem_trylock(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
+
+       if (atomic_read(&dqp->q_pincount) > 0)
+               return XFS_ITEM_PINNED;
+
+       if (!xfs_qm_dqlock_nowait(dqp))
+               return XFS_ITEM_LOCKED;
+
+       if (!xfs_dqflock_nowait(dqp)) {
+               /*
+                * dquot has already been flushed to the backing buffer,
+                * leave it locked, pushbuf routine will unlock it.
+                */
+               return XFS_ITEM_PUSHBUF;
+       }
+
+       ASSERT(lip->li_flags & XFS_LI_IN_AIL);
+       return XFS_ITEM_SUCCESS;
+}
+
+/*
+ * Unlock the dquot associated with the log item.
+ * Clear the fields of the dquot and dquot log item that
+ * are specific to the current transaction.  If the
+ * hold flags is set, do not unlock the dquot.
+ */
+STATIC void
+xfs_qm_dquot_logitem_unlock(
+       struct xfs_log_item     *lip)
+{
+       struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
+
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+       /*
+        * Clear the transaction pointer in the dquot
+        */
+       dqp->q_transp = NULL;
+
+       /*
+        * dquots are never 'held' from getting unlocked at the end of
+        * a transaction.  Their locking and unlocking is hidden inside the
+        * transaction layer, within trans_commit. Hence, no LI_HOLD flag
+        * for the logitem.
+        */
+       xfs_dqunlock(dqp);
+}
+
+/*
+ * this needs to stamp an lsn into the dquot, I think.
+ * rpc's that look at user dquot's would then have to
+ * push on the dependency recorded in the dquot
+ */
+STATIC void
+xfs_qm_dquot_logitem_committing(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               lsn)
+{
+}
+
+/*
+ * This is the ops vector for dquots
+ */
+static struct xfs_item_ops xfs_dquot_item_ops = {
+       .iop_size       = xfs_qm_dquot_logitem_size,
+       .iop_format     = xfs_qm_dquot_logitem_format,
+       .iop_pin        = xfs_qm_dquot_logitem_pin,
+       .iop_unpin      = xfs_qm_dquot_logitem_unpin,
+       .iop_trylock    = xfs_qm_dquot_logitem_trylock,
+       .iop_unlock     = xfs_qm_dquot_logitem_unlock,
+       .iop_committed  = xfs_qm_dquot_logitem_committed,
+       .iop_push       = xfs_qm_dquot_logitem_push,
+       .iop_pushbuf    = xfs_qm_dquot_logitem_pushbuf,
+       .iop_committing = xfs_qm_dquot_logitem_committing
+};
+
+/*
+ * Initialize the dquot log item for a newly allocated dquot.
+ * The dquot isn't locked at this point, but it isn't on any of the lists
+ * either, so we don't care.
+ */
+void
+xfs_qm_dquot_logitem_init(
+       struct xfs_dquot        *dqp)
+{
+       struct xfs_dq_logitem   *lp = &dqp->q_logitem;
+
+       xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
+                                       &xfs_dquot_item_ops);
+       lp->qli_dquot = dqp;
+       lp->qli_format.qlf_type = XFS_LI_DQUOT;
+       lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id);
+       lp->qli_format.qlf_blkno = dqp->q_blkno;
+       lp->qli_format.qlf_len = 1;
+       /*
+        * This is just the offset of this dquot within its buffer
+        * (which is currently 1 FSB and probably won't change).
+        * Hence 32 bits for this offset should be just fine.
+        * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t))
+        * here, and recompute it at recovery time.
+        */
+       lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset;
+}
+
+/*------------------  QUOTAOFF LOG ITEMS  -------------------*/
+
+static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip)
+{
+       return container_of(lip, struct xfs_qoff_logitem, qql_item);
+}
+
+
+/*
+ * This returns the number of iovecs needed to log the given quotaoff item.
+ * We only need 1 iovec for an quotaoff item.  It just logs the
+ * quotaoff_log_format structure.
+ */
+STATIC uint
+xfs_qm_qoff_logitem_size(
+       struct xfs_log_item     *lip)
+{
+       return 1;
+}
+
+/*
+ * This is called to fill in the vector of log iovecs for the
+ * given quotaoff log item. We use only 1 iovec, and we point that
+ * at the quotaoff_log_format structure embedded in the quotaoff item.
+ * It is at this point that we assert that all of the extent
+ * slots in the quotaoff item have been filled.
+ */
+STATIC void
+xfs_qm_qoff_logitem_format(
+       struct xfs_log_item     *lip,
+       struct xfs_log_iovec    *log_vector)
+{
+       struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip);
+
+       ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF);
+
+       log_vector->i_addr = &qflip->qql_format;
+       log_vector->i_len = sizeof(xfs_qoff_logitem_t);
+       log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
+       qflip->qql_format.qf_size = 1;
+}
+
+/*
+ * Pinning has no meaning for an quotaoff item, so just return.
+ */
+STATIC void
+xfs_qm_qoff_logitem_pin(
+       struct xfs_log_item     *lip)
+{
+}
+
+/*
+ * Since pinning has no meaning for an quotaoff item, unpinning does
+ * not either.
+ */
+STATIC void
+xfs_qm_qoff_logitem_unpin(
+       struct xfs_log_item     *lip,
+       int                     remove)
+{
+}
+
+/*
+ * Quotaoff items have no locking, so just return success.
+ */
+STATIC uint
+xfs_qm_qoff_logitem_trylock(
+       struct xfs_log_item     *lip)
+{
+       return XFS_ITEM_LOCKED;
+}
+
+/*
+ * Quotaoff items have no locking or pushing, so return failure
+ * so that the caller doesn't bother with us.
+ */
+STATIC void
+xfs_qm_qoff_logitem_unlock(
+       struct xfs_log_item     *lip)
+{
+}
+
+/*
+ * The quotaoff-start-item is logged only once and cannot be moved in the log,
+ * so simply return the lsn at which it's been logged.
+ */
+STATIC xfs_lsn_t
+xfs_qm_qoff_logitem_committed(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               lsn)
+{
+       return lsn;
+}
+
+/*
+ * There isn't much you can do to push on an quotaoff item.  It is simply
+ * stuck waiting for the log to be flushed to disk.
+ */
+STATIC void
+xfs_qm_qoff_logitem_push(
+       struct xfs_log_item     *lip)
+{
+}
+
+
+STATIC xfs_lsn_t
+xfs_qm_qoffend_logitem_committed(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               lsn)
+{
+       struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip);
+       struct xfs_qoff_logitem *qfs = qfe->qql_start_lip;
+       struct xfs_ail          *ailp = qfs->qql_item.li_ailp;
+
+       /*
+        * Delete the qoff-start logitem from the AIL.
+        * xfs_trans_ail_delete() drops the AIL lock.
+        */
+       spin_lock(&ailp->xa_lock);
+       xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
+
+       kmem_free(qfs);
+       kmem_free(qfe);
+       return (xfs_lsn_t)-1;
+}
+
+/*
+ * XXX rcc - don't know quite what to do with this.  I think we can
+ * just ignore it.  The only time that isn't the case is if we allow
+ * the client to somehow see that quotas have been turned off in which
+ * we can't allow that to get back until the quotaoff hits the disk.
+ * So how would that happen?  Also, do we need different routines for
+ * quotaoff start and quotaoff end?  I suspect the answer is yes but
+ * to be sure, I need to look at the recovery code and see how quota off
+ * recovery is handled (do we roll forward or back or do something else).
+ * If we roll forwards or backwards, then we need two separate routines,
+ * one that does nothing and one that stamps in the lsn that matters
+ * (truly makes the quotaoff irrevocable).  If we do something else,
+ * then maybe we don't need two.
+ */
+STATIC void
+xfs_qm_qoff_logitem_committing(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               commit_lsn)
+{
+}
+
+static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
+       .iop_size       = xfs_qm_qoff_logitem_size,
+       .iop_format     = xfs_qm_qoff_logitem_format,
+       .iop_pin        = xfs_qm_qoff_logitem_pin,
+       .iop_unpin      = xfs_qm_qoff_logitem_unpin,
+       .iop_trylock    = xfs_qm_qoff_logitem_trylock,
+       .iop_unlock     = xfs_qm_qoff_logitem_unlock,
+       .iop_committed  = xfs_qm_qoffend_logitem_committed,
+       .iop_push       = xfs_qm_qoff_logitem_push,
+       .iop_committing = xfs_qm_qoff_logitem_committing
+};
+
+/*
+ * This is the ops vector shared by all quotaoff-start log items.
+ */
+static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
+       .iop_size       = xfs_qm_qoff_logitem_size,
+       .iop_format     = xfs_qm_qoff_logitem_format,
+       .iop_pin        = xfs_qm_qoff_logitem_pin,
+       .iop_unpin      = xfs_qm_qoff_logitem_unpin,
+       .iop_trylock    = xfs_qm_qoff_logitem_trylock,
+       .iop_unlock     = xfs_qm_qoff_logitem_unlock,
+       .iop_committed  = xfs_qm_qoff_logitem_committed,
+       .iop_push       = xfs_qm_qoff_logitem_push,
+       .iop_committing = xfs_qm_qoff_logitem_committing
+};
+
+/*
+ * Allocate and initialize an quotaoff item of the correct quota type(s).
+ */
+struct xfs_qoff_logitem *
+xfs_qm_qoff_logitem_init(
+       struct xfs_mount        *mp,
+       struct xfs_qoff_logitem *start,
+       uint                    flags)
+{
+       struct xfs_qoff_logitem *qf;
+
+       qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP);
+
+       xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
+                       &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
+       qf->qql_item.li_mountp = mp;
+       qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
+       qf->qql_format.qf_flags = flags;
+       qf->qql_start_lip = start;
+       return qf;
+}
diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h

new file mode 100644 (file)

index 0000000..5acae2a
--- /dev/null
+++ b/fs/xfs/xfs_dquot_item.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DQUOT_ITEM_H__
+#define __XFS_DQUOT_ITEM_H__
+
+struct xfs_dquot;
+struct xfs_trans;
+struct xfs_mount;
+struct xfs_qoff_logitem;
+
+typedef struct xfs_dq_logitem {
+       xfs_log_item_t           qli_item;         /* common portion */
+       struct xfs_dquot        *qli_dquot;        /* dquot ptr */
+       xfs_lsn_t                qli_flush_lsn;    /* lsn at last flush */
+       xfs_dq_logformat_t       qli_format;       /* logged structure */
+} xfs_dq_logitem_t;
+
+typedef struct xfs_qoff_logitem {
+       xfs_log_item_t           qql_item;      /* common portion */
+       struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
+       xfs_qoff_logformat_t     qql_format;    /* logged structure */
+} xfs_qoff_logitem_t;
+
+
+extern void               xfs_qm_dquot_logitem_init(struct xfs_dquot *);
+extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *,
+                                       struct xfs_qoff_logitem *, uint);
+extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *,
+                                       struct xfs_qoff_logitem *, uint);
+extern void               xfs_trans_log_quotaoff_item(struct xfs_trans *,
+                                       struct xfs_qoff_logitem *);
+
+#endif /* __XFS_DQUOT_ITEM_H__ */
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c

new file mode 100644 (file)

index 0000000..75e5d32
--- /dev/null
+++ b/fs/xfs/xfs_export.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_types.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_mount.h"
+#include "xfs_export.h"
+#include "xfs_vnodeops.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_trace.h"
+
+/*
+ * Note that we only accept fileids which are long enough rather than allow
+ * the parent generation number to default to zero.  XFS considers zero a
+ * valid generation number not an invalid/wildcard value.
+ */
+static int xfs_fileid_length(int fileid_type)
+{
+       switch (fileid_type) {
+       case FILEID_INO32_GEN:
+               return 2;
+       case FILEID_INO32_GEN_PARENT:
+               return 4;
+       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
+               return 3;
+       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
+               return 6;
+       }
+       return 255; /* invalid */
+}
+
+STATIC int
+xfs_fs_encode_fh(
+       struct dentry           *dentry,
+       __u32                   *fh,
+       int                     *max_len,
+       int                     connectable)
+{
+       struct fid              *fid = (struct fid *)fh;
+       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fh;
+       struct inode            *inode = dentry->d_inode;
+       int                     fileid_type;
+       int                     len;
+
+       /* Directories don't need their parent encoded, they have ".." */
+       if (S_ISDIR(inode->i_mode) || !connectable)
+               fileid_type = FILEID_INO32_GEN;
+       else
+               fileid_type = FILEID_INO32_GEN_PARENT;
+
+       /*
+        * If the the filesystem may contain 64bit inode numbers, we need
+        * to use larger file handles that can represent them.
+        *
+        * While we only allocate inodes that do not fit into 32 bits any
+        * large enough filesystem may contain them, thus the slightly
+        * confusing looking conditional below.
+        */
+       if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
+           (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
+               fileid_type |= XFS_FILEID_TYPE_64FLAG;
+
+       /*
+        * Only encode if there is enough space given.  In practice
+        * this means we can't export a filesystem with 64bit inodes
+        * over NFSv2 with the subtree_check export option; the other
+        * seven combinations work.  The real answer is "don't use v2".
+        */
+       len = xfs_fileid_length(fileid_type);
+       if (*max_len < len) {
+               *max_len = len;
+               return 255;
+       }
+       *max_len = len;
+
+       switch (fileid_type) {
+       case FILEID_INO32_GEN_PARENT:
+               spin_lock(&dentry->d_lock);
+               fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino;
+               fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation;
+               spin_unlock(&dentry->d_lock);
+               /*FALLTHRU*/
+       case FILEID_INO32_GEN:
+               fid->i32.ino = inode->i_ino;
+               fid->i32.gen = inode->i_generation;
+               break;
+       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
+               spin_lock(&dentry->d_lock);
+               fid64->parent_ino = dentry->d_parent->d_inode->i_ino;
+               fid64->parent_gen = dentry->d_parent->d_inode->i_generation;
+               spin_unlock(&dentry->d_lock);
+               /*FALLTHRU*/
+       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
+               fid64->ino = inode->i_ino;
+               fid64->gen = inode->i_generation;
+               break;
+       }
+
+       return fileid_type;
+}
+
+STATIC struct inode *
+xfs_nfs_get_inode(
+       struct super_block      *sb,
+       u64                     ino,
+       u32                     generation)
+ {
+       xfs_mount_t             *mp = XFS_M(sb);
+       xfs_inode_t             *ip;
+       int                     error;
+
+       /*
+        * NFS can sometimes send requests for ino 0.  Fail them gracefully.
+        */
+       if (ino == 0)
+               return ERR_PTR(-ESTALE);
+
+       /*
+        * The XFS_IGET_UNTRUSTED means that an invalid inode number is just
+        * fine and not an indication of a corrupted filesystem as clients can
+        * send invalid file handles and we have to handle it gracefully..
+        */
+       error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip);
+       if (error) {
+               /*
+                * EINVAL means the inode cluster doesn't exist anymore.
+                * This implies the filehandle is stale, so we should
+                * translate it here.
+                * We don't use ESTALE directly down the chain to not
+                * confuse applications using bulkstat that expect EINVAL.
+                */
+               if (error == EINVAL || error == ENOENT)
+                       error = ESTALE;
+               return ERR_PTR(-error);
+       }
+
+       if (ip->i_d.di_gen != generation) {
+               IRELE(ip);
+               return ERR_PTR(-ESTALE);
+       }
+
+       return VFS_I(ip);
+}
+
+STATIC struct dentry *
+xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+                int fh_len, int fileid_type)
+{
+       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fid;
+       struct inode            *inode = NULL;
+
+       if (fh_len < xfs_fileid_length(fileid_type))
+               return NULL;
+
+       switch (fileid_type) {
+       case FILEID_INO32_GEN_PARENT:
+       case FILEID_INO32_GEN:
+               inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen);
+               break;
+       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
+       case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
+               inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen);
+               break;
+       }
+
+       return d_obtain_alias(inode);
+}
+
+STATIC struct dentry *
+xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
+                int fh_len, int fileid_type)
+{
+       struct xfs_fid64        *fid64 = (struct xfs_fid64 *)fid;
+       struct inode            *inode = NULL;
+
+       switch (fileid_type) {
+       case FILEID_INO32_GEN_PARENT:
+               inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino,
+                                             fid->i32.parent_gen);
+               break;
+       case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
+               inode = xfs_nfs_get_inode(sb, fid64->parent_ino,
+                                             fid64->parent_gen);
+               break;
+       }
+
+       return d_obtain_alias(inode);
+}
+
+STATIC struct dentry *
+xfs_fs_get_parent(
+       struct dentry           *child)
+{
+       int                     error;
+       struct xfs_inode        *cip;
+
+       error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
+       if (unlikely(error))
+               return ERR_PTR(-error);
+
+       return d_obtain_alias(VFS_I(cip));
+}
+
+STATIC int
+xfs_fs_nfs_commit_metadata(
+       struct inode            *inode)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       int                     error = 0;
+
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+       if (xfs_ipincount(ip)) {
+               error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn,
+                               XFS_LOG_SYNC, NULL);
+       }
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+       return error;
+}
+
+const struct export_operations xfs_export_operations = {
+       .encode_fh              = xfs_fs_encode_fh,
+       .fh_to_dentry           = xfs_fs_fh_to_dentry,
+       .fh_to_parent           = xfs_fs_fh_to_parent,
+       .get_parent             = xfs_fs_get_parent,
+       .commit_metadata        = xfs_fs_nfs_commit_metadata,
+};
diff --git a/fs/xfs/xfs_export.h b/fs/xfs/xfs_export.h

new file mode 100644 (file)

index 0000000..3272b6a
--- /dev/null
+++ b/fs/xfs/xfs_export.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_EXPORT_H__
+#define __XFS_EXPORT_H__
+
+/*
+ * Common defines for code related to exporting XFS filesystems over NFS.
+ *
+ * The NFS fileid goes out on the wire as an array of
+ * 32bit unsigned ints in host order.  There are 5 possible
+ * formats.
+ *
+ * (1) fileid_type=0x00
+ *     (no fileid data; handled by the generic code)
+ *
+ * (2) fileid_type=0x01
+ *     inode-num
+ *     generation
+ *
+ * (3) fileid_type=0x02
+ *     inode-num
+ *     generation
+ *     parent-inode-num
+ *     parent-generation
+ *
+ * (4) fileid_type=0x81
+ *     inode-num-lo32
+ *     inode-num-hi32
+ *     generation
+ *
+ * (5) fileid_type=0x82
+ *     inode-num-lo32
+ *     inode-num-hi32
+ *     generation
+ *     parent-inode-num-lo32
+ *     parent-inode-num-hi32
+ *     parent-generation
+ *
+ * Note, the NFS filehandle also includes an fsid portion which
+ * may have an inode number in it.  That number is hardcoded to
+ * 32bits and there is no way for XFS to intercept it.  In
+ * practice this means when exporting an XFS filesystem with 64bit
+ * inodes you should either export the mountpoint (rather than
+ * a subdirectory) or use the "fsid" export option.
+ */
+
+struct xfs_fid64 {
+       u64 ino;
+       u32 gen;
+       u64 parent_ino;
+       u32 parent_gen;
+} __attribute__((packed));
+
+/* This flag goes on the wire.  Don't play with it. */
+#define XFS_FILEID_TYPE_64FLAG 0x80    /* NFS fileid has 64bit inodes */
+
+#endif /* __XFS_EXPORT_H__ */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c

new file mode 100644 (file)

index 0000000..7f7b424
--- /dev/null
+++ b/fs/xfs/xfs_file.c
@@ -0,0 +1,1096 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_trans.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap.h"
+#include "xfs_error.h"
+#include "xfs_vnodeops.h"
+#include "xfs_da_btree.h"
+#include "xfs_ioctl.h"
+#include "xfs_trace.h"
+
+#include <linux/dcache.h>
+#include <linux/falloc.h>
+
+static const struct vm_operations_struct xfs_file_vm_ops;
+
+/*
+ * Locking primitives for read and write IO paths to ensure we consistently use
+ * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
+ */
+static inline void
+xfs_rw_ilock(
+       struct xfs_inode        *ip,
+       int                     type)
+{
+       if (type & XFS_IOLOCK_EXCL)
+               mutex_lock(&VFS_I(ip)->i_mutex);
+       xfs_ilock(ip, type);
+}
+
+static inline void
+xfs_rw_iunlock(
+       struct xfs_inode        *ip,
+       int                     type)
+{
+       xfs_iunlock(ip, type);
+       if (type & XFS_IOLOCK_EXCL)
+               mutex_unlock(&VFS_I(ip)->i_mutex);
+}
+
+static inline void
+xfs_rw_ilock_demote(
+       struct xfs_inode        *ip,
+       int                     type)
+{
+       xfs_ilock_demote(ip, type);
+       if (type & XFS_IOLOCK_EXCL)
+               mutex_unlock(&VFS_I(ip)->i_mutex);
+}
+
+/*
+ *     xfs_iozero
+ *
+ *     xfs_iozero clears the specified range of buffer supplied,
+ *     and marks all the affected blocks as valid and modified.  If
+ *     an affected block is not allocated, it will be allocated.  If
+ *     an affected block is not completely overwritten, and is not
+ *     valid before the operation, it will be read from disk before
+ *     being partially zeroed.
+ */
+STATIC int
+xfs_iozero(
+       struct xfs_inode        *ip,    /* inode                        */
+       loff_t                  pos,    /* offset in file               */
+       size_t                  count)  /* size of data to zero         */
+{
+       struct page             *page;
+       struct address_space    *mapping;
+       int                     status;
+
+       mapping = VFS_I(ip)->i_mapping;
+       do {
+               unsigned offset, bytes;
+               void *fsdata;
+
+               offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
+               bytes = PAGE_CACHE_SIZE - offset;
+               if (bytes > count)
+                       bytes = count;
+
+               status = pagecache_write_begin(NULL, mapping, pos, bytes,
+                                       AOP_FLAG_UNINTERRUPTIBLE,
+                                       &page, &fsdata);
+               if (status)
+                       break;
+
+               zero_user(page, offset, bytes);
+
+               status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
+                                       page, fsdata);
+               WARN_ON(status <= 0); /* can't return less than zero! */
+               pos += bytes;
+               count -= bytes;
+               status = 0;
+       } while (count);
+
+       return (-status);
+}
+
+STATIC int
+xfs_file_fsync(
+       struct file             *file,
+       loff_t                  start,
+       loff_t                  end,
+       int                     datasync)
+{
+       struct inode            *inode = file->f_mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       int                     error = 0;
+       int                     log_flushed = 0;
+
+       trace_xfs_file_fsync(ip);
+
+       error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+       if (error)
+               return error;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+
+       xfs_iflags_clear(ip, XFS_ITRUNCATED);
+
+       xfs_ilock(ip, XFS_IOLOCK_SHARED);
+       xfs_ioend_wait(ip);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+       if (mp->m_flags & XFS_MOUNT_BARRIER) {
+               /*
+                * If we have an RT and/or log subvolume we need to make sure
+                * to flush the write cache the device used for file data
+                * first.  This is to ensure newly written file data make
+                * it to disk before logging the new inode size in case of
+                * an extending write.
+                */
+               if (XFS_IS_REALTIME_INODE(ip))
+                       xfs_blkdev_issue_flush(mp->m_rtdev_targp);
+               else if (mp->m_logdev_targp != mp->m_ddev_targp)
+                       xfs_blkdev_issue_flush(mp->m_ddev_targp);
+       }
+
+       /*
+        * We always need to make sure that the required inode state is safe on
+        * disk.  The inode might be clean but we still might need to force the
+        * log because of committed transactions that haven't hit the disk yet.
+        * Likewise, there could be unflushed non-transactional changes to the
+        * inode core that have to go to disk and this requires us to issue
+        * a synchronous transaction to capture these changes correctly.
+        *
+        * This code relies on the assumption that if the i_update_core field
+        * of the inode is clear and the inode is unpinned then it is clean
+        * and no action is required.
+        */
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+
+       /*
+        * First check if the VFS inode is marked dirty.  All the dirtying
+        * of non-transactional updates no goes through mark_inode_dirty*,
+        * which allows us to distinguish beteeen pure timestamp updates
+        * and i_size updates which need to be caught for fdatasync.
+        * After that also theck for the dirty state in the XFS inode, which
+        * might gets cleared when the inode gets written out via the AIL
+        * or xfs_iflush_cluster.
+        */
+       if (((inode->i_state & I_DIRTY_DATASYNC) ||
+           ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
+           ip->i_update_core) {
+               /*
+                * Kick off a transaction to log the inode core to get the
+                * updates.  The sync transaction will also force the log.
+                */
+               xfs_iunlock(ip, XFS_ILOCK_SHARED);
+               tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+               error = xfs_trans_reserve(tp, 0,
+                               XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+               if (error) {
+                       xfs_trans_cancel(tp, 0);
+                       return -error;
+               }
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+               /*
+                * Note - it's possible that we might have pushed ourselves out
+                * of the way during trans_reserve which would flush the inode.
+                * But there's no guarantee that the inode buffer has actually
+                * gone out yet (it's delwri).  Plus the buffer could be pinned
+                * anyway if it's part of an inode in another recent
+                * transaction.  So we play it safe and fire off the
+                * transaction anyway.
+                */
+               xfs_trans_ijoin(tp, ip);
+               xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+               xfs_trans_set_sync(tp);
+               error = _xfs_trans_commit(tp, 0, &log_flushed);
+
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       } else {
+               /*
+                * Timestamps/size haven't changed since last inode flush or
+                * inode transaction commit.  That means either nothing got
+                * written or a transaction committed which caught the updates.
+                * If the latter happened and the transaction hasn't hit the
+                * disk yet, the inode will be still be pinned.  If it is,
+                * force the log.
+                */
+               if (xfs_ipincount(ip)) {
+                       error = _xfs_log_force_lsn(mp,
+                                       ip->i_itemp->ili_last_lsn,
+                                       XFS_LOG_SYNC, &log_flushed);
+               }
+               xfs_iunlock(ip, XFS_ILOCK_SHARED);
+       }
+
+       /*
+        * If we only have a single device, and the log force about was
+        * a no-op we might have to flush the data device cache here.
+        * This can only happen for fdatasync/O_DSYNC if we were overwriting
+        * an already allocated file and thus do not have any metadata to
+        * commit.
+        */
+       if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
+           mp->m_logdev_targp == mp->m_ddev_targp &&
+           !XFS_IS_REALTIME_INODE(ip) &&
+           !log_flushed)
+               xfs_blkdev_issue_flush(mp->m_ddev_targp);
+
+       return -error;
+}
+
+STATIC ssize_t
+xfs_file_aio_read(
+       struct kiocb            *iocb,
+       const struct iovec      *iovp,
+       unsigned long           nr_segs,
+       loff_t                  pos)
+{
+       struct file             *file = iocb->ki_filp;
+       struct inode            *inode = file->f_mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       size_t                  size = 0;
+       ssize_t                 ret = 0;
+       int                     ioflags = 0;
+       xfs_fsize_t             n;
+       unsigned long           seg;
+
+       XFS_STATS_INC(xs_read_calls);
+
+       BUG_ON(iocb->ki_pos != pos);
+
+       if (unlikely(file->f_flags & O_DIRECT))
+               ioflags |= IO_ISDIRECT;
+       if (file->f_mode & FMODE_NOCMTIME)
+               ioflags |= IO_INVIS;
+
+       /* START copy & waste from filemap.c */
+       for (seg = 0; seg < nr_segs; seg++) {
+               const struct iovec *iv = &iovp[seg];
+
+               /*
+                * If any segment has a negative length, or the cumulative
+                * length ever wraps negative then return -EINVAL.
+                */
+               size += iv->iov_len;
+               if (unlikely((ssize_t)(size|iv->iov_len) < 0))
+                       return XFS_ERROR(-EINVAL);
+       }
+       /* END copy & waste from filemap.c */
+
+       if (unlikely(ioflags & IO_ISDIRECT)) {
+               xfs_buftarg_t   *target =
+                       XFS_IS_REALTIME_INODE(ip) ?
+                               mp->m_rtdev_targp : mp->m_ddev_targp;
+               if ((iocb->ki_pos & target->bt_smask) ||
+                   (size & target->bt_smask)) {
+                       if (iocb->ki_pos == ip->i_size)
+                               return 0;
+                       return -XFS_ERROR(EINVAL);
+               }
+       }
+
+       n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
+       if (n <= 0 || size == 0)
+               return 0;
+
+       if (n < size)
+               size = n;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       if (unlikely(ioflags & IO_ISDIRECT)) {
+               xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
+
+               if (inode->i_mapping->nrpages) {
+                       ret = -xfs_flushinval_pages(ip,
+                                       (iocb->ki_pos & PAGE_CACHE_MASK),
+                                       -1, FI_REMAPF_LOCKED);
+                       if (ret) {
+                               xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
+                               return ret;
+                       }
+               }
+               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+       } else
+               xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+
+       trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
+
+       ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
+       if (ret > 0)
+               XFS_STATS_ADD(xs_read_bytes, ret);
+
+       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+       return ret;
+}
+
+STATIC ssize_t
+xfs_file_splice_read(
+       struct file             *infilp,
+       loff_t                  *ppos,
+       struct pipe_inode_info  *pipe,
+       size_t                  count,
+       unsigned int            flags)
+{
+       struct xfs_inode        *ip = XFS_I(infilp->f_mapping->host);
+       int                     ioflags = 0;
+       ssize_t                 ret;
+
+       XFS_STATS_INC(xs_read_calls);
+
+       if (infilp->f_mode & FMODE_NOCMTIME)
+               ioflags |= IO_INVIS;
+
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return -EIO;
+
+       xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+
+       trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
+
+       ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
+       if (ret > 0)
+               XFS_STATS_ADD(xs_read_bytes, ret);
+
+       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+       return ret;
+}
+
+STATIC void
+xfs_aio_write_isize_update(
+       struct inode    *inode,
+       loff_t          *ppos,
+       ssize_t         bytes_written)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       xfs_fsize_t             isize = i_size_read(inode);
+
+       if (bytes_written > 0)
+               XFS_STATS_ADD(xs_write_bytes, bytes_written);
+
+       if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
+                                       *ppos > isize))
+               *ppos = isize;
+
+       if (*ppos > ip->i_size) {
+               xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
+               if (*ppos > ip->i_size)
+                       ip->i_size = *ppos;
+               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
+       }
+}
+
+/*
+ * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
+ * part of the I/O may have been written to disk before the error occurred.  In
+ * this case the on-disk file size may have been adjusted beyond the in-memory
+ * file size and now needs to be truncated back.
+ */
+STATIC void
+xfs_aio_write_newsize_update(
+       struct xfs_inode        *ip)
+{
+       if (ip->i_new_size) {
+               xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
+               ip->i_new_size = 0;
+               if (ip->i_d.di_size > ip->i_size)
+                       ip->i_d.di_size = ip->i_size;
+               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
+       }
+}
+
+/*
+ * xfs_file_splice_write() does not use xfs_rw_ilock() because
+ * generic_file_splice_write() takes the i_mutex itself. This, in theory,
+ * couuld cause lock inversions between the aio_write path and the splice path
+ * if someone is doing concurrent splice(2) based writes and write(2) based
+ * writes to the same inode. The only real way to fix this is to re-implement
+ * the generic code here with correct locking orders.
+ */
+STATIC ssize_t
+xfs_file_splice_write(
+       struct pipe_inode_info  *pipe,
+       struct file             *outfilp,
+       loff_t                  *ppos,
+       size_t                  count,
+       unsigned int            flags)
+{
+       struct inode            *inode = outfilp->f_mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       xfs_fsize_t             new_size;
+       int                     ioflags = 0;
+       ssize_t                 ret;
+
+       XFS_STATS_INC(xs_write_calls);
+
+       if (outfilp->f_mode & FMODE_NOCMTIME)
+               ioflags |= IO_INVIS;
+
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return -EIO;
+
+       xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+       new_size = *ppos + count;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       if (new_size > ip->i_size)
+               ip->i_new_size = new_size;
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
+
+       ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
+
+       xfs_aio_write_isize_update(inode, ppos, ret);
+       xfs_aio_write_newsize_update(ip);
+       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+       return ret;
+}
+
+/*
+ * This routine is called to handle zeroing any space in the last
+ * block of the file that is beyond the EOF.  We do this since the
+ * size is being increased without writing anything to that block
+ * and we don't want anyone to read the garbage on the disk.
+ */
+STATIC int                             /* error (positive) */
+xfs_zero_last_block(
+       xfs_inode_t     *ip,
+       xfs_fsize_t     offset,
+       xfs_fsize_t     isize)
+{
+       xfs_fileoff_t   last_fsb;
+       xfs_mount_t     *mp = ip->i_mount;
+       int             nimaps;
+       int             zero_offset;
+       int             zero_len;
+       int             error = 0;
+       xfs_bmbt_irec_t imap;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+       zero_offset = XFS_B_FSB_OFFSET(mp, isize);
+       if (zero_offset == 0) {
+               /*
+                * There are no extra bytes in the last block on disk to
+                * zero, so return.
+                */
+               return 0;
+       }
+
+       last_fsb = XFS_B_TO_FSBT(mp, isize);
+       nimaps = 1;
+       error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
+                         &nimaps, NULL);
+       if (error) {
+               return error;
+       }
+       ASSERT(nimaps > 0);
+       /*
+        * If the block underlying isize is just a hole, then there
+        * is nothing to zero.
+        */
+       if (imap.br_startblock == HOLESTARTBLOCK) {
+               return 0;
+       }
+       /*
+        * Zero the part of the last block beyond the EOF, and write it
+        * out sync.  We need to drop the ilock while we do this so we
+        * don't deadlock when the buffer cache calls back to us.
+        */
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       zero_len = mp->m_sb.sb_blocksize - zero_offset;
+       if (isize + zero_len > offset)
+               zero_len = offset - isize;
+       error = xfs_iozero(ip, isize, zero_len);
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       ASSERT(error >= 0);
+       return error;
+}
+
+/*
+ * Zero any on disk space between the current EOF and the new,
+ * larger EOF.  This handles the normal case of zeroing the remainder
+ * of the last block in the file and the unusual case of zeroing blocks
+ * out beyond the size of the file.  This second case only happens
+ * with fixed size extents and when the system crashes before the inode
+ * size was updated but after blocks were allocated.  If fill is set,
+ * then any holes in the range are filled and zeroed.  If not, the holes
+ * are left alone as holes.
+ */
+
+int                                    /* error (positive) */
+xfs_zero_eof(
+       xfs_inode_t     *ip,
+       xfs_off_t       offset,         /* starting I/O offset */
+       xfs_fsize_t     isize)          /* current inode size */
+{
+       xfs_mount_t     *mp = ip->i_mount;
+       xfs_fileoff_t   start_zero_fsb;
+       xfs_fileoff_t   end_zero_fsb;
+       xfs_fileoff_t   zero_count_fsb;
+       xfs_fileoff_t   last_fsb;
+       xfs_fileoff_t   zero_off;
+       xfs_fsize_t     zero_len;
+       int             nimaps;
+       int             error = 0;
+       xfs_bmbt_irec_t imap;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+       ASSERT(offset > isize);
+
+       /*
+        * First handle zeroing the block on which isize resides.
+        * We only zero a part of that block so it is handled specially.
+        */
+       error = xfs_zero_last_block(ip, offset, isize);
+       if (error) {
+               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+               return error;
+       }
+
+       /*
+        * Calculate the range between the new size and the old
+        * where blocks needing to be zeroed may exist.  To get the
+        * block where the last byte in the file currently resides,
+        * we need to subtract one from the size and truncate back
+        * to a block boundary.  We subtract 1 in case the size is
+        * exactly on a block boundary.
+        */
+       last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
+       start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
+       end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
+       ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
+       if (last_fsb == end_zero_fsb) {
+               /*
+                * The size was only incremented on its last block.
+                * We took care of that above, so just return.
+                */
+               return 0;
+       }
+
+       ASSERT(start_zero_fsb <= end_zero_fsb);
+       while (start_zero_fsb <= end_zero_fsb) {
+               nimaps = 1;
+               zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
+               error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
+                                 0, NULL, 0, &imap, &nimaps, NULL);
+               if (error) {
+                       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+                       return error;
+               }
+               ASSERT(nimaps > 0);
+
+               if (imap.br_state == XFS_EXT_UNWRITTEN ||
+                   imap.br_startblock == HOLESTARTBLOCK) {
+                       /*
+                        * This loop handles initializing pages that were
+                        * partially initialized by the code below this
+                        * loop. It basically zeroes the part of the page
+                        * that sits on a hole and sets the page as P_HOLE
+                        * and calls remapf if it is a mapped file.
+                        */
+                       start_zero_fsb = imap.br_startoff + imap.br_blockcount;
+                       ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+                       continue;
+               }
+
+               /*
+                * There are blocks we need to zero.
+                * Drop the inode lock while we're doing the I/O.
+                * We'll still have the iolock to protect us.
+                */
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+               zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
+               zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
+
+               if ((zero_off + zero_len) > offset)
+                       zero_len = offset - zero_off;
+
+               error = xfs_iozero(ip, zero_off, zero_len);
+               if (error) {
+                       goto out_lock;
+               }
+
+               start_zero_fsb = imap.br_startoff + imap.br_blockcount;
+               ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
+
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+       }
+
+       return 0;
+
+out_lock:
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       ASSERT(error >= 0);
+       return error;
+}
+
+/*
+ * Common pre-write limit and setup checks.
+ *
+ * Returns with iolock held according to @iolock.
+ */
+STATIC ssize_t
+xfs_file_aio_write_checks(
+       struct file             *file,
+       loff_t                  *pos,
+       size_t                  *count,
+       int                     *iolock)
+{
+       struct inode            *inode = file->f_mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       xfs_fsize_t             new_size;
+       int                     error = 0;
+
+       error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
+       if (error) {
+               xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
+               *iolock = 0;
+               return error;
+       }
+
+       new_size = *pos + *count;
+       if (new_size > ip->i_size)
+               ip->i_new_size = new_size;
+
+       if (likely(!(file->f_mode & FMODE_NOCMTIME)))
+               file_update_time(file);
+
+       /*
+        * If the offset is beyond the size of the file, we need to zero any
+        * blocks that fall between the existing EOF and the start of this
+        * write.
+        */
+       if (*pos > ip->i_size)
+               error = -xfs_zero_eof(ip, *pos, ip->i_size);
+
+       xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
+       if (error)
+               return error;
+
+       /*
+        * If we're writing the file then make sure to clear the setuid and
+        * setgid bits if the process is not being run by root.  This keeps
+        * people from modifying setuid and setgid binaries.
+        */
+       return file_remove_suid(file);
+
+}
+
+/*
+ * xfs_file_dio_aio_write - handle direct IO writes
+ *
+ * Lock the inode appropriately to prepare for and issue a direct IO write.
+ * By separating it from the buffered write path we remove all the tricky to
+ * follow locking changes and looping.
+ *
+ * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
+ * until we're sure the bytes at the new EOF have been zeroed and/or the cached
+ * pages are flushed out.
+ *
+ * In most cases the direct IO writes will be done holding IOLOCK_SHARED
+ * allowing them to be done in parallel with reads and other direct IO writes.
+ * However, if the IO is not aligned to filesystem blocks, the direct IO layer
+ * needs to do sub-block zeroing and that requires serialisation against other
+ * direct IOs to the same block. In this case we need to serialise the
+ * submission of the unaligned IOs so that we don't get racing block zeroing in
+ * the dio layer.  To avoid the problem with aio, we also need to wait for
+ * outstanding IOs to complete so that unwritten extent conversion is completed
+ * before we try to map the overlapping block. This is currently implemented by
+ * hitting it with a big hammer (i.e. xfs_ioend_wait()).
+ *
+ * Returns with locks held indicated by @iolock and errors indicated by
+ * negative return values.
+ */
+STATIC ssize_t
+xfs_file_dio_aio_write(
+       struct kiocb            *iocb,
+       const struct iovec      *iovp,
+       unsigned long           nr_segs,
+       loff_t                  pos,
+       size_t                  ocount,
+       int                     *iolock)
+{
+       struct file             *file = iocb->ki_filp;
+       struct address_space    *mapping = file->f_mapping;
+       struct inode            *inode = mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       ssize_t                 ret = 0;
+       size_t                  count = ocount;
+       int                     unaligned_io = 0;
+       struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
+                                       mp->m_rtdev_targp : mp->m_ddev_targp;
+
+       *iolock = 0;
+       if ((pos & target->bt_smask) || (count & target->bt_smask))
+               return -XFS_ERROR(EINVAL);
+
+       if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
+               unaligned_io = 1;
+
+       if (unaligned_io || mapping->nrpages || pos > ip->i_size)
+               *iolock = XFS_IOLOCK_EXCL;
+       else
+               *iolock = XFS_IOLOCK_SHARED;
+       xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
+
+       ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
+       if (ret)
+               return ret;
+
+       if (mapping->nrpages) {
+               WARN_ON(*iolock != XFS_IOLOCK_EXCL);
+               ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
+                                                       FI_REMAPF_LOCKED);
+               if (ret)
+                       return ret;
+       }
+
+       /*
+        * If we are doing unaligned IO, wait for all other IO to drain,
+        * otherwise demote the lock if we had to flush cached pages
+        */
+       if (unaligned_io)
+               xfs_ioend_wait(ip);
+       else if (*iolock == XFS_IOLOCK_EXCL) {
+               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+               *iolock = XFS_IOLOCK_SHARED;
+       }
+
+       trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
+       ret = generic_file_direct_write(iocb, iovp,
+                       &nr_segs, pos, &iocb->ki_pos, count, ocount);
+
+       /* No fallback to buffered IO on errors for XFS. */
+       ASSERT(ret < 0 || ret == count);
+       return ret;
+}
+
+STATIC ssize_t
+xfs_file_buffered_aio_write(
+       struct kiocb            *iocb,
+       const struct iovec      *iovp,
+       unsigned long           nr_segs,
+       loff_t                  pos,
+       size_t                  ocount,
+       int                     *iolock)
+{
+       struct file             *file = iocb->ki_filp;
+       struct address_space    *mapping = file->f_mapping;
+       struct inode            *inode = mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       ssize_t                 ret;
+       int                     enospc = 0;
+       size_t                  count = ocount;
+
+       *iolock = XFS_IOLOCK_EXCL;
+       xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
+
+       ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
+       if (ret)
+               return ret;
+
+       /* We can write back this queue in page reclaim */
+       current->backing_dev_info = mapping->backing_dev_info;
+
+write_retry:
+       trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
+       ret = generic_file_buffered_write(iocb, iovp, nr_segs,
+                       pos, &iocb->ki_pos, count, ret);
+       /*
+        * if we just got an ENOSPC, flush the inode now we aren't holding any
+        * page locks and retry *once*
+        */
+       if (ret == -ENOSPC && !enospc) {
+               ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
+               if (ret)
+                       return ret;
+               enospc = 1;
+               goto write_retry;
+       }
+       current->backing_dev_info = NULL;
+       return ret;
+}
+
+STATIC ssize_t
+xfs_file_aio_write(
+       struct kiocb            *iocb,
+       const struct iovec      *iovp,
+       unsigned long           nr_segs,
+       loff_t                  pos)
+{
+       struct file             *file = iocb->ki_filp;
+       struct address_space    *mapping = file->f_mapping;
+       struct inode            *inode = mapping->host;
+       struct xfs_inode        *ip = XFS_I(inode);
+       ssize_t                 ret;
+       int                     iolock;
+       size_t                  ocount = 0;
+
+       XFS_STATS_INC(xs_write_calls);
+
+       BUG_ON(iocb->ki_pos != pos);
+
+       ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
+       if (ret)
+               return ret;
+
+       if (ocount == 0)
+               return 0;
+
+       xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);
+
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return -EIO;
+
+       if (unlikely(file->f_flags & O_DIRECT))
+               ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
+                                               ocount, &iolock);
+       else
+               ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
+                                               ocount, &iolock);
+
+       xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
+
+       if (ret <= 0)
+               goto out_unlock;
+
+       /* Handle various SYNC-type writes */
+       if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
+               loff_t end = pos + ret - 1;
+               int error;
+
+               xfs_rw_iunlock(ip, iolock);
+               error = xfs_file_fsync(file, pos, end,
+                                     (file->f_flags & __O_SYNC) ? 0 : 1);
+               xfs_rw_ilock(ip, iolock);
+               if (error)
+                       ret = error;
+       }
+
+out_unlock:
+       xfs_aio_write_newsize_update(ip);
+       xfs_rw_iunlock(ip, iolock);
+       return ret;
+}
+
+STATIC long
+xfs_file_fallocate(
+       struct file     *file,
+       int             mode,
+       loff_t          offset,
+       loff_t          len)
+{
+       struct inode    *inode = file->f_path.dentry->d_inode;
+       long            error;
+       loff_t          new_size = 0;
+       xfs_flock64_t   bf;
+       xfs_inode_t     *ip = XFS_I(inode);
+       int             cmd = XFS_IOC_RESVSP;
+       int             attr_flags = XFS_ATTR_NOLOCK;
+
+       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+               return -EOPNOTSUPP;
+
+       bf.l_whence = 0;
+       bf.l_start = offset;
+       bf.l_len = len;
+
+       xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+       if (mode & FALLOC_FL_PUNCH_HOLE)
+               cmd = XFS_IOC_UNRESVSP;
+
+       /* check the new inode size is valid before allocating */
+       if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+           offset + len > i_size_read(inode)) {
+               new_size = offset + len;
+               error = inode_newsize_ok(inode, new_size);
+               if (error)
+                       goto out_unlock;
+       }
+
+       if (file->f_flags & O_DSYNC)
+               attr_flags |= XFS_ATTR_SYNC;
+
+       error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
+       if (error)
+               goto out_unlock;
+
+       /* Change file size if needed */
+       if (new_size) {
+               struct iattr iattr;
+
+               iattr.ia_valid = ATTR_SIZE;
+               iattr.ia_size = new_size;
+               error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK);
+       }
+
+out_unlock:
+       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+       return error;
+}
+
+
+STATIC int
+xfs_file_open(
+       struct inode    *inode,
+       struct file     *file)
+{
+       if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
+               return -EFBIG;
+       if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
+               return -EIO;
+       return 0;
+}
+
+STATIC int
+xfs_dir_open(
+       struct inode    *inode,
+       struct file     *file)
+{
+       struct xfs_inode *ip = XFS_I(inode);
+       int             mode;
+       int             error;
+
+       error = xfs_file_open(inode, file);
+       if (error)
+               return error;
+
+       /*
+        * If there are any blocks, read-ahead block 0 as we're almost
+        * certain to have the next operation be a read there.
+        */
+       mode = xfs_ilock_map_shared(ip);
+       if (ip->i_d.di_nextents > 0)
+               xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
+       xfs_iunlock(ip, mode);
+       return 0;
+}
+
+STATIC int
+xfs_file_release(
+       struct inode    *inode,
+       struct file     *filp)
+{
+       return -xfs_release(XFS_I(inode));
+}
+
+STATIC int
+xfs_file_readdir(
+       struct file     *filp,
+       void            *dirent,
+       filldir_t       filldir)
+{
+       struct inode    *inode = filp->f_path.dentry->d_inode;
+       xfs_inode_t     *ip = XFS_I(inode);
+       int             error;
+       size_t          bufsize;
+
+       /*
+        * The Linux API doesn't pass down the total size of the buffer
+        * we read into down to the filesystem.  With the filldir concept
+        * it's not needed for correct information, but the XFS dir2 leaf
+        * code wants an estimate of the buffer size to calculate it's
+        * readahead window and size the buffers used for mapping to
+        * physical blocks.
+        *
+        * Try to give it an estimate that's good enough, maybe at some
+        * point we can change the ->readdir prototype to include the
+        * buffer size.  For now we use the current glibc buffer size.
+        */
+       bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
+
+       error = xfs_readdir(ip, dirent, bufsize,
+                               (xfs_off_t *)&filp->f_pos, filldir);
+       if (error)
+               return -error;
+       return 0;
+}
+
+STATIC int
+xfs_file_mmap(
+       struct file     *filp,
+       struct vm_area_struct *vma)
+{
+       vma->vm_ops = &xfs_file_vm_ops;
+       vma->vm_flags |= VM_CAN_NONLINEAR;
+
+       file_accessed(filp);
+       return 0;
+}
+
+/*
+ * mmap()d file has taken write protection fault and is being made
+ * writable. We can set the page state up correctly for a writable
+ * page, which means we can do correct delalloc accounting (ENOSPC
+ * checking!) and unwritten extent mapping.
+ */
+STATIC int
+xfs_vm_page_mkwrite(
+       struct vm_area_struct   *vma,
+       struct vm_fault         *vmf)
+{
+       return block_page_mkwrite(vma, vmf, xfs_get_blocks);
+}
+
+const struct file_operations xfs_file_operations = {
+       .llseek         = generic_file_llseek,
+       .read           = do_sync_read,
+       .write          = do_sync_write,
+       .aio_read       = xfs_file_aio_read,
+       .aio_write      = xfs_file_aio_write,
+       .splice_read    = xfs_file_splice_read,
+       .splice_write   = xfs_file_splice_write,
+       .unlocked_ioctl = xfs_file_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = xfs_file_compat_ioctl,
+#endif
+       .mmap           = xfs_file_mmap,
+       .open           = xfs_file_open,
+       .release        = xfs_file_release,
+       .fsync          = xfs_file_fsync,
+       .fallocate      = xfs_file_fallocate,
+};
+
+const struct file_operations xfs_dir_file_operations = {
+       .open           = xfs_dir_open,
+       .read           = generic_read_dir,
+       .readdir        = xfs_file_readdir,
+       .llseek         = generic_file_llseek,
+       .unlocked_ioctl = xfs_file_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = xfs_file_compat_ioctl,
+#endif
+       .fsync          = xfs_file_fsync,
+};
+
+static const struct vm_operations_struct xfs_file_vm_ops = {
+       .fault          = filemap_fault,
+       .page_mkwrite   = xfs_vm_page_mkwrite,
+};
diff --git a/fs/xfs/xfs_fs_subr.c b/fs/xfs/xfs_fs_subr.c

new file mode 100644 (file)

index 0000000..ed88ed1
--- /dev/null
+++ b/fs/xfs/xfs_fs_subr.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_vnodeops.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_trace.h"
+
+/*
+ * note: all filemap functions return negative error codes. These
+ * need to be inverted before returning to the xfs core functions.
+ */
+void
+xfs_tosspages(
+       xfs_inode_t     *ip,
+       xfs_off_t       first,
+       xfs_off_t       last,
+       int             fiopt)
+{
+       /* can't toss partial tail pages, so mask them out */
+       last &= ~(PAGE_SIZE - 1);
+       truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1);
+}
+
+int
+xfs_flushinval_pages(
+       xfs_inode_t     *ip,
+       xfs_off_t       first,
+       xfs_off_t       last,
+       int             fiopt)
+{
+       struct address_space *mapping = VFS_I(ip)->i_mapping;
+       int             ret = 0;
+
+       trace_xfs_pagecache_inval(ip, first, last);
+
+       xfs_iflags_clear(ip, XFS_ITRUNCATED);
+       ret = filemap_write_and_wait_range(mapping, first,
+                               last == -1 ? LLONG_MAX : last);
+       if (!ret)
+               truncate_inode_pages_range(mapping, first, last);
+       return -ret;
+}
+
+int
+xfs_flush_pages(
+       xfs_inode_t     *ip,
+       xfs_off_t       first,
+       xfs_off_t       last,
+       uint64_t        flags,
+       int             fiopt)
+{
+       struct address_space *mapping = VFS_I(ip)->i_mapping;
+       int             ret = 0;
+       int             ret2;
+
+       xfs_iflags_clear(ip, XFS_ITRUNCATED);
+       ret = -filemap_fdatawrite_range(mapping, first,
+                               last == -1 ? LLONG_MAX : last);
+       if (flags & XBF_ASYNC)
+               return ret;
+       ret2 = xfs_wait_on_pages(ip, first, last);
+       if (!ret)
+               ret = ret2;
+       return ret;
+}
+
+int
+xfs_wait_on_pages(
+       xfs_inode_t     *ip,
+       xfs_off_t       first,
+       xfs_off_t       last)
+{
+       struct address_space *mapping = VFS_I(ip)->i_mapping;
+
+       if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
+               return -filemap_fdatawait_range(mapping, first,
+                                       last == -1 ? ip->i_size - 1 : last);
+       }
+       return 0;
+}
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c

new file mode 100644 (file)

index 0000000..76e81cf
--- /dev/null
+++ b/fs/xfs/xfs_globals.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_sysctl.h"
+
+/*
+ * Tunable XFS parameters.  xfs_params is required even when CONFIG_SYSCTL=n,
+ * other XFS code uses these values.  Times are measured in centisecs (i.e.
+ * 100ths of a second).
+ */
+xfs_param_t xfs_params = {
+                         /*    MIN             DFLT            MAX     */
+       .sgid_inherit   = {     0,              0,              1       },
+       .symlink_mode   = {     0,              0,              1       },
+       .panic_mask     = {     0,              0,              255     },
+       .error_level    = {     0,              3,              11      },
+       .syncd_timer    = {     1*100,          30*100,         7200*100},
+       .stats_clear    = {     0,              0,              1       },
+       .inherit_sync   = {     0,              1,              1       },
+       .inherit_nodump = {     0,              1,              1       },
+       .inherit_noatim = {     0,              1,              1       },
+       .xfs_buf_timer  = {     100/2,          1*100,          30*100  },
+       .xfs_buf_age    = {     1*100,          15*100,         7200*100},
+       .inherit_nosym  = {     0,              0,              1       },
+       .rotorstep      = {     1,              1,              255     },
+       .inherit_nodfrg = {     0,              1,              1       },
+       .fstrm_timer    = {     1,              30*100,         3600*100},
+};
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c

index dd5628bd8d0be16c417e8116081d986af1a2cda5..9f24ec28283bfa1ea237efc3f43a03c480999403 100644 (file)
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -202,8 +202,7 @@ xfs_ialloc_inode_init(
                 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
                                          mp->m_bsize * blks_per_cluster,
                                          XBF_LOCK);
-               ASSERT(fbuf);
-               ASSERT(!XFS_BUF_GETERROR(fbuf));
+               ASSERT(!xfs_buf_geterror(fbuf));
  
                 /*
                  * Initialize all inodes in this buffer and then log them.
@@ -1486,7 +1485,7 @@ xfs_read_agi(
         if (error)
                 return error;
  
-       ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp));
+       ASSERT(!xfs_buf_geterror(*bpp));
         agi = XFS_BUF_TO_AGI(*bpp);
  
         /*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index 2fcca4b03ed389ef048f744e421289e2dc3bafc8..0239a7c7c886a2c83a74352ee0504082cb328e26 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2473,7 +2473,7 @@ cluster_corrupt_out:
                 if (bp->b_iodone) {
                         XFS_BUF_UNDONE(bp);
                         XFS_BUF_STALE(bp);
-                       XFS_BUF_ERROR(bp,EIO);
+                       xfs_buf_ioerror(bp, EIO);
                         xfs_buf_ioend(bp, 0);
                 } else {
                         XFS_BUF_STALE(bp);
@@ -2585,7 +2585,7 @@ xfs_iflush(
          * If the buffer is pinned then push on the log now so we won't
          * get stuck waiting in the write for too long.
          */
-       if (XFS_BUF_ISPINNED(bp))
+       if (xfs_buf_ispinned(bp))
                 xfs_log_force(mp, 0);
  
         /*
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c

new file mode 100644 (file)

index 0000000..f7ce7de
--- /dev/null
+++ b/fs/xfs/xfs_ioctl.c
@@ -0,0 +1,1556 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_ioctl.h"
+#include "xfs_rtalloc.h"
+#include "xfs_itable.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_bmap.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_dfrag.h"
+#include "xfs_fsops.h"
+#include "xfs_vnodeops.h"
+#include "xfs_discard.h"
+#include "xfs_quota.h"
+#include "xfs_inode_item.h"
+#include "xfs_export.h"
+#include "xfs_trace.h"
+
+#include <linux/capability.h>
+#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/exportfs.h>
+
+/*
+ * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
+ * a file or fs handle.
+ *
+ * XFS_IOC_PATH_TO_FSHANDLE
+ *    returns fs handle for a mount point or path within that mount point
+ * XFS_IOC_FD_TO_HANDLE
+ *    returns full handle for a FD opened in user space
+ * XFS_IOC_PATH_TO_HANDLE
+ *    returns full handle for a path
+ */
+int
+xfs_find_handle(
+       unsigned int            cmd,
+       xfs_fsop_handlereq_t    *hreq)
+{
+       int                     hsize;
+       xfs_handle_t            handle;
+       struct inode            *inode;
+       struct file             *file = NULL;
+       struct path             path;
+       int                     error;
+       struct xfs_inode        *ip;
+
+       if (cmd == XFS_IOC_FD_TO_HANDLE) {
+               file = fget(hreq->fd);
+               if (!file)
+                       return -EBADF;
+               inode = file->f_path.dentry->d_inode;
+       } else {
+               error = user_lpath((const char __user *)hreq->path, &path);
+               if (error)
+                       return error;
+               inode = path.dentry->d_inode;
+       }
+       ip = XFS_I(inode);
+
+       /*
+        * We can only generate handles for inodes residing on a XFS filesystem,
+        * and only for regular files, directories or symbolic links.
+        */
+       error = -EINVAL;
+       if (inode->i_sb->s_magic != XFS_SB_MAGIC)
+               goto out_put;
+
+       error = -EBADF;
+       if (!S_ISREG(inode->i_mode) &&
+           !S_ISDIR(inode->i_mode) &&
+           !S_ISLNK(inode->i_mode))
+               goto out_put;
+
+
+       memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));
+
+       if (cmd == XFS_IOC_PATH_TO_FSHANDLE) {
+               /*
+                * This handle only contains an fsid, zero the rest.
+                */
+               memset(&handle.ha_fid, 0, sizeof(handle.ha_fid));
+               hsize = sizeof(xfs_fsid_t);
+       } else {
+               int             lock_mode;
+
+               lock_mode = xfs_ilock_map_shared(ip);
+               handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
+                                       sizeof(handle.ha_fid.fid_len);
+               handle.ha_fid.fid_pad = 0;
+               handle.ha_fid.fid_gen = ip->i_d.di_gen;
+               handle.ha_fid.fid_ino = ip->i_ino;
+               xfs_iunlock_map_shared(ip, lock_mode);
+
+               hsize = XFS_HSIZE(handle);
+       }
+
+       error = -EFAULT;
+       if (copy_to_user(hreq->ohandle, &handle, hsize) ||
+           copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32)))
+               goto out_put;
+
+       error = 0;
+
+ out_put:
+       if (cmd == XFS_IOC_FD_TO_HANDLE)
+               fput(file);
+       else
+               path_put(&path);
+       return error;
+}
+
+/*
+ * No need to do permission checks on the various pathname components
+ * as the handle operations are privileged.
+ */
+STATIC int
+xfs_handle_acceptable(
+       void                    *context,
+       struct dentry           *dentry)
+{
+       return 1;
+}
+
+/*
+ * Convert userspace handle data into a dentry.
+ */
+struct dentry *
+xfs_handle_to_dentry(
+       struct file             *parfilp,
+       void __user             *uhandle,
+       u32                     hlen)
+{
+       xfs_handle_t            handle;
+       struct xfs_fid64        fid;
+
+       /*
+        * Only allow handle opens under a directory.
+        */
+       if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode))
+               return ERR_PTR(-ENOTDIR);
+
+       if (hlen != sizeof(xfs_handle_t))
+               return ERR_PTR(-EINVAL);
+       if (copy_from_user(&handle, uhandle, hlen))
+               return ERR_PTR(-EFAULT);
+       if (handle.ha_fid.fid_len !=
+           sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len))
+               return ERR_PTR(-EINVAL);
+
+       memset(&fid, 0, sizeof(struct fid));
+       fid.ino = handle.ha_fid.fid_ino;
+       fid.gen = handle.ha_fid.fid_gen;
+
+       return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3,
+                       FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG,
+                       xfs_handle_acceptable, NULL);
+}
+
+STATIC struct dentry *
+xfs_handlereq_to_dentry(
+       struct file             *parfilp,
+       xfs_fsop_handlereq_t    *hreq)
+{
+       return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen);
+}
+
+int
+xfs_open_by_handle(
+       struct file             *parfilp,
+       xfs_fsop_handlereq_t    *hreq)
+{
+       const struct cred       *cred = current_cred();
+       int                     error;
+       int                     fd;
+       int                     permflag;
+       struct file             *filp;
+       struct inode            *inode;
+       struct dentry           *dentry;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+
+       dentry = xfs_handlereq_to_dentry(parfilp, hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+       inode = dentry->d_inode;
+
+       /* Restrict xfs_open_by_handle to directories & regular files. */
+       if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
+               error = -XFS_ERROR(EPERM);
+               goto out_dput;
+       }
+
+#if BITS_PER_LONG != 32
+       hreq->oflags |= O_LARGEFILE;
+#endif
+
+       /* Put open permission in namei format. */
+       permflag = hreq->oflags;
+       if ((permflag+1) & O_ACCMODE)
+               permflag++;
+       if (permflag & O_TRUNC)
+               permflag |= 2;
+
+       if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
+           (permflag & FMODE_WRITE) && IS_APPEND(inode)) {
+               error = -XFS_ERROR(EPERM);
+               goto out_dput;
+       }
+
+       if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
+               error = -XFS_ERROR(EACCES);
+               goto out_dput;
+       }
+
+       /* Can't write directories. */
+       if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) {
+               error = -XFS_ERROR(EISDIR);
+               goto out_dput;
+       }
+
+       fd = get_unused_fd();
+       if (fd < 0) {
+               error = fd;
+               goto out_dput;
+       }
+
+       filp = dentry_open(dentry, mntget(parfilp->f_path.mnt),
+                          hreq->oflags, cred);
+       if (IS_ERR(filp)) {
+               put_unused_fd(fd);
+               return PTR_ERR(filp);
+       }
+
+       if (S_ISREG(inode->i_mode)) {
+               filp->f_flags |= O_NOATIME;
+               filp->f_mode |= FMODE_NOCMTIME;
+       }
+
+       fd_install(fd, filp);
+       return fd;
+
+ out_dput:
+       dput(dentry);
+       return error;
+}
+
+/*
+ * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's
+ * unused first argument.
+ */
+STATIC int
+do_readlink(
+       char __user             *buffer,
+       int                     buflen,
+       const char              *link)
+{
+        int len;
+
+       len = PTR_ERR(link);
+       if (IS_ERR(link))
+               goto out;
+
+       len = strlen(link);
+       if (len > (unsigned) buflen)
+               len = buflen;
+       if (copy_to_user(buffer, link, len))
+               len = -EFAULT;
+ out:
+       return len;
+}
+
+
+int
+xfs_readlink_by_handle(
+       struct file             *parfilp,
+       xfs_fsop_handlereq_t    *hreq)
+{
+       struct dentry           *dentry;
+       __u32                   olen;
+       void                    *link;
+       int                     error;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+
+       dentry = xfs_handlereq_to_dentry(parfilp, hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       /* Restrict this handle operation to symlinks only. */
+       if (!S_ISLNK(dentry->d_inode->i_mode)) {
+               error = -XFS_ERROR(EINVAL);
+               goto out_dput;
+       }
+
+       if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) {
+               error = -XFS_ERROR(EFAULT);
+               goto out_dput;
+       }
+
+       link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
+       if (!link) {
+               error = -XFS_ERROR(ENOMEM);
+               goto out_dput;
+       }
+
+       error = -xfs_readlink(XFS_I(dentry->d_inode), link);
+       if (error)
+               goto out_kfree;
+       error = do_readlink(hreq->ohandle, olen, link);
+       if (error)
+               goto out_kfree;
+
+ out_kfree:
+       kfree(link);
+ out_dput:
+       dput(dentry);
+       return error;
+}
+
+STATIC int
+xfs_fssetdm_by_handle(
+       struct file             *parfilp,
+       void                    __user *arg)
+{
+       int                     error;
+       struct fsdmidata        fsd;
+       xfs_fsop_setdm_handlereq_t dmhreq;
+       struct dentry           *dentry;
+
+       if (!capable(CAP_MKNOD))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
+               error = -XFS_ERROR(EPERM);
+               goto out;
+       }
+
+       if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
+               error = -XFS_ERROR(EFAULT);
+               goto out;
+       }
+
+       error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
+                                fsd.fsd_dmstate);
+
+ out:
+       dput(dentry);
+       return error;
+}
+
+STATIC int
+xfs_attrlist_by_handle(
+       struct file             *parfilp,
+       void                    __user *arg)
+{
+       int                     error = -ENOMEM;
+       attrlist_cursor_kern_t  *cursor;
+       xfs_fsop_attrlist_handlereq_t al_hreq;
+       struct dentry           *dentry;
+       char                    *kbuf;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+       if (al_hreq.buflen > XATTR_LIST_MAX)
+               return -XFS_ERROR(EINVAL);
+
+       /*
+        * Reject flags, only allow namespaces.
+        */
+       if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
+               return -XFS_ERROR(EINVAL);
+
+       dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
+       if (!kbuf)
+               goto out_dput;
+
+       cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
+       error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
+                                       al_hreq.flags, cursor);
+       if (error)
+               goto out_kfree;
+
+       if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
+               error = -EFAULT;
+
+ out_kfree:
+       kfree(kbuf);
+ out_dput:
+       dput(dentry);
+       return error;
+}
+
+int
+xfs_attrmulti_attr_get(
+       struct inode            *inode,
+       unsigned char           *name,
+       unsigned char           __user *ubuf,
+       __uint32_t              *len,
+       __uint32_t              flags)
+{
+       unsigned char           *kbuf;
+       int                     error = EFAULT;
+
+       if (*len > XATTR_SIZE_MAX)
+               return EINVAL;
+       kbuf = kmalloc(*len, GFP_KERNEL);
+       if (!kbuf)
+               return ENOMEM;
+
+       error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
+       if (error)
+               goto out_kfree;
+
+       if (copy_to_user(ubuf, kbuf, *len))
+               error = EFAULT;
+
+ out_kfree:
+       kfree(kbuf);
+       return error;
+}
+
+int
+xfs_attrmulti_attr_set(
+       struct inode            *inode,
+       unsigned char           *name,
+       const unsigned char     __user *ubuf,
+       __uint32_t              len,
+       __uint32_t              flags)
+{
+       unsigned char           *kbuf;
+       int                     error = EFAULT;
+
+       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+               return EPERM;
+       if (len > XATTR_SIZE_MAX)
+               return EINVAL;
+
+       kbuf = memdup_user(ubuf, len);
+       if (IS_ERR(kbuf))
+               return PTR_ERR(kbuf);
+
+       error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
+
+       return error;
+}
+
+int
+xfs_attrmulti_attr_remove(
+       struct inode            *inode,
+       unsigned char           *name,
+       __uint32_t              flags)
+{
+       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+               return EPERM;
+       return xfs_attr_remove(XFS_I(inode), name, flags);
+}
+
+STATIC int
+xfs_attrmulti_by_handle(
+       struct file             *parfilp,
+       void                    __user *arg)
+{
+       int                     error;
+       xfs_attr_multiop_t      *ops;
+       xfs_fsop_attrmulti_handlereq_t am_hreq;
+       struct dentry           *dentry;
+       unsigned int            i, size;
+       unsigned char           *attr_name;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       /* overflow check */
+       if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
+               return -E2BIG;
+
+       dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       error = E2BIG;
+       size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
+       if (!size || size > 16 * PAGE_SIZE)
+               goto out_dput;
+
+       ops = memdup_user(am_hreq.ops, size);
+       if (IS_ERR(ops)) {
+               error = PTR_ERR(ops);
+               goto out_dput;
+       }
+
+       attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
+       if (!attr_name)
+               goto out_kfree_ops;
+
+       error = 0;
+       for (i = 0; i < am_hreq.opcount; i++) {
+               ops[i].am_error = strncpy_from_user((char *)attr_name,
+                               ops[i].am_attrname, MAXNAMELEN);
+               if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
+                       error = -ERANGE;
+               if (ops[i].am_error < 0)
+                       break;
+
+               switch (ops[i].am_opcode) {
+               case ATTR_OP_GET:
+                       ops[i].am_error = xfs_attrmulti_attr_get(
+                                       dentry->d_inode, attr_name,
+                                       ops[i].am_attrvalue, &ops[i].am_length,
+                                       ops[i].am_flags);
+                       break;
+               case ATTR_OP_SET:
+                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                       if (ops[i].am_error)
+                               break;
+                       ops[i].am_error = xfs_attrmulti_attr_set(
+                                       dentry->d_inode, attr_name,
+                                       ops[i].am_attrvalue, ops[i].am_length,
+                                       ops[i].am_flags);
+                       mnt_drop_write(parfilp->f_path.mnt);
+                       break;
+               case ATTR_OP_REMOVE:
+                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                       if (ops[i].am_error)
+                               break;
+                       ops[i].am_error = xfs_attrmulti_attr_remove(
+                                       dentry->d_inode, attr_name,
+                                       ops[i].am_flags);
+                       mnt_drop_write(parfilp->f_path.mnt);
+                       break;
+               default:
+                       ops[i].am_error = EINVAL;
+               }
+       }
+
+       if (copy_to_user(am_hreq.ops, ops, size))
+               error = XFS_ERROR(EFAULT);
+
+       kfree(attr_name);
+ out_kfree_ops:
+       kfree(ops);
+ out_dput:
+       dput(dentry);
+       return -error;
+}
+
+int
+xfs_ioc_space(
+       struct xfs_inode        *ip,
+       struct inode            *inode,
+       struct file             *filp,
+       int                     ioflags,
+       unsigned int            cmd,
+       xfs_flock64_t           *bf)
+{
+       int                     attr_flags = 0;
+       int                     error;
+
+       /*
+        * Only allow the sys admin to reserve space unless
+        * unwritten extents are enabled.
+        */
+       if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
+           !capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+
+       if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
+               return -XFS_ERROR(EPERM);
+
+       if (!(filp->f_mode & FMODE_WRITE))
+               return -XFS_ERROR(EBADF);
+
+       if (!S_ISREG(inode->i_mode))
+               return -XFS_ERROR(EINVAL);
+
+       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+               attr_flags |= XFS_ATTR_NONBLOCK;
+
+       if (filp->f_flags & O_DSYNC)
+               attr_flags |= XFS_ATTR_SYNC;
+
+       if (ioflags & IO_INVIS)
+               attr_flags |= XFS_ATTR_DMI;
+
+       error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
+       return -error;
+}
+
+STATIC int
+xfs_ioc_bulkstat(
+       xfs_mount_t             *mp,
+       unsigned int            cmd,
+       void                    __user *arg)
+{
+       xfs_fsop_bulkreq_t      bulkreq;
+       int                     count;  /* # of records returned */
+       xfs_ino_t               inlast; /* last inode number */
+       int                     done;
+       int                     error;
+
+       /* done = 1 if there are more stats to get and if bulkstat */
+       /* should be called again (unused here, but used in dmapi) */
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+
+       if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+               return -XFS_ERROR(EFAULT);
+
+       if ((count = bulkreq.icount) <= 0)
+               return -XFS_ERROR(EINVAL);
+
+       if (bulkreq.ubuffer == NULL)
+               return -XFS_ERROR(EINVAL);
+
+       if (cmd == XFS_IOC_FSINUMBERS)
+               error = xfs_inumbers(mp, &inlast, &count,
+                                       bulkreq.ubuffer, xfs_inumbers_fmt);
+       else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
+               error = xfs_bulkstat_single(mp, &inlast,
+                                               bulkreq.ubuffer, &done);
+       else    /* XFS_IOC_FSBULKSTAT */
+               error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
+                                    sizeof(xfs_bstat_t), bulkreq.ubuffer,
+                                    &done);
+
+       if (error)
+               return -error;
+
+       if (bulkreq.ocount != NULL) {
+               if (copy_to_user(bulkreq.lastip, &inlast,
+                                               sizeof(xfs_ino_t)))
+                       return -XFS_ERROR(EFAULT);
+
+               if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
+                       return -XFS_ERROR(EFAULT);
+       }
+
+       return 0;
+}
+
+STATIC int
+xfs_ioc_fsgeometry_v1(
+       xfs_mount_t             *mp,
+       void                    __user *arg)
+{
+       xfs_fsop_geom_t         fsgeo;
+       int                     error;
+
+       error = xfs_fs_geometry(mp, &fsgeo, 3);
+       if (error)
+               return -error;
+
+       /*
+        * Caller should have passed an argument of type
+        * xfs_fsop_geom_v1_t.  This is a proper subset of the
+        * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
+        */
+       if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_ioc_fsgeometry(
+       xfs_mount_t             *mp,
+       void                    __user *arg)
+{
+       xfs_fsop_geom_t         fsgeo;
+       int                     error;
+
+       error = xfs_fs_geometry(mp, &fsgeo, 4);
+       if (error)
+               return -error;
+
+       if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+/*
+ * Linux extended inode flags interface.
+ */
+
+STATIC unsigned int
+xfs_merge_ioc_xflags(
+       unsigned int    flags,
+       unsigned int    start)
+{
+       unsigned int    xflags = start;
+
+       if (flags & FS_IMMUTABLE_FL)
+               xflags |= XFS_XFLAG_IMMUTABLE;
+       else
+               xflags &= ~XFS_XFLAG_IMMUTABLE;
+       if (flags & FS_APPEND_FL)
+               xflags |= XFS_XFLAG_APPEND;
+       else
+               xflags &= ~XFS_XFLAG_APPEND;
+       if (flags & FS_SYNC_FL)
+               xflags |= XFS_XFLAG_SYNC;
+       else
+               xflags &= ~XFS_XFLAG_SYNC;
+       if (flags & FS_NOATIME_FL)
+               xflags |= XFS_XFLAG_NOATIME;
+       else
+               xflags &= ~XFS_XFLAG_NOATIME;
+       if (flags & FS_NODUMP_FL)
+               xflags |= XFS_XFLAG_NODUMP;
+       else
+               xflags &= ~XFS_XFLAG_NODUMP;
+
+       return xflags;
+}
+
+STATIC unsigned int
+xfs_di2lxflags(
+       __uint16_t      di_flags)
+{
+       unsigned int    flags = 0;
+
+       if (di_flags & XFS_DIFLAG_IMMUTABLE)
+               flags |= FS_IMMUTABLE_FL;
+       if (di_flags & XFS_DIFLAG_APPEND)
+               flags |= FS_APPEND_FL;
+       if (di_flags & XFS_DIFLAG_SYNC)
+               flags |= FS_SYNC_FL;
+       if (di_flags & XFS_DIFLAG_NOATIME)
+               flags |= FS_NOATIME_FL;
+       if (di_flags & XFS_DIFLAG_NODUMP)
+               flags |= FS_NODUMP_FL;
+       return flags;
+}
+
+STATIC int
+xfs_ioc_fsgetxattr(
+       xfs_inode_t             *ip,
+       int                     attr,
+       void                    __user *arg)
+{
+       struct fsxattr          fa;
+
+       memset(&fa, 0, sizeof(struct fsxattr));
+
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+       fa.fsx_xflags = xfs_ip2xflags(ip);
+       fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
+       fa.fsx_projid = xfs_get_projid(ip);
+
+       if (attr) {
+               if (ip->i_afp) {
+                       if (ip->i_afp->if_flags & XFS_IFEXTENTS)
+                               fa.fsx_nextents = ip->i_afp->if_bytes /
+                                                       sizeof(xfs_bmbt_rec_t);
+                       else
+                               fa.fsx_nextents = ip->i_d.di_anextents;
+               } else
+                       fa.fsx_nextents = 0;
+       } else {
+               if (ip->i_df.if_flags & XFS_IFEXTENTS)
+                       fa.fsx_nextents = ip->i_df.if_bytes /
+                                               sizeof(xfs_bmbt_rec_t);
+               else
+                       fa.fsx_nextents = ip->i_d.di_nextents;
+       }
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+       if (copy_to_user(arg, &fa, sizeof(fa)))
+               return -EFAULT;
+       return 0;
+}
+
+STATIC void
+xfs_set_diflags(
+       struct xfs_inode        *ip,
+       unsigned int            xflags)
+{
+       unsigned int            di_flags;
+
+       /* can't set PREALLOC this way, just preserve it */
+       di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
+       if (xflags & XFS_XFLAG_IMMUTABLE)
+               di_flags |= XFS_DIFLAG_IMMUTABLE;
+       if (xflags & XFS_XFLAG_APPEND)
+               di_flags |= XFS_DIFLAG_APPEND;
+       if (xflags & XFS_XFLAG_SYNC)
+               di_flags |= XFS_DIFLAG_SYNC;
+       if (xflags & XFS_XFLAG_NOATIME)
+               di_flags |= XFS_DIFLAG_NOATIME;
+       if (xflags & XFS_XFLAG_NODUMP)
+               di_flags |= XFS_DIFLAG_NODUMP;
+       if (xflags & XFS_XFLAG_PROJINHERIT)
+               di_flags |= XFS_DIFLAG_PROJINHERIT;
+       if (xflags & XFS_XFLAG_NODEFRAG)
+               di_flags |= XFS_DIFLAG_NODEFRAG;
+       if (xflags & XFS_XFLAG_FILESTREAM)
+               di_flags |= XFS_DIFLAG_FILESTREAM;
+       if (S_ISDIR(ip->i_d.di_mode)) {
+               if (xflags & XFS_XFLAG_RTINHERIT)
+                       di_flags |= XFS_DIFLAG_RTINHERIT;
+               if (xflags & XFS_XFLAG_NOSYMLINKS)
+                       di_flags |= XFS_DIFLAG_NOSYMLINKS;
+               if (xflags & XFS_XFLAG_EXTSZINHERIT)
+                       di_flags |= XFS_DIFLAG_EXTSZINHERIT;
+       } else if (S_ISREG(ip->i_d.di_mode)) {
+               if (xflags & XFS_XFLAG_REALTIME)
+                       di_flags |= XFS_DIFLAG_REALTIME;
+               if (xflags & XFS_XFLAG_EXTSIZE)
+                       di_flags |= XFS_DIFLAG_EXTSIZE;
+       }
+
+       ip->i_d.di_flags = di_flags;
+}
+
+STATIC void
+xfs_diflags_to_linux(
+       struct xfs_inode        *ip)
+{
+       struct inode            *inode = VFS_I(ip);
+       unsigned int            xflags = xfs_ip2xflags(ip);
+
+       if (xflags & XFS_XFLAG_IMMUTABLE)
+               inode->i_flags |= S_IMMUTABLE;
+       else
+               inode->i_flags &= ~S_IMMUTABLE;
+       if (xflags & XFS_XFLAG_APPEND)
+               inode->i_flags |= S_APPEND;
+       else
+               inode->i_flags &= ~S_APPEND;
+       if (xflags & XFS_XFLAG_SYNC)
+               inode->i_flags |= S_SYNC;
+       else
+               inode->i_flags &= ~S_SYNC;
+       if (xflags & XFS_XFLAG_NOATIME)
+               inode->i_flags |= S_NOATIME;
+       else
+               inode->i_flags &= ~S_NOATIME;
+}
+
+#define FSX_PROJID     1
+#define FSX_EXTSIZE    2
+#define FSX_XFLAGS     4
+#define FSX_NONBLOCK   8
+
+STATIC int
+xfs_ioctl_setattr(
+       xfs_inode_t             *ip,
+       struct fsxattr          *fa,
+       int                     mask)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       unsigned int            lock_flags = 0;
+       struct xfs_dquot        *udqp = NULL;
+       struct xfs_dquot        *gdqp = NULL;
+       struct xfs_dquot        *olddquot = NULL;
+       int                     code;
+
+       trace_xfs_ioctl_setattr(ip);
+
+       if (mp->m_flags & XFS_MOUNT_RDONLY)
+               return XFS_ERROR(EROFS);
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       /*
+        * Disallow 32bit project ids when projid32bit feature is not enabled.
+        */
+       if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
+                       !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
+               return XFS_ERROR(EINVAL);
+
+       /*
+        * If disk quotas is on, we make sure that the dquots do exist on disk,
+        * before we start any other transactions. Trying to do this later
+        * is messy. We don't care to take a readlock to look at the ids
+        * in inode here, because we can't hold it across the trans_reserve.
+        * If the IDs do change before we take the ilock, we're covered
+        * because the i_*dquot fields will get updated anyway.
+        */
+       if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
+               code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
+                                        ip->i_d.di_gid, fa->fsx_projid,
+                                        XFS_QMOPT_PQUOTA, &udqp, &gdqp);
+               if (code)
+                       return code;
+       }
+
+       /*
+        * For the other attributes, we acquire the inode lock and
+        * first do an error checking pass.
+        */
+       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+       code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+       if (code)
+               goto error_return;
+
+       lock_flags = XFS_ILOCK_EXCL;
+       xfs_ilock(ip, lock_flags);
+
+       /*
+        * CAP_FOWNER overrides the following restrictions:
+        *
+        * The user ID of the calling process must be equal
+        * to the file owner ID, except in cases where the
+        * CAP_FSETID capability is applicable.
+        */
+       if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
+               code = XFS_ERROR(EPERM);
+               goto error_return;
+       }
+
+       /*
+        * Do a quota reservation only if projid is actually going to change.
+        */
+       if (mask & FSX_PROJID) {
+               if (XFS_IS_QUOTA_RUNNING(mp) &&
+                   XFS_IS_PQUOTA_ON(mp) &&
+                   xfs_get_projid(ip) != fa->fsx_projid) {
+                       ASSERT(tp);
+                       code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
+                                               capable(CAP_FOWNER) ?
+                                               XFS_QMOPT_FORCE_RES : 0);
+                       if (code)       /* out of quota */
+                               goto error_return;
+               }
+       }
+
+       if (mask & FSX_EXTSIZE) {
+               /*
+                * Can't change extent size if any extents are allocated.
+                */
+               if (ip->i_d.di_nextents &&
+                   ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
+                    fa->fsx_extsize)) {
+                       code = XFS_ERROR(EINVAL);       /* EFBIG? */
+                       goto error_return;
+               }
+
+               /*
+                * Extent size must be a multiple of the appropriate block
+                * size, if set at all. It must also be smaller than the
+                * maximum extent size supported by the filesystem.
+                *
+                * Also, for non-realtime files, limit the extent size hint to
+                * half the size of the AGs in the filesystem so alignment
+                * doesn't result in extents larger than an AG.
+                */
+               if (fa->fsx_extsize != 0) {
+                       xfs_extlen_t    size;
+                       xfs_fsblock_t   extsize_fsb;
+
+                       extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
+                       if (extsize_fsb > MAXEXTLEN) {
+                               code = XFS_ERROR(EINVAL);
+                               goto error_return;
+                       }
+
+                       if (XFS_IS_REALTIME_INODE(ip) ||
+                           ((mask & FSX_XFLAGS) &&
+                           (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
+                               size = mp->m_sb.sb_rextsize <<
+                                      mp->m_sb.sb_blocklog;
+                       } else {
+                               size = mp->m_sb.sb_blocksize;
+                               if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
+                                       code = XFS_ERROR(EINVAL);
+                                       goto error_return;
+                               }
+                       }
+
+                       if (fa->fsx_extsize % size) {
+                               code = XFS_ERROR(EINVAL);
+                               goto error_return;
+                       }
+               }
+       }
+
+
+       if (mask & FSX_XFLAGS) {
+               /*
+                * Can't change realtime flag if any extents are allocated.
+                */
+               if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
+                   (XFS_IS_REALTIME_INODE(ip)) !=
+                   (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+                       code = XFS_ERROR(EINVAL);       /* EFBIG? */
+                       goto error_return;
+               }
+
+               /*
+                * If realtime flag is set then must have realtime data.
+                */
+               if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+                       if ((mp->m_sb.sb_rblocks == 0) ||
+                           (mp->m_sb.sb_rextsize == 0) ||
+                           (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
+                               code = XFS_ERROR(EINVAL);
+                               goto error_return;
+                       }
+               }
+
+               /*
+                * Can't modify an immutable/append-only file unless
+                * we have appropriate permission.
+                */
+               if ((ip->i_d.di_flags &
+                               (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
+                    (fa->fsx_xflags &
+                               (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+                   !capable(CAP_LINUX_IMMUTABLE)) {
+                       code = XFS_ERROR(EPERM);
+                       goto error_return;
+               }
+       }
+
+       xfs_trans_ijoin(tp, ip);
+
+       /*
+        * Change file ownership.  Must be the owner or privileged.
+        */
+       if (mask & FSX_PROJID) {
+               /*
+                * CAP_FSETID overrides the following restrictions:
+                *
+                * The set-user-ID and set-group-ID bits of a file will be
+                * cleared upon successful return from chown()
+                */
+               if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+                   !capable(CAP_FSETID))
+                       ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+
+               /*
+                * Change the ownerships and register quota modifications
+                * in the transaction.
+                */
+               if (xfs_get_projid(ip) != fa->fsx_projid) {
+                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
+                               olddquot = xfs_qm_vop_chown(tp, ip,
+                                                       &ip->i_gdquot, gdqp);
+                       }
+                       xfs_set_projid(ip, fa->fsx_projid);
+
+                       /*
+                        * We may have to rev the inode as well as
+                        * the superblock version number since projids didn't
+                        * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
+                        */
+                       if (ip->i_d.di_version == 1)
+                               xfs_bump_ino_vers2(tp, ip);
+               }
+
+       }
+
+       if (mask & FSX_EXTSIZE)
+               ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
+       if (mask & FSX_XFLAGS) {
+               xfs_set_diflags(ip, fa->fsx_xflags);
+               xfs_diflags_to_linux(ip);
+       }
+
+       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+       XFS_STATS_INC(xs_ig_attrchg);
+
+       /*
+        * If this is a synchronous mount, make sure that the
+        * transaction goes to disk before returning to the user.
+        * This is slightly sub-optimal in that truncates require
+        * two sync transactions instead of one for wsync filesystems.
+        * One for the truncate and one for the timestamps since we
+        * don't want to change the timestamps unless we're sure the
+        * truncate worked.  Truncates are less than 1% of the laddis
+        * mix so this probably isn't worth the trouble to optimize.
+        */
+       if (mp->m_flags & XFS_MOUNT_WSYNC)
+               xfs_trans_set_sync(tp);
+       code = xfs_trans_commit(tp, 0);
+       xfs_iunlock(ip, lock_flags);
+
+       /*
+        * Release any dquot(s) the inode had kept before chown.
+        */
+       xfs_qm_dqrele(olddquot);
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
+
+       return code;
+
+ error_return:
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
+       xfs_trans_cancel(tp, 0);
+       if (lock_flags)
+               xfs_iunlock(ip, lock_flags);
+       return code;
+}
+
+STATIC int
+xfs_ioc_fssetxattr(
+       xfs_inode_t             *ip,
+       struct file             *filp,
+       void                    __user *arg)
+{
+       struct fsxattr          fa;
+       unsigned int            mask;
+
+       if (copy_from_user(&fa, arg, sizeof(fa)))
+               return -EFAULT;
+
+       mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
+       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+               mask |= FSX_NONBLOCK;
+
+       return -xfs_ioctl_setattr(ip, &fa, mask);
+}
+
+STATIC int
+xfs_ioc_getxflags(
+       xfs_inode_t             *ip,
+       void                    __user *arg)
+{
+       unsigned int            flags;
+
+       flags = xfs_di2lxflags(ip->i_d.di_flags);
+       if (copy_to_user(arg, &flags, sizeof(flags)))
+               return -EFAULT;
+       return 0;
+}
+
+STATIC int
+xfs_ioc_setxflags(
+       xfs_inode_t             *ip,
+       struct file             *filp,
+       void                    __user *arg)
+{
+       struct fsxattr          fa;
+       unsigned int            flags;
+       unsigned int            mask;
+
+       if (copy_from_user(&flags, arg, sizeof(flags)))
+               return -EFAULT;
+
+       if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
+                     FS_NOATIME_FL | FS_NODUMP_FL | \
+                     FS_SYNC_FL))
+               return -EOPNOTSUPP;
+
+       mask = FSX_XFLAGS;
+       if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
+               mask |= FSX_NONBLOCK;
+       fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
+
+       return -xfs_ioctl_setattr(ip, &fa, mask);
+}
+
+STATIC int
+xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
+{
+       struct getbmap __user   *base = *ap;
+
+       /* copy only getbmap portion (not getbmapx) */
+       if (copy_to_user(base, bmv, sizeof(struct getbmap)))
+               return XFS_ERROR(EFAULT);
+
+       *ap += sizeof(struct getbmap);
+       return 0;
+}
+
+STATIC int
+xfs_ioc_getbmap(
+       struct xfs_inode        *ip,
+       int                     ioflags,
+       unsigned int            cmd,
+       void                    __user *arg)
+{
+       struct getbmapx         bmx;
+       int                     error;
+
+       if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
+               return -XFS_ERROR(EFAULT);
+
+       if (bmx.bmv_count < 2)
+               return -XFS_ERROR(EINVAL);
+
+       bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
+       if (ioflags & IO_INVIS)
+               bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
+
+       error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
+                           (struct getbmap *)arg+1);
+       if (error)
+               return -error;
+
+       /* copy back header - only size of getbmap */
+       if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
+{
+       struct getbmapx __user  *base = *ap;
+
+       if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
+               return XFS_ERROR(EFAULT);
+
+       *ap += sizeof(struct getbmapx);
+       return 0;
+}
+
+STATIC int
+xfs_ioc_getbmapx(
+       struct xfs_inode        *ip,
+       void                    __user *arg)
+{
+       struct getbmapx         bmx;
+       int                     error;
+
+       if (copy_from_user(&bmx, arg, sizeof(bmx)))
+               return -XFS_ERROR(EFAULT);
+
+       if (bmx.bmv_count < 2)
+               return -XFS_ERROR(EINVAL);
+
+       if (bmx.bmv_iflags & (~BMV_IF_VALID))
+               return -XFS_ERROR(EINVAL);
+
+       error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
+                           (struct getbmapx *)arg+1);
+       if (error)
+               return -error;
+
+       /* copy back header */
+       if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
+               return -XFS_ERROR(EFAULT);
+
+       return 0;
+}
+
+/*
+ * Note: some of the ioctl's return positive numbers as a
+ * byte count indicating success, such as readlink_by_handle.
+ * So we don't "sign flip" like most other routines.  This means
+ * true errors need to be returned as a negative value.
+ */
+long
+xfs_file_ioctl(
+       struct file             *filp,
+       unsigned int            cmd,
+       unsigned long           p)
+{
+       struct inode            *inode = filp->f_path.dentry->d_inode;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       void                    __user *arg = (void __user *)p;
+       int                     ioflags = 0;
+       int                     error;
+
+       if (filp->f_mode & FMODE_NOCMTIME)
+               ioflags |= IO_INVIS;
+
+       trace_xfs_file_ioctl(ip);
+
+       switch (cmd) {
+       case FITRIM:
+               return xfs_ioc_trim(mp, arg);
+       case XFS_IOC_ALLOCSP:
+       case XFS_IOC_FREESP:
+       case XFS_IOC_RESVSP:
+       case XFS_IOC_UNRESVSP:
+       case XFS_IOC_ALLOCSP64:
+       case XFS_IOC_FREESP64:
+       case XFS_IOC_RESVSP64:
+       case XFS_IOC_UNRESVSP64:
+       case XFS_IOC_ZERO_RANGE: {
+               xfs_flock64_t           bf;
+
+               if (copy_from_user(&bf, arg, sizeof(bf)))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
+       }
+       case XFS_IOC_DIOINFO: {
+               struct dioattr  da;
+               xfs_buftarg_t   *target =
+                       XFS_IS_REALTIME_INODE(ip) ?
+                       mp->m_rtdev_targp : mp->m_ddev_targp;
+
+               da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
+               da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
+
+               if (copy_to_user(arg, &da, sizeof(da)))
+                       return -XFS_ERROR(EFAULT);
+               return 0;
+       }
+
+       case XFS_IOC_FSBULKSTAT_SINGLE:
+       case XFS_IOC_FSBULKSTAT:
+       case XFS_IOC_FSINUMBERS:
+               return xfs_ioc_bulkstat(mp, cmd, arg);
+
+       case XFS_IOC_FSGEOMETRY_V1:
+               return xfs_ioc_fsgeometry_v1(mp, arg);
+
+       case XFS_IOC_FSGEOMETRY:
+               return xfs_ioc_fsgeometry(mp, arg);
+
+       case XFS_IOC_GETVERSION:
+               return put_user(inode->i_generation, (int __user *)arg);
+
+       case XFS_IOC_FSGETXATTR:
+               return xfs_ioc_fsgetxattr(ip, 0, arg);
+       case XFS_IOC_FSGETXATTRA:
+               return xfs_ioc_fsgetxattr(ip, 1, arg);
+       case XFS_IOC_FSSETXATTR:
+               return xfs_ioc_fssetxattr(ip, filp, arg);
+       case XFS_IOC_GETXFLAGS:
+               return xfs_ioc_getxflags(ip, arg);
+       case XFS_IOC_SETXFLAGS:
+               return xfs_ioc_setxflags(ip, filp, arg);
+
+       case XFS_IOC_FSSETDM: {
+               struct fsdmidata        dmi;
+
+               if (copy_from_user(&dmi, arg, sizeof(dmi)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
+                               dmi.fsd_dmstate);
+               return -error;
+       }
+
+       case XFS_IOC_GETBMAP:
+       case XFS_IOC_GETBMAPA:
+               return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
+
+       case XFS_IOC_GETBMAPX:
+               return xfs_ioc_getbmapx(ip, arg);
+
+       case XFS_IOC_FD_TO_HANDLE:
+       case XFS_IOC_PATH_TO_HANDLE:
+       case XFS_IOC_PATH_TO_FSHANDLE: {
+               xfs_fsop_handlereq_t    hreq;
+
+               if (copy_from_user(&hreq, arg, sizeof(hreq)))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_find_handle(cmd, &hreq);
+       }
+       case XFS_IOC_OPEN_BY_HANDLE: {
+               xfs_fsop_handlereq_t    hreq;
+
+               if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_open_by_handle(filp, &hreq);
+       }
+       case XFS_IOC_FSSETDM_BY_HANDLE:
+               return xfs_fssetdm_by_handle(filp, arg);
+
+       case XFS_IOC_READLINK_BY_HANDLE: {
+               xfs_fsop_handlereq_t    hreq;
+
+               if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_readlink_by_handle(filp, &hreq);
+       }
+       case XFS_IOC_ATTRLIST_BY_HANDLE:
+               return xfs_attrlist_by_handle(filp, arg);
+
+       case XFS_IOC_ATTRMULTI_BY_HANDLE:
+               return xfs_attrmulti_by_handle(filp, arg);
+
+       case XFS_IOC_SWAPEXT: {
+               struct xfs_swapext      sxp;
+
+               if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
+                       return -XFS_ERROR(EFAULT);
+               error = xfs_swapext(&sxp);
+               return -error;
+       }
+
+       case XFS_IOC_FSCOUNTS: {
+               xfs_fsop_counts_t out;
+
+               error = xfs_fs_counts(mp, &out);
+               if (error)
+                       return -error;
+
+               if (copy_to_user(arg, &out, sizeof(out)))
+                       return -XFS_ERROR(EFAULT);
+               return 0;
+       }
+
+       case XFS_IOC_SET_RESBLKS: {
+               xfs_fsop_resblks_t inout;
+               __uint64_t         in;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (mp->m_flags & XFS_MOUNT_RDONLY)
+                       return -XFS_ERROR(EROFS);
+
+               if (copy_from_user(&inout, arg, sizeof(inout)))
+                       return -XFS_ERROR(EFAULT);
+
+               /* input parameter is passed in resblks field of structure */
+               in = inout.resblks;
+               error = xfs_reserve_blocks(mp, &in, &inout);
+               if (error)
+                       return -error;
+
+               if (copy_to_user(arg, &inout, sizeof(inout)))
+                       return -XFS_ERROR(EFAULT);
+               return 0;
+       }
+
+       case XFS_IOC_GET_RESBLKS: {
+               xfs_fsop_resblks_t out;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               error = xfs_reserve_blocks(mp, NULL, &out);
+               if (error)
+                       return -error;
+
+               if (copy_to_user(arg, &out, sizeof(out)))
+                       return -XFS_ERROR(EFAULT);
+
+               return 0;
+       }
+
+       case XFS_IOC_FSGROWFSDATA: {
+               xfs_growfs_data_t in;
+
+               if (copy_from_user(&in, arg, sizeof(in)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_growfs_data(mp, &in);
+               return -error;
+       }
+
+       case XFS_IOC_FSGROWFSLOG: {
+               xfs_growfs_log_t in;
+
+               if (copy_from_user(&in, arg, sizeof(in)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_growfs_log(mp, &in);
+               return -error;
+       }
+
+       case XFS_IOC_FSGROWFSRT: {
+               xfs_growfs_rt_t in;
+
+               if (copy_from_user(&in, arg, sizeof(in)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_growfs_rt(mp, &in);
+               return -error;
+       }
+
+       case XFS_IOC_GOINGDOWN: {
+               __uint32_t in;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (get_user(in, (__uint32_t __user *)arg))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_fs_goingdown(mp, in);
+               return -error;
+       }
+
+       case XFS_IOC_ERROR_INJECTION: {
+               xfs_error_injection_t in;
+
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               if (copy_from_user(&in, arg, sizeof(in)))
+                       return -XFS_ERROR(EFAULT);
+
+               error = xfs_errortag_add(in.errtag, mp);
+               return -error;
+       }
+
+       case XFS_IOC_ERROR_CLEARALL:
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+
+               error = xfs_errortag_clearall(mp, 1);
+               return -error;
+
+       default:
+               return -ENOTTY;
+       }
+}
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h

new file mode 100644 (file)

index 0000000..d56173b
--- /dev/null
+++ b/fs/xfs/xfs_ioctl.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2008 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_IOCTL_H__
+#define __XFS_IOCTL_H__
+
+extern int
+xfs_ioc_space(
+       struct xfs_inode        *ip,
+       struct inode            *inode,
+       struct file             *filp,
+       int                     ioflags,
+       unsigned int            cmd,
+       xfs_flock64_t           *bf);
+
+extern int
+xfs_find_handle(
+       unsigned int            cmd,
+       xfs_fsop_handlereq_t    *hreq);
+
+extern int
+xfs_open_by_handle(
+       struct file             *parfilp,
+       xfs_fsop_handlereq_t    *hreq);
+
+extern int
+xfs_readlink_by_handle(
+       struct file             *parfilp,
+       xfs_fsop_handlereq_t    *hreq);
+
+extern int
+xfs_attrmulti_attr_get(
+       struct inode            *inode,
+       unsigned char           *name,
+       unsigned char           __user *ubuf,
+       __uint32_t              *len,
+       __uint32_t              flags);
+
+extern int
+xfs_attrmulti_attr_set(
+       struct inode            *inode,
+       unsigned char           *name,
+       const unsigned char     __user *ubuf,
+       __uint32_t              len,
+       __uint32_t              flags);
+
+extern int
+xfs_attrmulti_attr_remove(
+       struct inode            *inode,
+       unsigned char           *name,
+       __uint32_t              flags);
+
+extern struct dentry *
+xfs_handle_to_dentry(
+       struct file             *parfilp,
+       void __user             *uhandle,
+       u32                     hlen);
+
+extern long
+xfs_file_ioctl(
+       struct file             *filp,
+       unsigned int            cmd,
+       unsigned long           p);
+
+extern long
+xfs_file_compat_ioctl(
+       struct file             *file,
+       unsigned int            cmd,
+       unsigned long           arg);
+
+#endif
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c

new file mode 100644 (file)

index 0000000..54e623b
--- /dev/null
+++ b/fs/xfs/xfs_ioctl32.c
@@ -0,0 +1,672 @@
+/*
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include <linux/compat.h>
+#include <linux/ioctl.h>
+#include <linux/mount.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_vnode.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_error.h"
+#include "xfs_dfrag.h"
+#include "xfs_vnodeops.h"
+#include "xfs_fsops.h"
+#include "xfs_alloc.h"
+#include "xfs_rtalloc.h"
+#include "xfs_attr.h"
+#include "xfs_ioctl.h"
+#include "xfs_ioctl32.h"
+#include "xfs_trace.h"
+
+#define  _NATIVE_IOC(cmd, type) \
+         _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
+
+#ifdef BROKEN_X86_ALIGNMENT
+STATIC int
+xfs_compat_flock64_copyin(
+       xfs_flock64_t           *bf,
+       compat_xfs_flock64_t    __user *arg32)
+{
+       if (get_user(bf->l_type,        &arg32->l_type) ||
+           get_user(bf->l_whence,      &arg32->l_whence) ||
+           get_user(bf->l_start,       &arg32->l_start) ||
+           get_user(bf->l_len,         &arg32->l_len) ||
+           get_user(bf->l_sysid,       &arg32->l_sysid) ||
+           get_user(bf->l_pid,         &arg32->l_pid) ||
+           copy_from_user(bf->l_pad,   &arg32->l_pad,  4*sizeof(u32)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_compat_ioc_fsgeometry_v1(
+       struct xfs_mount          *mp,
+       compat_xfs_fsop_geom_v1_t __user *arg32)
+{
+       xfs_fsop_geom_t           fsgeo;
+       int                       error;
+
+       error = xfs_fs_geometry(mp, &fsgeo, 3);
+       if (error)
+               return -error;
+       /* The 32-bit variant simply has some padding at the end */
+       if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_compat_growfs_data_copyin(
+       struct xfs_growfs_data   *in,
+       compat_xfs_growfs_data_t __user *arg32)
+{
+       if (get_user(in->newblocks, &arg32->newblocks) ||
+           get_user(in->imaxpct,   &arg32->imaxpct))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_compat_growfs_rt_copyin(
+       struct xfs_growfs_rt     *in,
+       compat_xfs_growfs_rt_t  __user *arg32)
+{
+       if (get_user(in->newblocks, &arg32->newblocks) ||
+           get_user(in->extsize,   &arg32->extsize))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+STATIC int
+xfs_inumbers_fmt_compat(
+       void                    __user *ubuffer,
+       const xfs_inogrp_t      *buffer,
+       long                    count,
+       long                    *written)
+{
+       compat_xfs_inogrp_t     __user *p32 = ubuffer;
+       long                    i;
+
+       for (i = 0; i < count; i++) {
+               if (put_user(buffer[i].xi_startino,   &p32[i].xi_startino) ||
+                   put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
+                   put_user(buffer[i].xi_allocmask,  &p32[i].xi_allocmask))
+                       return -XFS_ERROR(EFAULT);
+       }
+       *written = count * sizeof(*p32);
+       return 0;
+}
+
+#else
+#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
+#endif /* BROKEN_X86_ALIGNMENT */
+
+STATIC int
+xfs_ioctl32_bstime_copyin(
+       xfs_bstime_t            *bstime,
+       compat_xfs_bstime_t     __user *bstime32)
+{
+       compat_time_t           sec32;  /* tv_sec differs on 64 vs. 32 */
+
+       if (get_user(sec32,             &bstime32->tv_sec)      ||
+           get_user(bstime->tv_nsec,   &bstime32->tv_nsec))
+               return -XFS_ERROR(EFAULT);
+       bstime->tv_sec = sec32;
+       return 0;
+}
+
+/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */
+STATIC int
+xfs_ioctl32_bstat_copyin(
+       xfs_bstat_t             *bstat,
+       compat_xfs_bstat_t      __user *bstat32)
+{
+       if (get_user(bstat->bs_ino,     &bstat32->bs_ino)       ||
+           get_user(bstat->bs_mode,    &bstat32->bs_mode)      ||
+           get_user(bstat->bs_nlink,   &bstat32->bs_nlink)     ||
+           get_user(bstat->bs_uid,     &bstat32->bs_uid)       ||
+           get_user(bstat->bs_gid,     &bstat32->bs_gid)       ||
+           get_user(bstat->bs_rdev,    &bstat32->bs_rdev)      ||
+           get_user(bstat->bs_blksize, &bstat32->bs_blksize)   ||
+           get_user(bstat->bs_size,    &bstat32->bs_size)      ||
+           xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) ||
+           xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) ||
+           xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) ||
+           get_user(bstat->bs_blocks,  &bstat32->bs_size)      ||
+           get_user(bstat->bs_xflags,  &bstat32->bs_size)      ||
+           get_user(bstat->bs_extsize, &bstat32->bs_extsize)   ||
+           get_user(bstat->bs_extents, &bstat32->bs_extents)   ||
+           get_user(bstat->bs_gen,     &bstat32->bs_gen)       ||
+           get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
+           get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
+           get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
+           get_user(bstat->bs_dmstate, &bstat32->bs_dmstate)   ||
+           get_user(bstat->bs_aextents, &bstat32->bs_aextents))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+/* XFS_IOC_FSBULKSTAT and friends */
+
+STATIC int
+xfs_bstime_store_compat(
+       compat_xfs_bstime_t     __user *p32,
+       const xfs_bstime_t      *p)
+{
+       __s32                   sec32;
+
+       sec32 = p->tv_sec;
+       if (put_user(sec32, &p32->tv_sec) ||
+           put_user(p->tv_nsec, &p32->tv_nsec))
+               return -XFS_ERROR(EFAULT);
+       return 0;
+}
+
+/* Return 0 on success or positive error (to xfs_bulkstat()) */
+STATIC int
+xfs_bulkstat_one_fmt_compat(
+       void                    __user *ubuffer,
+       int                     ubsize,
+       int                     *ubused,
+       const xfs_bstat_t       *buffer)
+{
+       compat_xfs_bstat_t      __user *p32 = ubuffer;
+
+       if (ubsize < sizeof(*p32))
+               return XFS_ERROR(ENOMEM);
+
+       if (put_user(buffer->bs_ino,      &p32->bs_ino)         ||
+           put_user(buffer->bs_mode,     &p32->bs_mode)        ||
+           put_user(buffer->bs_nlink,    &p32->bs_nlink)       ||
+           put_user(buffer->bs_uid,      &p32->bs_uid)         ||
+           put_user(buffer->bs_gid,      &p32->bs_gid)         ||
+           put_user(buffer->bs_rdev,     &p32->bs_rdev)        ||
+           put_user(buffer->bs_blksize,  &p32->bs_blksize)     ||
+           put_user(buffer->bs_size,     &p32->bs_size)        ||
+           xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
+           xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
+           xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
+           put_user(buffer->bs_blocks,   &p32->bs_blocks)      ||
+           put_user(buffer->bs_xflags,   &p32->bs_xflags)      ||
+           put_user(buffer->bs_extsize,  &p32->bs_extsize)     ||
+           put_user(buffer->bs_extents,  &p32->bs_extents)     ||
+           put_user(buffer->bs_gen,      &p32->bs_gen)         ||
+           put_user(buffer->bs_projid,   &p32->bs_projid)      ||
+           put_user(buffer->bs_projid_hi,      &p32->bs_projid_hi)     ||
+           put_user(buffer->bs_dmevmask, &p32->bs_dmevmask)    ||
+           put_user(buffer->bs_dmstate,  &p32->bs_dmstate)     ||
+           put_user(buffer->bs_aextents, &p32->bs_aextents))
+               return XFS_ERROR(EFAULT);
+       if (ubused)
+               *ubused = sizeof(*p32);
+       return 0;
+}
+
+STATIC int
+xfs_bulkstat_one_compat(
+       xfs_mount_t     *mp,            /* mount point for filesystem */
+       xfs_ino_t       ino,            /* inode number to get data for */
+       void            __user *buffer, /* buffer to place output in */
+       int             ubsize,         /* size of buffer */
+       int             *ubused,        /* bytes used by me */
+       int             *stat)          /* BULKSTAT_RV_... */
+{
+       return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
+                                   xfs_bulkstat_one_fmt_compat,
+                                   ubused, stat);
+}
+
+/* copied from xfs_ioctl.c */
+STATIC int
+xfs_compat_ioc_bulkstat(
+       xfs_mount_t               *mp,
+       unsigned int              cmd,
+       compat_xfs_fsop_bulkreq_t __user *p32)
+{
+       u32                     addr;
+       xfs_fsop_bulkreq_t      bulkreq;
+       int                     count;  /* # of records returned */
+       xfs_ino_t               inlast; /* last inode number */
+       int                     done;
+       int                     error;
+
+       /* done = 1 if there are more stats to get and if bulkstat */
+       /* should be called again (unused here, but used in dmapi) */
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+
+       if (get_user(addr, &p32->lastip))
+               return -XFS_ERROR(EFAULT);
+       bulkreq.lastip = compat_ptr(addr);
+       if (get_user(bulkreq.icount, &p32->icount) ||
+           get_user(addr, &p32->ubuffer))
+               return -XFS_ERROR(EFAULT);
+       bulkreq.ubuffer = compat_ptr(addr);
+       if (get_user(addr, &p32->ocount))
+               return -XFS_ERROR(EFAULT);
+       bulkreq.ocount = compat_ptr(addr);
+
+       if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+               return -XFS_ERROR(EFAULT);
+
+       if ((count = bulkreq.icount) <= 0)
+               return -XFS_ERROR(EINVAL);
+
+       if (bulkreq.ubuffer == NULL)
+               return -XFS_ERROR(EINVAL);
+
+       if (cmd == XFS_IOC_FSINUMBERS_32) {
+               error = xfs_inumbers(mp, &inlast, &count,
+                               bulkreq.ubuffer, xfs_inumbers_fmt_compat);
+       } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
+               int res;
+
+               error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
+                               sizeof(compat_xfs_bstat_t), 0, &res);
+       } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
+               error = xfs_bulkstat(mp, &inlast, &count,
+                       xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
+                       bulkreq.ubuffer, &done);
+       } else
+               error = XFS_ERROR(EINVAL);
+       if (error)
+               return -error;
+
+       if (bulkreq.ocount != NULL) {
+               if (copy_to_user(bulkreq.lastip, &inlast,
+                                               sizeof(xfs_ino_t)))
+                       return -XFS_ERROR(EFAULT);
+
+               if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
+                       return -XFS_ERROR(EFAULT);
+       }
+
+       return 0;
+}
+
+STATIC int
+xfs_compat_handlereq_copyin(
+       xfs_fsop_handlereq_t            *hreq,
+       compat_xfs_fsop_handlereq_t     __user *arg32)
+{
+       compat_xfs_fsop_handlereq_t     hreq32;
+
+       if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       hreq->fd = hreq32.fd;
+       hreq->path = compat_ptr(hreq32.path);
+       hreq->oflags = hreq32.oflags;
+       hreq->ihandle = compat_ptr(hreq32.ihandle);
+       hreq->ihandlen = hreq32.ihandlen;
+       hreq->ohandle = compat_ptr(hreq32.ohandle);
+       hreq->ohandlen = compat_ptr(hreq32.ohandlen);
+
+       return 0;
+}
+
+STATIC struct dentry *
+xfs_compat_handlereq_to_dentry(
+       struct file             *parfilp,
+       compat_xfs_fsop_handlereq_t *hreq)
+{
+       return xfs_handle_to_dentry(parfilp,
+                       compat_ptr(hreq->ihandle), hreq->ihandlen);
+}
+
+STATIC int
+xfs_compat_attrlist_by_handle(
+       struct file             *parfilp,
+       void                    __user *arg)
+{
+       int                     error;
+       attrlist_cursor_kern_t  *cursor;
+       compat_xfs_fsop_attrlist_handlereq_t al_hreq;
+       struct dentry           *dentry;
+       char                    *kbuf;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&al_hreq, arg,
+                          sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+       if (al_hreq.buflen > XATTR_LIST_MAX)
+               return -XFS_ERROR(EINVAL);
+
+       /*
+        * Reject flags, only allow namespaces.
+        */
+       if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
+               return -XFS_ERROR(EINVAL);
+
+       dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       error = -ENOMEM;
+       kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
+       if (!kbuf)
+               goto out_dput;
+
+       cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
+       error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
+                                       al_hreq.flags, cursor);
+       if (error)
+               goto out_kfree;
+
+       if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
+               error = -EFAULT;
+
+ out_kfree:
+       kfree(kbuf);
+ out_dput:
+       dput(dentry);
+       return error;
+}
+
+STATIC int
+xfs_compat_attrmulti_by_handle(
+       struct file                             *parfilp,
+       void                                    __user *arg)
+{
+       int                                     error;
+       compat_xfs_attr_multiop_t               *ops;
+       compat_xfs_fsop_attrmulti_handlereq_t   am_hreq;
+       struct dentry                           *dentry;
+       unsigned int                            i, size;
+       unsigned char                           *attr_name;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&am_hreq, arg,
+                          sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       /* overflow check */
+       if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
+               return -E2BIG;
+
+       dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       error = E2BIG;
+       size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t);
+       if (!size || size > 16 * PAGE_SIZE)
+               goto out_dput;
+
+       ops = memdup_user(compat_ptr(am_hreq.ops), size);
+       if (IS_ERR(ops)) {
+               error = PTR_ERR(ops);
+               goto out_dput;
+       }
+
+       attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
+       if (!attr_name)
+               goto out_kfree_ops;
+
+       error = 0;
+       for (i = 0; i < am_hreq.opcount; i++) {
+               ops[i].am_error = strncpy_from_user((char *)attr_name,
+                               compat_ptr(ops[i].am_attrname),
+                               MAXNAMELEN);
+               if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
+                       error = -ERANGE;
+               if (ops[i].am_error < 0)
+                       break;
+
+               switch (ops[i].am_opcode) {
+               case ATTR_OP_GET:
+                       ops[i].am_error = xfs_attrmulti_attr_get(
+                                       dentry->d_inode, attr_name,
+                                       compat_ptr(ops[i].am_attrvalue),
+                                       &ops[i].am_length, ops[i].am_flags);
+                       break;
+               case ATTR_OP_SET:
+                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                       if (ops[i].am_error)
+                               break;
+                       ops[i].am_error = xfs_attrmulti_attr_set(
+                                       dentry->d_inode, attr_name,
+                                       compat_ptr(ops[i].am_attrvalue),
+                                       ops[i].am_length, ops[i].am_flags);
+                       mnt_drop_write(parfilp->f_path.mnt);
+                       break;
+               case ATTR_OP_REMOVE:
+                       ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                       if (ops[i].am_error)
+                               break;
+                       ops[i].am_error = xfs_attrmulti_attr_remove(
+                                       dentry->d_inode, attr_name,
+                                       ops[i].am_flags);
+                       mnt_drop_write(parfilp->f_path.mnt);
+                       break;
+               default:
+                       ops[i].am_error = EINVAL;
+               }
+       }
+
+       if (copy_to_user(compat_ptr(am_hreq.ops), ops, size))
+               error = XFS_ERROR(EFAULT);
+
+       kfree(attr_name);
+ out_kfree_ops:
+       kfree(ops);
+ out_dput:
+       dput(dentry);
+       return -error;
+}
+
+STATIC int
+xfs_compat_fssetdm_by_handle(
+       struct file             *parfilp,
+       void                    __user *arg)
+{
+       int                     error;
+       struct fsdmidata        fsd;
+       compat_xfs_fsop_setdm_handlereq_t dmhreq;
+       struct dentry           *dentry;
+
+       if (!capable(CAP_MKNOD))
+               return -XFS_ERROR(EPERM);
+       if (copy_from_user(&dmhreq, arg,
+                          sizeof(compat_xfs_fsop_setdm_handlereq_t)))
+               return -XFS_ERROR(EFAULT);
+
+       dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+
+       if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
+               error = -XFS_ERROR(EPERM);
+               goto out;
+       }
+
+       if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
+               error = -XFS_ERROR(EFAULT);
+               goto out;
+       }
+
+       error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
+                                fsd.fsd_dmstate);
+
+out:
+       dput(dentry);
+       return error;
+}
+
+long
+xfs_file_compat_ioctl(
+       struct file             *filp,
+       unsigned                cmd,
+       unsigned long           p)
+{
+       struct inode            *inode = filp->f_path.dentry->d_inode;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       void                    __user *arg = (void __user *)p;
+       int                     ioflags = 0;
+       int                     error;
+
+       if (filp->f_mode & FMODE_NOCMTIME)
+               ioflags |= IO_INVIS;
+
+       trace_xfs_file_compat_ioctl(ip);
+
+       switch (cmd) {
+       /* No size or alignment issues on any arch */
+       case XFS_IOC_DIOINFO:
+       case XFS_IOC_FSGEOMETRY:
+       case XFS_IOC_FSGETXATTR:
+       case XFS_IOC_FSSETXATTR:
+       case XFS_IOC_FSGETXATTRA:
+       case XFS_IOC_FSSETDM:
+       case XFS_IOC_GETBMAP:
+       case XFS_IOC_GETBMAPA:
+       case XFS_IOC_GETBMAPX:
+       case XFS_IOC_FSCOUNTS:
+       case XFS_IOC_SET_RESBLKS:
+       case XFS_IOC_GET_RESBLKS:
+       case XFS_IOC_FSGROWFSLOG:
+       case XFS_IOC_GOINGDOWN:
+       case XFS_IOC_ERROR_INJECTION:
+       case XFS_IOC_ERROR_CLEARALL:
+               return xfs_file_ioctl(filp, cmd, p);
+#ifndef BROKEN_X86_ALIGNMENT
+       /* These are handled fine if no alignment issues */
+       case XFS_IOC_ALLOCSP:
+       case XFS_IOC_FREESP:
+       case XFS_IOC_RESVSP:
+       case XFS_IOC_UNRESVSP:
+       case XFS_IOC_ALLOCSP64:
+       case XFS_IOC_FREESP64:
+       case XFS_IOC_RESVSP64:
+       case XFS_IOC_UNRESVSP64:
+       case XFS_IOC_FSGEOMETRY_V1:
+       case XFS_IOC_FSGROWFSDATA:
+       case XFS_IOC_FSGROWFSRT:
+       case XFS_IOC_ZERO_RANGE:
+               return xfs_file_ioctl(filp, cmd, p);
+#else
+       case XFS_IOC_ALLOCSP_32:
+       case XFS_IOC_FREESP_32:
+       case XFS_IOC_ALLOCSP64_32:
+       case XFS_IOC_FREESP64_32:
+       case XFS_IOC_RESVSP_32:
+       case XFS_IOC_UNRESVSP_32:
+       case XFS_IOC_RESVSP64_32:
+       case XFS_IOC_UNRESVSP64_32:
+       case XFS_IOC_ZERO_RANGE_32: {
+               struct xfs_flock64      bf;
+
+               if (xfs_compat_flock64_copyin(&bf, arg))
+                       return -XFS_ERROR(EFAULT);
+               cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
+               return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
+       }
+       case XFS_IOC_FSGEOMETRY_V1_32:
+               return xfs_compat_ioc_fsgeometry_v1(mp, arg);
+       case XFS_IOC_FSGROWFSDATA_32: {
+               struct xfs_growfs_data  in;
+
+               if (xfs_compat_growfs_data_copyin(&in, arg))
+                       return -XFS_ERROR(EFAULT);
+               error = xfs_growfs_data(mp, &in);
+               return -error;
+       }
+       case XFS_IOC_FSGROWFSRT_32: {
+               struct xfs_growfs_rt    in;
+
+               if (xfs_compat_growfs_rt_copyin(&in, arg))
+                       return -XFS_ERROR(EFAULT);
+               error = xfs_growfs_rt(mp, &in);
+               return -error;
+       }
+#endif
+       /* long changes size, but xfs only copiese out 32 bits */
+       case XFS_IOC_GETXFLAGS_32:
+       case XFS_IOC_SETXFLAGS_32:
+       case XFS_IOC_GETVERSION_32:
+               cmd = _NATIVE_IOC(cmd, long);
+               return xfs_file_ioctl(filp, cmd, p);
+       case XFS_IOC_SWAPEXT_32: {
+               struct xfs_swapext        sxp;
+               struct compat_xfs_swapext __user *sxu = arg;
+
+               /* Bulk copy in up to the sx_stat field, then copy bstat */
+               if (copy_from_user(&sxp, sxu,
+                                  offsetof(struct xfs_swapext, sx_stat)) ||
+                   xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
+                       return -XFS_ERROR(EFAULT);
+               error = xfs_swapext(&sxp);
+               return -error;
+       }
+       case XFS_IOC_FSBULKSTAT_32:
+       case XFS_IOC_FSBULKSTAT_SINGLE_32:
+       case XFS_IOC_FSINUMBERS_32:
+               return xfs_compat_ioc_bulkstat(mp, cmd, arg);
+       case XFS_IOC_FD_TO_HANDLE_32:
+       case XFS_IOC_PATH_TO_HANDLE_32:
+       case XFS_IOC_PATH_TO_FSHANDLE_32: {
+               struct xfs_fsop_handlereq       hreq;
+
+               if (xfs_compat_handlereq_copyin(&hreq, arg))
+                       return -XFS_ERROR(EFAULT);
+               cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
+               return xfs_find_handle(cmd, &hreq);
+       }
+       case XFS_IOC_OPEN_BY_HANDLE_32: {
+               struct xfs_fsop_handlereq       hreq;
+
+               if (xfs_compat_handlereq_copyin(&hreq, arg))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_open_by_handle(filp, &hreq);
+       }
+       case XFS_IOC_READLINK_BY_HANDLE_32: {
+               struct xfs_fsop_handlereq       hreq;
+
+               if (xfs_compat_handlereq_copyin(&hreq, arg))
+                       return -XFS_ERROR(EFAULT);
+               return xfs_readlink_by_handle(filp, &hreq);
+       }
+       case XFS_IOC_ATTRLIST_BY_HANDLE_32:
+               return xfs_compat_attrlist_by_handle(filp, arg);
+       case XFS_IOC_ATTRMULTI_BY_HANDLE_32:
+               return xfs_compat_attrmulti_by_handle(filp, arg);
+       case XFS_IOC_FSSETDM_BY_HANDLE_32:
+               return xfs_compat_fssetdm_by_handle(filp, arg);
+       default:
+               return -XFS_ERROR(ENOIOCTLCMD);
+       }
+}
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h

new file mode 100644 (file)

index 0000000..80f4060
--- /dev/null
+++ b/fs/xfs/xfs_ioctl32.h
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_IOCTL32_H__
+#define __XFS_IOCTL32_H__
+
+#include <linux/compat.h>
+
+/*
+ * on 32-bit arches, ioctl argument structures may have different sizes
+ * and/or alignment.  We define compat structures which match the
+ * 32-bit sizes/alignments here, and their associated ioctl numbers.
+ *
+ * xfs_ioctl32.c contains routines to copy these structures in and out.
+ */
+
+/* stock kernel-level ioctls we support */
+#define XFS_IOC_GETXFLAGS_32   FS_IOC32_GETFLAGS
+#define XFS_IOC_SETXFLAGS_32   FS_IOC32_SETFLAGS
+#define XFS_IOC_GETVERSION_32  FS_IOC32_GETVERSION
+
+/*
+ * On intel, even if sizes match, alignment and/or padding may differ.
+ */
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
+#define BROKEN_X86_ALIGNMENT
+#define __compat_packed __attribute__((packed))
+#else
+#define __compat_packed
+#endif
+
+typedef struct compat_xfs_bstime {
+       compat_time_t   tv_sec;         /* seconds              */
+       __s32           tv_nsec;        /* and nanoseconds      */
+} compat_xfs_bstime_t;
+
+typedef struct compat_xfs_bstat {
+       __u64           bs_ino;         /* inode number                 */
+       __u16           bs_mode;        /* type and mode                */
+       __u16           bs_nlink;       /* number of links              */
+       __u32           bs_uid;         /* user id                      */
+       __u32           bs_gid;         /* group id                     */
+       __u32           bs_rdev;        /* device value                 */
+       __s32           bs_blksize;     /* block size                   */
+       __s64           bs_size;        /* file size                    */
+       compat_xfs_bstime_t bs_atime;   /* access time                  */
+       compat_xfs_bstime_t bs_mtime;   /* modify time                  */
+       compat_xfs_bstime_t bs_ctime;   /* inode change time            */
+       int64_t         bs_blocks;      /* number of blocks             */
+       __u32           bs_xflags;      /* extended flags               */
+       __s32           bs_extsize;     /* extent size                  */
+       __s32           bs_extents;     /* number of extents            */
+       __u32           bs_gen;         /* generation count             */
+       __u16           bs_projid_lo;   /* lower part of project id     */
+#define        bs_projid       bs_projid_lo    /* (previously just bs_projid)  */
+       __u16           bs_projid_hi;   /* high part of project id      */
+       unsigned char   bs_pad[12];     /* pad space, unused            */
+       __u32           bs_dmevmask;    /* DMIG event mask              */
+       __u16           bs_dmstate;     /* DMIG state info              */
+       __u16           bs_aextents;    /* attribute number of extents  */
+} __compat_packed compat_xfs_bstat_t;
+
+typedef struct compat_xfs_fsop_bulkreq {
+       compat_uptr_t   lastip;         /* last inode # pointer         */
+       __s32           icount;         /* count of entries in buffer   */
+       compat_uptr_t   ubuffer;        /* user buffer for inode desc.  */
+       compat_uptr_t   ocount;         /* output count pointer         */
+} compat_xfs_fsop_bulkreq_t;
+
+#define XFS_IOC_FSBULKSTAT_32 \
+       _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
+       _IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
+#define XFS_IOC_FSINUMBERS_32 \
+       _IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
+
+typedef struct compat_xfs_fsop_handlereq {
+       __u32           fd;             /* fd for FD_TO_HANDLE          */
+       compat_uptr_t   path;           /* user pathname                */
+       __u32           oflags;         /* open flags                   */
+       compat_uptr_t   ihandle;        /* user supplied handle         */
+       __u32           ihandlen;       /* user supplied length         */
+       compat_uptr_t   ohandle;        /* user buffer for handle       */
+       compat_uptr_t   ohandlen;       /* user buffer length           */
+} compat_xfs_fsop_handlereq_t;
+
+#define XFS_IOC_PATH_TO_FSHANDLE_32 \
+       _IOWR('X', 104, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_PATH_TO_HANDLE_32 \
+       _IOWR('X', 105, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_FD_TO_HANDLE_32 \
+       _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_OPEN_BY_HANDLE_32 \
+       _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
+#define XFS_IOC_READLINK_BY_HANDLE_32 \
+       _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
+
+/* The bstat field in the swapext struct needs translation */
+typedef struct compat_xfs_swapext {
+       __int64_t               sx_version;     /* version */
+       __int64_t               sx_fdtarget;    /* fd of target file */
+       __int64_t               sx_fdtmp;       /* fd of tmp file */
+       xfs_off_t               sx_offset;      /* offset into file */
+       xfs_off_t               sx_length;      /* leng from offset */
+       char                    sx_pad[16];     /* pad space, unused */
+       compat_xfs_bstat_t      sx_stat;        /* stat of target b4 copy */
+} __compat_packed compat_xfs_swapext_t;
+
+#define XFS_IOC_SWAPEXT_32     _IOWR('X', 109, struct compat_xfs_swapext)
+
+typedef struct compat_xfs_fsop_attrlist_handlereq {
+       struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
+       struct xfs_attrlist_cursor      pos; /* opaque cookie, list offset */
+       __u32                           flags;  /* which namespace to use */
+       __u32                           buflen; /* length of buffer supplied */
+       compat_uptr_t                   buffer; /* returned names */
+} __compat_packed compat_xfs_fsop_attrlist_handlereq_t;
+
+/* Note: actually this is read/write */
+#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \
+       _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq)
+
+/* am_opcodes defined in xfs_fs.h */
+typedef struct compat_xfs_attr_multiop {
+       __u32           am_opcode;
+       __s32           am_error;
+       compat_uptr_t   am_attrname;
+       compat_uptr_t   am_attrvalue;
+       __u32           am_length;
+       __u32           am_flags;
+} compat_xfs_attr_multiop_t;
+
+typedef struct compat_xfs_fsop_attrmulti_handlereq {
+       struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
+       __u32                           opcount;/* count of following multiop */
+       /* ptr to compat_xfs_attr_multiop */
+       compat_uptr_t                   ops; /* attr_multi data */
+} compat_xfs_fsop_attrmulti_handlereq_t;
+
+#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \
+       _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
+
+typedef struct compat_xfs_fsop_setdm_handlereq {
+       struct compat_xfs_fsop_handlereq hreq;  /* handle information   */
+       /* ptr to struct fsdmidata */
+       compat_uptr_t                   data;   /* DMAPI data   */
+} compat_xfs_fsop_setdm_handlereq_t;
+
+#define XFS_IOC_FSSETDM_BY_HANDLE_32 \
+       _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq)
+
+#ifdef BROKEN_X86_ALIGNMENT
+/* on ia32 l_start is on a 32-bit boundary */
+typedef struct compat_xfs_flock64 {
+       __s16           l_type;
+       __s16           l_whence;
+       __s64           l_start __attribute__((packed));
+                       /* len == 0 means until end of file */
+       __s64           l_len __attribute__((packed));
+       __s32           l_sysid;
+       __u32           l_pid;
+       __s32           l_pad[4];       /* reserve area */
+} compat_xfs_flock64_t;
+
+#define XFS_IOC_ALLOCSP_32     _IOW('X', 10, struct compat_xfs_flock64)
+#define XFS_IOC_FREESP_32      _IOW('X', 11, struct compat_xfs_flock64)
+#define XFS_IOC_ALLOCSP64_32   _IOW('X', 36, struct compat_xfs_flock64)
+#define XFS_IOC_FREESP64_32    _IOW('X', 37, struct compat_xfs_flock64)
+#define XFS_IOC_RESVSP_32      _IOW('X', 40, struct compat_xfs_flock64)
+#define XFS_IOC_UNRESVSP_32    _IOW('X', 41, struct compat_xfs_flock64)
+#define XFS_IOC_RESVSP64_32    _IOW('X', 42, struct compat_xfs_flock64)
+#define XFS_IOC_UNRESVSP64_32  _IOW('X', 43, struct compat_xfs_flock64)
+#define XFS_IOC_ZERO_RANGE_32  _IOW('X', 57, struct compat_xfs_flock64)
+
+typedef struct compat_xfs_fsop_geom_v1 {
+       __u32           blocksize;      /* filesystem (data) block size */
+       __u32           rtextsize;      /* realtime extent size         */
+       __u32           agblocks;       /* fsblocks in an AG            */
+       __u32           agcount;        /* number of allocation groups  */
+       __u32           logblocks;      /* fsblocks in the log          */
+       __u32           sectsize;       /* (data) sector size, bytes    */
+       __u32           inodesize;      /* inode size in bytes          */
+       __u32           imaxpct;        /* max allowed inode space(%)   */
+       __u64           datablocks;     /* fsblocks in data subvolume   */
+       __u64           rtblocks;       /* fsblocks in realtime subvol  */
+       __u64           rtextents;      /* rt extents in realtime subvol*/
+       __u64           logstart;       /* starting fsblock of the log  */
+       unsigned char   uuid[16];       /* unique id of the filesystem  */
+       __u32           sunit;          /* stripe unit, fsblocks        */
+       __u32           swidth;         /* stripe width, fsblocks       */
+       __s32           version;        /* structure version            */
+       __u32           flags;          /* superblock version flags     */
+       __u32           logsectsize;    /* log sector size, bytes       */
+       __u32           rtsectsize;     /* realtime sector size, bytes  */
+       __u32           dirblocksize;   /* directory block size, bytes  */
+} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
+
+#define XFS_IOC_FSGEOMETRY_V1_32  \
+       _IOR('X', 100, struct compat_xfs_fsop_geom_v1)
+
+typedef struct compat_xfs_inogrp {
+       __u64           xi_startino;    /* starting inode number        */
+       __s32           xi_alloccount;  /* # bits set in allocmask      */
+       __u64           xi_allocmask;   /* mask of allocated inodes     */
+} __attribute__((packed)) compat_xfs_inogrp_t;
+
+/* These growfs input structures have padding on the end, so must translate */
+typedef struct compat_xfs_growfs_data {
+       __u64           newblocks;      /* new data subvol size, fsblocks */
+       __u32           imaxpct;        /* new inode space percentage limit */
+} __attribute__((packed)) compat_xfs_growfs_data_t;
+
+typedef struct compat_xfs_growfs_rt {
+       __u64           newblocks;      /* new realtime size, fsblocks */
+       __u32           extsize;        /* new realtime extent size, fsblocks */
+} __attribute__((packed)) compat_xfs_growfs_rt_t;
+
+#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data)
+#define XFS_IOC_FSGROWFSRT_32   _IOW('X', 112, struct compat_xfs_growfs_rt)
+
+#endif /* BROKEN_X86_ALIGNMENT */
+
+#endif /* __XFS_IOCTL32_H__ */
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c

new file mode 100644 (file)

index 0000000..673704f
--- /dev/null
+++ b/fs/xfs/xfs_iops.c
@@ -0,0 +1,1218 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_acl.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_vnodeops.h"
+#include "xfs_inode_item.h"
+#include "xfs_trace.h"
+
+#include <linux/capability.h>
+#include <linux/xattr.h>
+#include <linux/namei.h>
+#include <linux/posix_acl.h>
+#include <linux/security.h>
+#include <linux/fiemap.h>
+#include <linux/slab.h>
+
+/*
+ * Bring the timestamps in the XFS inode uptodate.
+ *
+ * Used before writing the inode to disk.
+ */
+void
+xfs_synchronize_times(
+       xfs_inode_t     *ip)
+{
+       struct inode    *inode = VFS_I(ip);
+
+       ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
+       ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
+       ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
+       ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
+       ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
+       ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
+}
+
+/*
+ * If the linux inode is valid, mark it dirty, else mark the dirty state
+ * in the XFS inode to make sure we pick it up when reclaiming the inode.
+ */
+void
+xfs_mark_inode_dirty_sync(
+       xfs_inode_t     *ip)
+{
+       struct inode    *inode = VFS_I(ip);
+
+       if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
+               mark_inode_dirty_sync(inode);
+       else {
+               barrier();
+               ip->i_update_core = 1;
+       }
+}
+
+void
+xfs_mark_inode_dirty(
+       xfs_inode_t     *ip)
+{
+       struct inode    *inode = VFS_I(ip);
+
+       if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
+               mark_inode_dirty(inode);
+       else {
+               barrier();
+               ip->i_update_core = 1;
+       }
+
+}
+
+/*
+ * Hook in SELinux.  This is not quite correct yet, what we really need
+ * here (as we do for default ACLs) is a mechanism by which creation of
+ * these attrs can be journalled at inode creation time (along with the
+ * inode, of course, such that log replay can't cause these to be lost).
+ */
+STATIC int
+xfs_init_security(
+       struct inode    *inode,
+       struct inode    *dir,
+       const struct qstr *qstr)
+{
+       struct xfs_inode *ip = XFS_I(inode);
+       size_t          length;
+       void            *value;
+       unsigned char   *name;
+       int             error;
+
+       error = security_inode_init_security(inode, dir, qstr, (char **)&name,
+                                            &value, &length);
+       if (error) {
+               if (error == -EOPNOTSUPP)
+                       return 0;
+               return -error;
+       }
+
+       error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
+
+       kfree(name);
+       kfree(value);
+       return error;
+}
+
+static void
+xfs_dentry_to_name(
+       struct xfs_name *namep,
+       struct dentry   *dentry)
+{
+       namep->name = dentry->d_name.name;
+       namep->len = dentry->d_name.len;
+}
+
+STATIC void
+xfs_cleanup_inode(
+       struct inode    *dir,
+       struct inode    *inode,
+       struct dentry   *dentry)
+{
+       struct xfs_name teardown;
+
+       /* Oh, the horror.
+        * If we can't add the ACL or we fail in
+        * xfs_init_security we must back out.
+        * ENOSPC can hit here, among other things.
+        */
+       xfs_dentry_to_name(&teardown, dentry);
+
+       xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
+       iput(inode);
+}
+
+STATIC int
+xfs_vn_mknod(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       int             mode,
+       dev_t           rdev)
+{
+       struct inode    *inode;
+       struct xfs_inode *ip = NULL;
+       struct posix_acl *default_acl = NULL;
+       struct xfs_name name;
+       int             error;
+
+       /*
+        * Irix uses Missed'em'V split, but doesn't want to see
+        * the upper 5 bits of (14bit) major.
+        */
+       if (S_ISCHR(mode) || S_ISBLK(mode)) {
+               if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
+                       return -EINVAL;
+               rdev = sysv_encode_dev(rdev);
+       } else {
+               rdev = 0;
+       }
+
+       if (IS_POSIXACL(dir)) {
+               default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
+               if (IS_ERR(default_acl))
+                       return PTR_ERR(default_acl);
+
+               if (!default_acl)
+                       mode &= ~current_umask();
+       }
+
+       xfs_dentry_to_name(&name, dentry);
+       error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
+       if (unlikely(error))
+               goto out_free_acl;
+
+       inode = VFS_I(ip);
+
+       error = xfs_init_security(inode, dir, &dentry->d_name);
+       if (unlikely(error))
+               goto out_cleanup_inode;
+
+       if (default_acl) {
+               error = -xfs_inherit_acl(inode, default_acl);
+               default_acl = NULL;
+               if (unlikely(error))
+                       goto out_cleanup_inode;
+       }
+
+
+       d_instantiate(dentry, inode);
+       return -error;
+
+ out_cleanup_inode:
+       xfs_cleanup_inode(dir, inode, dentry);
+ out_free_acl:
+       posix_acl_release(default_acl);
+       return -error;
+}
+
+STATIC int
+xfs_vn_create(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       int             mode,
+       struct nameidata *nd)
+{
+       return xfs_vn_mknod(dir, dentry, mode, 0);
+}
+
+STATIC int
+xfs_vn_mkdir(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       int             mode)
+{
+       return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
+}
+
+STATIC struct dentry *
+xfs_vn_lookup(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       struct nameidata *nd)
+{
+       struct xfs_inode *cip;
+       struct xfs_name name;
+       int             error;
+
+       if (dentry->d_name.len >= MAXNAMELEN)
+               return ERR_PTR(-ENAMETOOLONG);
+
+       xfs_dentry_to_name(&name, dentry);
+       error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
+       if (unlikely(error)) {
+               if (unlikely(error != ENOENT))
+                       return ERR_PTR(-error);
+               d_add(dentry, NULL);
+               return NULL;
+       }
+
+       return d_splice_alias(VFS_I(cip), dentry);
+}
+
+STATIC struct dentry *
+xfs_vn_ci_lookup(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       struct nameidata *nd)
+{
+       struct xfs_inode *ip;
+       struct xfs_name xname;
+       struct xfs_name ci_name;
+       struct qstr     dname;
+       int             error;
+
+       if (dentry->d_name.len >= MAXNAMELEN)
+               return ERR_PTR(-ENAMETOOLONG);
+
+       xfs_dentry_to_name(&xname, dentry);
+       error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
+       if (unlikely(error)) {
+               if (unlikely(error != ENOENT))
+                       return ERR_PTR(-error);
+               /*
+                * call d_add(dentry, NULL) here when d_drop_negative_children
+                * is called in xfs_vn_mknod (ie. allow negative dentries
+                * with CI filesystems).
+                */
+               return NULL;
+       }
+
+       /* if exact match, just splice and exit */
+       if (!ci_name.name)
+               return d_splice_alias(VFS_I(ip), dentry);
+
+       /* else case-insensitive match... */
+       dname.name = ci_name.name;
+       dname.len = ci_name.len;
+       dentry = d_add_ci(dentry, VFS_I(ip), &dname);
+       kmem_free(ci_name.name);
+       return dentry;
+}
+
+STATIC int
+xfs_vn_link(
+       struct dentry   *old_dentry,
+       struct inode    *dir,
+       struct dentry   *dentry)
+{
+       struct inode    *inode = old_dentry->d_inode;
+       struct xfs_name name;
+       int             error;
+
+       xfs_dentry_to_name(&name, dentry);
+
+       error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
+       if (unlikely(error))
+               return -error;
+
+       ihold(inode);
+       d_instantiate(dentry, inode);
+       return 0;
+}
+
+STATIC int
+xfs_vn_unlink(
+       struct inode    *dir,
+       struct dentry   *dentry)
+{
+       struct xfs_name name;
+       int             error;
+
+       xfs_dentry_to_name(&name, dentry);
+
+       error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
+       if (error)
+               return error;
+
+       /*
+        * With unlink, the VFS makes the dentry "negative": no inode,
+        * but still hashed. This is incompatible with case-insensitive
+        * mode, so invalidate (unhash) the dentry in CI-mode.
+        */
+       if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
+               d_invalidate(dentry);
+       return 0;
+}
+
+STATIC int
+xfs_vn_symlink(
+       struct inode    *dir,
+       struct dentry   *dentry,
+       const char      *symname)
+{
+       struct inode    *inode;
+       struct xfs_inode *cip = NULL;
+       struct xfs_name name;
+       int             error;
+       mode_t          mode;
+
+       mode = S_IFLNK |
+               (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
+       xfs_dentry_to_name(&name, dentry);
+
+       error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
+       if (unlikely(error))
+               goto out;
+
+       inode = VFS_I(cip);
+
+       error = xfs_init_security(inode, dir, &dentry->d_name);
+       if (unlikely(error))
+               goto out_cleanup_inode;
+
+       d_instantiate(dentry, inode);
+       return 0;
+
+ out_cleanup_inode:
+       xfs_cleanup_inode(dir, inode, dentry);
+ out:
+       return -error;
+}
+
+STATIC int
+xfs_vn_rename(
+       struct inode    *odir,
+       struct dentry   *odentry,
+       struct inode    *ndir,
+       struct dentry   *ndentry)
+{
+       struct inode    *new_inode = ndentry->d_inode;
+       struct xfs_name oname;
+       struct xfs_name nname;
+
+       xfs_dentry_to_name(&oname, odentry);
+       xfs_dentry_to_name(&nname, ndentry);
+
+       return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
+                          XFS_I(ndir), &nname, new_inode ?
+                                               XFS_I(new_inode) : NULL);
+}
+
+/*
+ * careful here - this function can get called recursively, so
+ * we need to be very careful about how much stack we use.
+ * uio is kmalloced for this reason...
+ */
+STATIC void *
+xfs_vn_follow_link(
+       struct dentry           *dentry,
+       struct nameidata        *nd)
+{
+       char                    *link;
+       int                     error = -ENOMEM;
+
+       link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
+       if (!link)
+               goto out_err;
+
+       error = -xfs_readlink(XFS_I(dentry->d_inode), link);
+       if (unlikely(error))
+               goto out_kfree;
+
+       nd_set_link(nd, link);
+       return NULL;
+
+ out_kfree:
+       kfree(link);
+ out_err:
+       nd_set_link(nd, ERR_PTR(error));
+       return NULL;
+}
+
+STATIC void
+xfs_vn_put_link(
+       struct dentry   *dentry,
+       struct nameidata *nd,
+       void            *p)
+{
+       char            *s = nd_get_link(nd);
+
+       if (!IS_ERR(s))
+               kfree(s);
+}
+
+STATIC int
+xfs_vn_getattr(
+       struct vfsmount         *mnt,
+       struct dentry           *dentry,
+       struct kstat            *stat)
+{
+       struct inode            *inode = dentry->d_inode;
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+
+       trace_xfs_getattr(ip);
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       stat->size = XFS_ISIZE(ip);
+       stat->dev = inode->i_sb->s_dev;
+       stat->mode = ip->i_d.di_mode;
+       stat->nlink = ip->i_d.di_nlink;
+       stat->uid = ip->i_d.di_uid;
+       stat->gid = ip->i_d.di_gid;
+       stat->ino = ip->i_ino;
+       stat->atime = inode->i_atime;
+       stat->mtime = inode->i_mtime;
+       stat->ctime = inode->i_ctime;
+       stat->blocks =
+               XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
+
+
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFBLK:
+       case S_IFCHR:
+               stat->blksize = BLKDEV_IOSIZE;
+               stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
+                                  sysv_minor(ip->i_df.if_u2.if_rdev));
+               break;
+       default:
+               if (XFS_IS_REALTIME_INODE(ip)) {
+                       /*
+                        * If the file blocks are being allocated from a
+                        * realtime volume, then return the inode's realtime
+                        * extent size or the realtime volume's extent size.
+                        */
+                       stat->blksize =
+                               xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
+               } else
+                       stat->blksize = xfs_preferred_iosize(mp);
+               stat->rdev = 0;
+               break;
+       }
+
+       return 0;
+}
+
+int
+xfs_setattr_nonsize(
+       struct xfs_inode        *ip,
+       struct iattr            *iattr,
+       int                     flags)
+{
+       xfs_mount_t             *mp = ip->i_mount;
+       struct inode            *inode = VFS_I(ip);
+       int                     mask = iattr->ia_valid;
+       xfs_trans_t             *tp;
+       int                     error;
+       uid_t                   uid = 0, iuid = 0;
+       gid_t                   gid = 0, igid = 0;
+       struct xfs_dquot        *udqp = NULL, *gdqp = NULL;
+       struct xfs_dquot        *olddquot1 = NULL, *olddquot2 = NULL;
+
+       trace_xfs_setattr(ip);
+
+       if (mp->m_flags & XFS_MOUNT_RDONLY)
+               return XFS_ERROR(EROFS);
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       error = -inode_change_ok(inode, iattr);
+       if (error)
+               return XFS_ERROR(error);
+
+       ASSERT((mask & ATTR_SIZE) == 0);
+
+       /*
+        * If disk quotas is on, we make sure that the dquots do exist on disk,
+        * before we start any other transactions. Trying to do this later
+        * is messy. We don't care to take a readlock to look at the ids
+        * in inode here, because we can't hold it across the trans_reserve.
+        * If the IDs do change before we take the ilock, we're covered
+        * because the i_*dquot fields will get updated anyway.
+        */
+       if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
+               uint    qflags = 0;
+
+               if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
+                       uid = iattr->ia_uid;
+                       qflags |= XFS_QMOPT_UQUOTA;
+               } else {
+                       uid = ip->i_d.di_uid;
+               }
+               if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
+                       gid = iattr->ia_gid;
+                       qflags |= XFS_QMOPT_GQUOTA;
+               }  else {
+                       gid = ip->i_d.di_gid;
+               }
+
+               /*
+                * We take a reference when we initialize udqp and gdqp,
+                * so it is important that we never blindly double trip on
+                * the same variable. See xfs_create() for an example.
+                */
+               ASSERT(udqp == NULL);
+               ASSERT(gdqp == NULL);
+               error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
+                                        qflags, &udqp, &gdqp);
+               if (error)
+                       return error;
+       }
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+       error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+       if (error)
+               goto out_dqrele;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+       /*
+        * Change file ownership.  Must be the owner or privileged.
+        */
+       if (mask & (ATTR_UID|ATTR_GID)) {
+               /*
+                * These IDs could have changed since we last looked at them.
+                * But, we're assured that if the ownership did change
+                * while we didn't have the inode locked, inode's dquot(s)
+                * would have changed also.
+                */
+               iuid = ip->i_d.di_uid;
+               igid = ip->i_d.di_gid;
+               gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
+               uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
+
+               /*
+                * Do a quota reservation only if uid/gid is actually
+                * going to change.
+                */
+               if (XFS_IS_QUOTA_RUNNING(mp) &&
+                   ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+                    (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
+                       ASSERT(tp);
+                       error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
+                                               capable(CAP_FOWNER) ?
+                                               XFS_QMOPT_FORCE_RES : 0);
+                       if (error)      /* out of quota */
+                               goto out_trans_cancel;
+               }
+       }
+
+       xfs_trans_ijoin(tp, ip);
+
+       /*
+        * Change file ownership.  Must be the owner or privileged.
+        */
+       if (mask & (ATTR_UID|ATTR_GID)) {
+               /*
+                * CAP_FSETID overrides the following restrictions:
+                *
+                * The set-user-ID and set-group-ID bits of a file will be
+                * cleared upon successful return from chown()
+                */
+               if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+                   !capable(CAP_FSETID))
+                       ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+
+               /*
+                * Change the ownerships and register quota modifications
+                * in the transaction.
+                */
+               if (iuid != uid) {
+                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
+                               ASSERT(mask & ATTR_UID);
+                               ASSERT(udqp);
+                               olddquot1 = xfs_qm_vop_chown(tp, ip,
+                                                       &ip->i_udquot, udqp);
+                       }
+                       ip->i_d.di_uid = uid;
+                       inode->i_uid = uid;
+               }
+               if (igid != gid) {
+                       if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
+                               ASSERT(!XFS_IS_PQUOTA_ON(mp));
+                               ASSERT(mask & ATTR_GID);
+                               ASSERT(gdqp);
+                               olddquot2 = xfs_qm_vop_chown(tp, ip,
+                                                       &ip->i_gdquot, gdqp);
+                       }
+                       ip->i_d.di_gid = gid;
+                       inode->i_gid = gid;
+               }
+       }
+
+       /*
+        * Change file access modes.
+        */
+       if (mask & ATTR_MODE) {
+               umode_t mode = iattr->ia_mode;
+
+               if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+                       mode &= ~S_ISGID;
+
+               ip->i_d.di_mode &= S_IFMT;
+               ip->i_d.di_mode |= mode & ~S_IFMT;
+
+               inode->i_mode &= S_IFMT;
+               inode->i_mode |= mode & ~S_IFMT;
+       }
+
+       /*
+        * Change file access or modified times.
+        */
+       if (mask & ATTR_ATIME) {
+               inode->i_atime = iattr->ia_atime;
+               ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
+               ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
+               ip->i_update_core = 1;
+       }
+       if (mask & ATTR_CTIME) {
+               inode->i_ctime = iattr->ia_ctime;
+               ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+               ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
+               ip->i_update_core = 1;
+       }
+       if (mask & ATTR_MTIME) {
+               inode->i_mtime = iattr->ia_mtime;
+               ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+               ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+               ip->i_update_core = 1;
+       }
+
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+       XFS_STATS_INC(xs_ig_attrchg);
+
+       if (mp->m_flags & XFS_MOUNT_WSYNC)
+               xfs_trans_set_sync(tp);
+       error = xfs_trans_commit(tp, 0);
+
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       /*
+        * Release any dquot(s) the inode had kept before chown.
+        */
+       xfs_qm_dqrele(olddquot1);
+       xfs_qm_dqrele(olddquot2);
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
+
+       if (error)
+               return XFS_ERROR(error);
+
+       /*
+        * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
+        *           update.  We could avoid this with linked transactions
+        *           and passing down the transaction pointer all the way
+        *           to attr_set.  No previous user of the generic
+        *           Posix ACL code seems to care about this issue either.
+        */
+       if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
+               error = -xfs_acl_chmod(inode);
+               if (error)
+                       return XFS_ERROR(error);
+       }
+
+       return 0;
+
+out_trans_cancel:
+       xfs_trans_cancel(tp, 0);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_dqrele:
+       xfs_qm_dqrele(udqp);
+       xfs_qm_dqrele(gdqp);
+       return error;
+}
+
+/*
+ * Truncate file.  Must have write permission and not be a directory.
+ */
+int
+xfs_setattr_size(
+       struct xfs_inode        *ip,
+       struct iattr            *iattr,
+       int                     flags)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct inode            *inode = VFS_I(ip);
+       int                     mask = iattr->ia_valid;
+       struct xfs_trans        *tp;
+       int                     error;
+       uint                    lock_flags;
+       uint                    commit_flags = 0;
+
+       trace_xfs_setattr(ip);
+
+       if (mp->m_flags & XFS_MOUNT_RDONLY)
+               return XFS_ERROR(EROFS);
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return XFS_ERROR(EIO);
+
+       error = -inode_change_ok(inode, iattr);
+       if (error)
+               return XFS_ERROR(error);
+
+       ASSERT(S_ISREG(ip->i_d.di_mode));
+       ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
+                       ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
+                       ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
+
+       lock_flags = XFS_ILOCK_EXCL;
+       if (!(flags & XFS_ATTR_NOLOCK))
+               lock_flags |= XFS_IOLOCK_EXCL;
+       xfs_ilock(ip, lock_flags);
+
+       /*
+        * Short circuit the truncate case for zero length files.
+        */
+       if (iattr->ia_size == 0 &&
+           ip->i_size == 0 && ip->i_d.di_nextents == 0) {
+               if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
+                       goto out_unlock;
+
+               /*
+                * Use the regular setattr path to update the timestamps.
+                */
+               xfs_iunlock(ip, lock_flags);
+               iattr->ia_valid &= ~ATTR_SIZE;
+               return xfs_setattr_nonsize(ip, iattr, 0);
+       }
+
+       /*
+        * Make sure that the dquots are attached to the inode.
+        */
+       error = xfs_qm_dqattach_locked(ip, 0);
+       if (error)
+               goto out_unlock;
+
+       /*
+        * Now we can make the changes.  Before we join the inode to the
+        * transaction, take care of the part of the truncation that must be
+        * done without the inode lock.  This needs to be done before joining
+        * the inode to the transaction, because the inode cannot be unlocked
+        * once it is a part of the transaction.
+        */
+       if (iattr->ia_size > ip->i_size) {
+               /*
+                * Do the first part of growing a file: zero any data in the
+                * last block that is beyond the old EOF.  We need to do this
+                * before the inode is joined to the transaction to modify
+                * i_size.
+                */
+               error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
+               if (error)
+                       goto out_unlock;
+       }
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       lock_flags &= ~XFS_ILOCK_EXCL;
+
+       /*
+        * We are going to log the inode size change in this transaction so
+        * any previous writes that are beyond the on disk EOF and the new
+        * EOF that have not been written out need to be written here.  If we
+        * do not write the data out, we expose ourselves to the null files
+        * problem.
+        *
+        * Only flush from the on disk size to the smaller of the in memory
+        * file size or the new size as that's the range we really care about
+        * here and prevents waiting for other data not within the range we
+        * care about here.
+        */
+       if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
+               error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size,
+                                       XBF_ASYNC, FI_NONE);
+               if (error)
+                       goto out_unlock;
+       }
+
+       /*
+        * Wait for all I/O to complete.
+        */
+       xfs_ioend_wait(ip);
+
+       error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
+                                    xfs_get_blocks);
+       if (error)
+               goto out_unlock;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
+       error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
+                                XFS_TRANS_PERM_LOG_RES,
+                                XFS_ITRUNCATE_LOG_COUNT);
+       if (error)
+               goto out_trans_cancel;
+
+       truncate_setsize(inode, iattr->ia_size);
+
+       commit_flags = XFS_TRANS_RELEASE_LOG_RES;
+       lock_flags |= XFS_ILOCK_EXCL;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+       xfs_trans_ijoin(tp, ip);
+
+       /*
+        * Only change the c/mtime if we are changing the size or we are
+        * explicitly asked to change it.  This handles the semantic difference
+        * between truncate() and ftruncate() as implemented in the VFS.
+        *
+        * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
+        * special case where we need to update the times despite not having
+        * these flags set.  For all other operations the VFS set these flags
+        * explicitly if it wants a timestamp update.
+        */
+       if (iattr->ia_size != ip->i_size &&
+           (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
+               iattr->ia_ctime = iattr->ia_mtime =
+                       current_fs_time(inode->i_sb);
+               mask |= ATTR_CTIME | ATTR_MTIME;
+       }
+
+       if (iattr->ia_size > ip->i_size) {
+               ip->i_d.di_size = iattr->ia_size;
+               ip->i_size = iattr->ia_size;
+       } else if (iattr->ia_size <= ip->i_size ||
+                  (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
+               error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
+               if (error)
+                       goto out_trans_abort;
+
+               /*
+                * Truncated "down", so we're removing references to old data
+                * here - if we delay flushing for a long time, we expose
+                * ourselves unduly to the notorious NULL files problem.  So,
+                * we mark this inode and flush it when the file is closed,
+                * and do not wait the usual (long) time for writeout.
+                */
+               xfs_iflags_set(ip, XFS_ITRUNCATED);
+       }
+
+       if (mask & ATTR_CTIME) {
+               inode->i_ctime = iattr->ia_ctime;
+               ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
+               ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
+               ip->i_update_core = 1;
+       }
+       if (mask & ATTR_MTIME) {
+               inode->i_mtime = iattr->ia_mtime;
+               ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+               ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+               ip->i_update_core = 1;
+       }
+
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+       XFS_STATS_INC(xs_ig_attrchg);
+
+       if (mp->m_flags & XFS_MOUNT_WSYNC)
+               xfs_trans_set_sync(tp);
+
+       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+out_unlock:
+       if (lock_flags)
+               xfs_iunlock(ip, lock_flags);
+       return error;
+
+out_trans_abort:
+       commit_flags |= XFS_TRANS_ABORT;
+out_trans_cancel:
+       xfs_trans_cancel(tp, commit_flags);
+       goto out_unlock;
+}
+
+STATIC int
+xfs_vn_setattr(
+       struct dentry   *dentry,
+       struct iattr    *iattr)
+{
+       if (iattr->ia_valid & ATTR_SIZE)
+               return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
+       return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
+}
+
+#define XFS_FIEMAP_FLAGS       (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
+
+/*
+ * Call fiemap helper to fill in user data.
+ * Returns positive errors to xfs_getbmap.
+ */
+STATIC int
+xfs_fiemap_format(
+       void                    **arg,
+       struct getbmapx         *bmv,
+       int                     *full)
+{
+       int                     error;
+       struct fiemap_extent_info *fieinfo = *arg;
+       u32                     fiemap_flags = 0;
+       u64                     logical, physical, length;
+
+       /* Do nothing for a hole */
+       if (bmv->bmv_block == -1LL)
+               return 0;
+
+       logical = BBTOB(bmv->bmv_offset);
+       physical = BBTOB(bmv->bmv_block);
+       length = BBTOB(bmv->bmv_length);
+
+       if (bmv->bmv_oflags & BMV_OF_PREALLOC)
+               fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
+       else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
+               fiemap_flags |= FIEMAP_EXTENT_DELALLOC;
+               physical = 0;   /* no block yet */
+       }
+       if (bmv->bmv_oflags & BMV_OF_LAST)
+               fiemap_flags |= FIEMAP_EXTENT_LAST;
+
+       error = fiemap_fill_next_extent(fieinfo, logical, physical,
+                                       length, fiemap_flags);
+       if (error > 0) {
+               error = 0;
+               *full = 1;      /* user array now full */
+       }
+
+       return -error;
+}
+
+STATIC int
+xfs_vn_fiemap(
+       struct inode            *inode,
+       struct fiemap_extent_info *fieinfo,
+       u64                     start,
+       u64                     length)
+{
+       xfs_inode_t             *ip = XFS_I(inode);
+       struct getbmapx         bm;
+       int                     error;
+
+       error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
+       if (error)
+               return error;
+
+       /* Set up bmap header for xfs internal routine */
+       bm.bmv_offset = BTOBB(start);
+       /* Special case for whole file */
+       if (length == FIEMAP_MAX_OFFSET)
+               bm.bmv_length = -1LL;
+       else
+               bm.bmv_length = BTOBB(length);
+
+       /* We add one because in getbmap world count includes the header */
+       bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
+                                       fieinfo->fi_extents_max + 1;
+       bm.bmv_count = min_t(__s32, bm.bmv_count,
+                            (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
+       bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
+       if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
+               bm.bmv_iflags |= BMV_IF_ATTRFORK;
+       if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
+               bm.bmv_iflags |= BMV_IF_DELALLOC;
+
+       error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
+       if (error)
+               return -error;
+
+       return 0;
+}
+
+static const struct inode_operations xfs_inode_operations = {
+       .get_acl                = xfs_get_acl,
+       .getattr                = xfs_vn_getattr,
+       .setattr                = xfs_vn_setattr,
+       .setxattr               = generic_setxattr,
+       .getxattr               = generic_getxattr,
+       .removexattr            = generic_removexattr,
+       .listxattr              = xfs_vn_listxattr,
+       .fiemap                 = xfs_vn_fiemap,
+};
+
+static const struct inode_operations xfs_dir_inode_operations = {
+       .create                 = xfs_vn_create,
+       .lookup                 = xfs_vn_lookup,
+       .link                   = xfs_vn_link,
+       .unlink                 = xfs_vn_unlink,
+       .symlink                = xfs_vn_symlink,
+       .mkdir                  = xfs_vn_mkdir,
+       /*
+        * Yes, XFS uses the same method for rmdir and unlink.
+        *
+        * There are some subtile differences deeper in the code,
+        * but we use S_ISDIR to check for those.
+        */
+       .rmdir                  = xfs_vn_unlink,
+       .mknod                  = xfs_vn_mknod,
+       .rename                 = xfs_vn_rename,
+       .get_acl                = xfs_get_acl,
+       .getattr                = xfs_vn_getattr,
+       .setattr                = xfs_vn_setattr,
+       .setxattr               = generic_setxattr,
+       .getxattr               = generic_getxattr,
+       .removexattr            = generic_removexattr,
+       .listxattr              = xfs_vn_listxattr,
+};
+
+static const struct inode_operations xfs_dir_ci_inode_operations = {
+       .create                 = xfs_vn_create,
+       .lookup                 = xfs_vn_ci_lookup,
+       .link                   = xfs_vn_link,
+       .unlink                 = xfs_vn_unlink,
+       .symlink                = xfs_vn_symlink,
+       .mkdir                  = xfs_vn_mkdir,
+       /*
+        * Yes, XFS uses the same method for rmdir and unlink.
+        *
+        * There are some subtile differences deeper in the code,
+        * but we use S_ISDIR to check for those.
+        */
+       .rmdir                  = xfs_vn_unlink,
+       .mknod                  = xfs_vn_mknod,
+       .rename                 = xfs_vn_rename,
+       .get_acl                = xfs_get_acl,
+       .getattr                = xfs_vn_getattr,
+       .setattr                = xfs_vn_setattr,
+       .setxattr               = generic_setxattr,
+       .getxattr               = generic_getxattr,
+       .removexattr            = generic_removexattr,
+       .listxattr              = xfs_vn_listxattr,
+};
+
+static const struct inode_operations xfs_symlink_inode_operations = {
+       .readlink               = generic_readlink,
+       .follow_link            = xfs_vn_follow_link,
+       .put_link               = xfs_vn_put_link,
+       .get_acl                = xfs_get_acl,
+       .getattr                = xfs_vn_getattr,
+       .setattr                = xfs_vn_setattr,
+       .setxattr               = generic_setxattr,
+       .getxattr               = generic_getxattr,
+       .removexattr            = generic_removexattr,
+       .listxattr              = xfs_vn_listxattr,
+};
+
+STATIC void
+xfs_diflags_to_iflags(
+       struct inode            *inode,
+       struct xfs_inode        *ip)
+{
+       if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+               inode->i_flags |= S_IMMUTABLE;
+       else
+               inode->i_flags &= ~S_IMMUTABLE;
+       if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
+               inode->i_flags |= S_APPEND;
+       else
+               inode->i_flags &= ~S_APPEND;
+       if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
+               inode->i_flags |= S_SYNC;
+       else
+               inode->i_flags &= ~S_SYNC;
+       if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
+               inode->i_flags |= S_NOATIME;
+       else
+               inode->i_flags &= ~S_NOATIME;
+}
+
+/*
+ * Initialize the Linux inode, set up the operation vectors and
+ * unlock the inode.
+ *
+ * When reading existing inodes from disk this is called directly
+ * from xfs_iget, when creating a new inode it is called from
+ * xfs_ialloc after setting up the inode.
+ *
+ * We are always called with an uninitialised linux inode here.
+ * We need to initialise the necessary fields and take a reference
+ * on it.
+ */
+void
+xfs_setup_inode(
+       struct xfs_inode        *ip)
+{
+       struct inode            *inode = &ip->i_vnode;
+
+       inode->i_ino = ip->i_ino;
+       inode->i_state = I_NEW;
+
+       inode_sb_list_add(inode);
+       /* make the inode look hashed for the writeback code */
+       hlist_add_fake(&inode->i_hash);
+
+       inode->i_mode   = ip->i_d.di_mode;
+       inode->i_nlink  = ip->i_d.di_nlink;
+       inode->i_uid    = ip->i_d.di_uid;
+       inode->i_gid    = ip->i_d.di_gid;
+
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFBLK:
+       case S_IFCHR:
+               inode->i_rdev =
+                       MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
+                             sysv_minor(ip->i_df.if_u2.if_rdev));
+               break;
+       default:
+               inode->i_rdev = 0;
+               break;
+       }
+
+       inode->i_generation = ip->i_d.di_gen;
+       i_size_write(inode, ip->i_d.di_size);
+       inode->i_atime.tv_sec   = ip->i_d.di_atime.t_sec;
+       inode->i_atime.tv_nsec  = ip->i_d.di_atime.t_nsec;
+       inode->i_mtime.tv_sec   = ip->i_d.di_mtime.t_sec;
+       inode->i_mtime.tv_nsec  = ip->i_d.di_mtime.t_nsec;
+       inode->i_ctime.tv_sec   = ip->i_d.di_ctime.t_sec;
+       inode->i_ctime.tv_nsec  = ip->i_d.di_ctime.t_nsec;
+       xfs_diflags_to_iflags(inode, ip);
+
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFREG:
+               inode->i_op = &xfs_inode_operations;
+               inode->i_fop = &xfs_file_operations;
+               inode->i_mapping->a_ops = &xfs_address_space_operations;
+               break;
+       case S_IFDIR:
+               if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
+                       inode->i_op = &xfs_dir_ci_inode_operations;
+               else
+                       inode->i_op = &xfs_dir_inode_operations;
+               inode->i_fop = &xfs_dir_file_operations;
+               break;
+       case S_IFLNK:
+               inode->i_op = &xfs_symlink_inode_operations;
+               if (!(ip->i_df.if_flags & XFS_IFINLINE))
+                       inode->i_mapping->a_ops = &xfs_address_space_operations;
+               break;
+       default:
+               inode->i_op = &xfs_inode_operations;
+               init_special_inode(inode, inode->i_mode, inode->i_rdev);
+               break;
+       }
+
+       /*
+        * If there is no attribute fork no ACL can exist on this inode,
+        * and it can't have any file capabilities attached to it either.
+        */
+       if (!XFS_IFORK_Q(ip)) {
+               inode_has_no_xattr(inode);
+               cache_no_acl(inode);
+       }
+
+       xfs_iflags_clear(ip, XFS_INEW);
+       barrier();
+
+       unlock_new_inode(inode);
+}
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h

new file mode 100644 (file)

index 0000000..ef41c92
--- /dev/null
+++ b/fs/xfs/xfs_iops.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_IOPS_H__
+#define __XFS_IOPS_H__
+
+struct xfs_inode;
+
+extern const struct file_operations xfs_file_operations;
+extern const struct file_operations xfs_dir_file_operations;
+
+extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
+
+extern void xfs_setup_inode(struct xfs_inode *);
+
+#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h

new file mode 100644 (file)

index 0000000..1e8a45e
--- /dev/null
+++ b/fs/xfs/xfs_linux.h
@@ -0,0 +1,309 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_LINUX__
+#define __XFS_LINUX__
+
+#include <linux/types.h>
+
+/*
+ * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
+ * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
+ */
+#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
+# define XFS_BIG_BLKNOS        1
+# define XFS_BIG_INUMS 1
+#else
+# define XFS_BIG_BLKNOS        0
+# define XFS_BIG_INUMS 0
+#endif
+
+#include "xfs_types.h"
+
+#include "kmem.h"
+#include "mrlock.h"
+#include "time.h"
+#include "uuid.h"
+
+#include <linux/semaphore.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/file.h>
+#include <linux/swap.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/major.h>
+#include <linux/pagemap.h>
+#include <linux/vfs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#include <linux/sort.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+#include <linux/delay.h>
+#include <linux/log2.h>
+#include <linux/spinlock.h>
+#include <linux/random.h>
+#include <linux/ctype.h>
+#include <linux/writeback.h>
+#include <linux/capability.h>
+#include <linux/list_sort.h>
+
+#include <asm/page.h>
+#include <asm/div64.h>
+#include <asm/param.h>
+#include <asm/uaccess.h>
+#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+
+#include "xfs_vnode.h"
+#include "xfs_stats.h"
+#include "xfs_sysctl.h"
+#include "xfs_iops.h"
+#include "xfs_aops.h"
+#include "xfs_super.h"
+#include "xfs_buf.h"
+#include "xfs_message.h"
+
+#ifdef __BIG_ENDIAN
+#define XFS_NATIVE_HOST 1
+#else
+#undef XFS_NATIVE_HOST
+#endif
+
+/*
+ * Feature macros (disable/enable)
+ */
+#ifdef CONFIG_SMP
+#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
+#else
+#undef  HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
+#endif
+
+#define irix_sgid_inherit      xfs_params.sgid_inherit.val
+#define irix_symlink_mode      xfs_params.symlink_mode.val
+#define xfs_panic_mask         xfs_params.panic_mask.val
+#define xfs_error_level                xfs_params.error_level.val
+#define xfs_syncd_centisecs    xfs_params.syncd_timer.val
+#define xfs_stats_clear                xfs_params.stats_clear.val
+#define xfs_inherit_sync       xfs_params.inherit_sync.val
+#define xfs_inherit_nodump     xfs_params.inherit_nodump.val
+#define xfs_inherit_noatime    xfs_params.inherit_noatim.val
+#define xfs_buf_timer_centisecs        xfs_params.xfs_buf_timer.val
+#define xfs_buf_age_centisecs  xfs_params.xfs_buf_age.val
+#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val
+#define xfs_rotorstep          xfs_params.rotorstep.val
+#define xfs_inherit_nodefrag   xfs_params.inherit_nodfrg.val
+#define xfs_fstrm_centisecs    xfs_params.fstrm_timer.val
+
+#define current_cpu()          (raw_smp_processor_id())
+#define current_pid()          (current->pid)
+#define current_test_flags(f)  (current->flags & (f))
+#define current_set_flags_nested(sp, f)                \
+               (*(sp) = current->flags, current->flags |= (f))
+#define current_clear_flags_nested(sp, f)      \
+               (*(sp) = current->flags, current->flags &= ~(f))
+#define current_restore_flags_nested(sp, f)    \
+               (current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
+
+#define spinlock_destroy(lock)
+
+#define NBBY           8               /* number of bits per byte */
+
+/*
+ * Size of block device i/o is parameterized here.
+ * Currently the system supports page-sized i/o.
+ */
+#define        BLKDEV_IOSHIFT          PAGE_CACHE_SHIFT
+#define        BLKDEV_IOSIZE           (1<<BLKDEV_IOSHIFT)
+/* number of BB's per block device block */
+#define        BLKDEV_BB               BTOBB(BLKDEV_IOSIZE)
+
+#define ENOATTR                ENODATA         /* Attribute not found */
+#define EWRONGFS       EINVAL          /* Mount with wrong filesystem type */
+#define EFSCORRUPTED   EUCLEAN         /* Filesystem is corrupted */
+
+#define SYNCHRONIZE()  barrier()
+#define __return_address __builtin_return_address(0)
+
+#define XFS_PROJID_DEFAULT     0
+#define MAXPATHLEN     1024
+
+#define MIN(a,b)       (min(a,b))
+#define MAX(a,b)       (max(a,b))
+#define howmany(x, y)  (((x)+((y)-1))/(y))
+
+/*
+ * Various platform dependent calls that don't fit anywhere else
+ */
+#define xfs_sort(a,n,s,fn)     sort(a,n,s,fn,NULL)
+#define xfs_stack_trace()      dump_stack()
+
+
+/* Move the kernel do_div definition off to one side */
+
+#if defined __i386__
+/* For ia32 we need to pull some tricks to get past various versions
+ * of the compiler which do not like us using do_div in the middle
+ * of large functions.
+ */
+static inline __u32 xfs_do_div(void *a, __u32 b, int n)
+{
+       __u32   mod;
+
+       switch (n) {
+               case 4:
+                       mod = *(__u32 *)a % b;
+                       *(__u32 *)a = *(__u32 *)a / b;
+                       return mod;
+               case 8:
+                       {
+                       unsigned long __upper, __low, __high, __mod;
+                       __u64   c = *(__u64 *)a;
+                       __upper = __high = c >> 32;
+                       __low = c;
+                       if (__high) {
+                               __upper = __high % (b);
+                               __high = __high / (b);
+                       }
+                       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
+                       asm("":"=A" (c):"a" (__low),"d" (__high));
+                       *(__u64 *)a = c;
+                       return __mod;
+                       }
+       }
+
+       /* NOTREACHED */
+       return 0;
+}
+
+/* Side effect free 64 bit mod operation */
+static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
+{
+       switch (n) {
+               case 4:
+                       return *(__u32 *)a % b;
+               case 8:
+                       {
+                       unsigned long __upper, __low, __high, __mod;
+                       __u64   c = *(__u64 *)a;
+                       __upper = __high = c >> 32;
+                       __low = c;
+                       if (__high) {
+                               __upper = __high % (b);
+                               __high = __high / (b);
+                       }
+                       asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
+                       asm("":"=A" (c):"a" (__low),"d" (__high));
+                       return __mod;
+                       }
+       }
+
+       /* NOTREACHED */
+       return 0;
+}
+#else
+static inline __u32 xfs_do_div(void *a, __u32 b, int n)
+{
+       __u32   mod;
+
+       switch (n) {
+               case 4:
+                       mod = *(__u32 *)a % b;
+                       *(__u32 *)a = *(__u32 *)a / b;
+                       return mod;
+               case 8:
+                       mod = do_div(*(__u64 *)a, b);
+                       return mod;
+       }
+
+       /* NOTREACHED */
+       return 0;
+}
+
+/* Side effect free 64 bit mod operation */
+static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
+{
+       switch (n) {
+               case 4:
+                       return *(__u32 *)a % b;
+               case 8:
+                       {
+                       __u64   c = *(__u64 *)a;
+                       return do_div(c, b);
+                       }
+       }
+
+       /* NOTREACHED */
+       return 0;
+}
+#endif
+
+#undef do_div
+#define do_div(a, b)   xfs_do_div(&(a), (b), sizeof(a))
+#define do_mod(a, b)   xfs_do_mod(&(a), (b), sizeof(a))
+
+static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
+{
+       x += y - 1;
+       do_div(x, y);
+       return(x * y);
+}
+
+static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
+{
+       x += y - 1;
+       do_div(x, y);
+       return x;
+}
+
+/* ARM old ABI has some weird alignment/padding */
+#if defined(__arm__) && !defined(__ARM_EABI__)
+#define __arch_pack __attribute__((packed))
+#else
+#define __arch_pack
+#endif
+
+#define ASSERT_ALWAYS(expr)    \
+       (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+
+#ifndef DEBUG
+#define ASSERT(expr)   ((void)0)
+
+#ifndef STATIC
+# define STATIC static noinline
+#endif
+
+#else /* DEBUG */
+
+#define ASSERT(expr)   \
+       (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+
+#ifndef STATIC
+# define STATIC noinline
+#endif
+
+#endif /* DEBUG */
+
+#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c

index 06ff8437ed8e4cab29048de38dfa15f165739b83..3a8d4f66d70253089410b3a8cdb1f52868e5a3bd 100644 (file)
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -878,7 +878,7 @@ xlog_iodone(xfs_buf_t *bp)
         /*
          * Race to shutdown the filesystem if we see an error.
          */
-       if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp,
+       if (XFS_TEST_ERROR((xfs_buf_geterror(bp)), l->l_mp,
                         XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
                 xfs_ioerror_alert("xlog_iodone", l->l_mp, bp, XFS_BUF_ADDR(bp));
                 XFS_BUF_STALE(bp);
@@ -1051,7 +1051,6 @@ xlog_alloc_log(xfs_mount_t        *mp,
         if (!bp)
                 goto out_free_log;
         bp->b_iodone = xlog_iodone;
-       ASSERT(XFS_BUF_ISBUSY(bp));
         ASSERT(xfs_buf_islocked(bp));
         log->l_xbuf = bp;
  
@@ -1108,7 +1107,6 @@ xlog_alloc_log(xfs_mount_t        *mp,
                 iclog->ic_callback_tail = &(iclog->ic_callback);
                 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
  
-               ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
                 ASSERT(xfs_buf_islocked(iclog->ic_bp));
                 init_waitqueue_head(&iclog->ic_force_wait);
                 init_waitqueue_head(&iclog->ic_write_wait);
@@ -1248,7 +1246,7 @@ xlog_bdstrat(
         struct xlog_in_core     *iclog = bp->b_fspriv;
  
         if (iclog->ic_state & XLOG_STATE_IOERROR) {
-               XFS_BUF_ERROR(bp, EIO);
+               xfs_buf_ioerror(bp, EIO);
                 XFS_BUF_STALE(bp);
                 xfs_buf_ioend(bp, 0);
                 /*
@@ -1355,7 +1353,6 @@ xlog_sync(xlog_t          *log,
         XFS_BUF_SET_COUNT(bp, count);
         bp->b_fspriv = iclog;
         XFS_BUF_ZEROFLAGS(bp);
-       XFS_BUF_BUSY(bp);
         XFS_BUF_ASYNC(bp);
         bp->b_flags |= XBF_SYNCIO;
  
@@ -1398,16 +1395,15 @@ xlog_sync(xlog_t                *log,
         if (split) {
                 bp = iclog->ic_log->l_xbuf;
                 XFS_BUF_SET_ADDR(bp, 0);             /* logical 0 */
-               XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+
-                                           (__psint_t)count), split);
+               xfs_buf_associate_memory(bp,
+                               (char *)&iclog->ic_header + count, split);
                 bp->b_fspriv = iclog;
                 XFS_BUF_ZEROFLAGS(bp);
-               XFS_BUF_BUSY(bp);
                 XFS_BUF_ASYNC(bp);
                 bp->b_flags |= XBF_SYNCIO;
                 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
                         bp->b_flags |= XBF_FUA;
-               dptr = XFS_BUF_PTR(bp);
+               dptr = bp->b_addr;
                 /*
                  * Bump the cycle numbers at the start of each block
                  * since this part of the buffer is at the start of
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c

index 052a2c0ec5fbecaa2e6d0aebffc8289bdc43f465..a199dbcee7d8c274b14a648c1252986234b04f9e 100644 (file)
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -147,7 +147,7 @@ xlog_align(
         xfs_daddr_t     offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
  
         ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp));
-       return XFS_BUF_PTR(bp) + BBTOB(offset);
+       return bp->b_addr + BBTOB(offset);
  }
  
  
@@ -178,9 +178,7 @@ xlog_bread_noalign(
  
         XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
         XFS_BUF_READ(bp);
-       XFS_BUF_BUSY(bp);
         XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
-       XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
  
         xfsbdstrat(log->l_mp, bp);
         error = xfs_buf_iowait(bp);
@@ -220,18 +218,18 @@ xlog_bread_offset(
         xfs_buf_t       *bp,
         xfs_caddr_t     offset)
  {
-       xfs_caddr_t     orig_offset = XFS_BUF_PTR(bp);
+       xfs_caddr_t     orig_offset = bp->b_addr;
         int             orig_len = bp->b_buffer_length;
         int             error, error2;
  
-       error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks));
+       error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks));
         if (error)
                 return error;
  
         error = xlog_bread_noalign(log, blk_no, nbblks, bp);
  
         /* must reset buffer pointer even on error */
-       error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len);
+       error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len);
         if (error)
                 return error;
         return error2;
@@ -266,11 +264,9 @@ xlog_bwrite(
  
         XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
         XFS_BUF_ZEROFLAGS(bp);
-       XFS_BUF_BUSY(bp);
-       XFS_BUF_HOLD(bp);
+       xfs_buf_hold(bp);
         xfs_buf_lock(bp);
         XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
-       XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
  
         if ((error = xfs_bwrite(log->l_mp, bp)))
                 xfs_ioerror_alert("xlog_bwrite", log->l_mp,
@@ -360,7 +356,7 @@ STATIC void
  xlog_recover_iodone(
         struct xfs_buf  *bp)
  {
-       if (XFS_BUF_GETERROR(bp)) {
+       if (bp->b_error) {
                 /*
                  * We're not going to bother about retrying
                  * this during recovery. One strike!
@@ -1262,7 +1258,7 @@ xlog_write_log_records(
                  */
                 ealign = round_down(end_block, sectbb);
                 if (j == 0 && (start_block + endcount > ealign)) {
-                       offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block);
+                       offset = bp->b_addr + BBTOB(ealign - start_block);
                         error = xlog_bread_offset(log, ealign, sectbb,
                                                         bp, offset);
                         if (error)
@@ -2135,15 +2131,16 @@ xlog_recover_buffer_pass2(
  
         bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
                           buf_flags);
-       if (XFS_BUF_ISERROR(bp)) {
+       if (!bp)
+               return XFS_ERROR(ENOMEM);
+       error = bp->b_error;
+       if (error) {
                 xfs_ioerror_alert("xlog_recover_do..(read#1)", mp,
                                   bp, buf_f->blf_blkno);
-               error = XFS_BUF_GETERROR(bp);
                 xfs_buf_relse(bp);
                 return error;
         }
  
-       error = 0;
         if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
                 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
         } else if (buf_f->blf_flags &
@@ -2227,14 +2224,17 @@ xlog_recover_inode_pass2(
  
         bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
                           XBF_LOCK);
-       if (XFS_BUF_ISERROR(bp)) {
+       if (!bp) {
+               error = ENOMEM;
+               goto error;
+       }
+       error = bp->b_error;
+       if (error) {
                 xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
                                   bp, in_f->ilf_blkno);
-               error = XFS_BUF_GETERROR(bp);
                 xfs_buf_relse(bp);
                 goto error;
         }
-       error = 0;
         ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
         dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);
  
@@ -3437,7 +3437,7 @@ xlog_do_recovery_pass(
                         /*
                          * Check for header wrapping around physical end-of-log
                          */
-                       offset = XFS_BUF_PTR(hbp);
+                       offset = hbp->b_addr;
                         split_hblks = 0;
                         wrapped_hblks = 0;
                         if (blk_no + hblks <= log->l_logBBsize) {
@@ -3497,7 +3497,7 @@ xlog_do_recovery_pass(
                         } else {
                                 /* This log record is split across the
                                  * physical end of log */
-                               offset = XFS_BUF_PTR(dbp);
+                               offset = dbp->b_addr;
                                 split_bblks = 0;
                                 if (blk_no != log->l_logBBsize) {
                                         /* some data is before the physical
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c

new file mode 100644 (file)

index 0000000..bd672de
--- /dev/null
+++ b/fs/xfs/xfs_message.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2011 Red Hat, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+
+/*
+ * XFS logging functions
+ */
+static void
+__xfs_printk(
+       const char              *level,
+       const struct xfs_mount  *mp,
+       struct va_format        *vaf)
+{
+       if (mp && mp->m_fsname) {
+               printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
+               return;
+       }
+       printk("%sXFS: %pV\n", level, vaf);
+}
+
+#define define_xfs_printk_level(func, kern_level)              \
+void func(const struct xfs_mount *mp, const char *fmt, ...)    \
+{                                                              \
+       struct va_format        vaf;                            \
+       va_list                 args;                           \
+                                                               \
+       va_start(args, fmt);                                    \
+                                                               \
+       vaf.fmt = fmt;                                          \
+       vaf.va = &args;                                         \
+                                                               \
+       __xfs_printk(kern_level, mp, &vaf);                     \
+       va_end(args);                                           \
+}                                                              \
+
+define_xfs_printk_level(xfs_emerg, KERN_EMERG);
+define_xfs_printk_level(xfs_alert, KERN_ALERT);
+define_xfs_printk_level(xfs_crit, KERN_CRIT);
+define_xfs_printk_level(xfs_err, KERN_ERR);
+define_xfs_printk_level(xfs_warn, KERN_WARNING);
+define_xfs_printk_level(xfs_notice, KERN_NOTICE);
+define_xfs_printk_level(xfs_info, KERN_INFO);
+#ifdef DEBUG
+define_xfs_printk_level(xfs_debug, KERN_DEBUG);
+#endif
+
+void
+xfs_alert_tag(
+       const struct xfs_mount  *mp,
+       int                     panic_tag,
+       const char              *fmt, ...)
+{
+       struct va_format        vaf;
+       va_list                 args;
+       int                     do_panic = 0;
+
+       if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
+               xfs_alert(mp, "Transforming an alert into a BUG.");
+               do_panic = 1;
+       }
+
+       va_start(args, fmt);
+
+       vaf.fmt = fmt;
+       vaf.va = &args;
+
+       __xfs_printk(KERN_ALERT, mp, &vaf);
+       va_end(args);
+
+       BUG_ON(do_panic);
+}
+
+void
+assfail(char *expr, char *file, int line)
+{
+       xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
+               expr, file, line);
+       BUG();
+}
+
+void
+xfs_hex_dump(void *p, int length)
+{
+       print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
+}
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h

new file mode 100644 (file)

index 0000000..7fb7ea0
--- /dev/null
+++ b/fs/xfs/xfs_message.h
@@ -0,0 +1,39 @@
+#ifndef __XFS_MESSAGE_H
+#define __XFS_MESSAGE_H 1
+
+struct xfs_mount;
+
+extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_alert_tag(const struct xfs_mount *mp, int tag,
+                        const char *fmt, ...)
+        __attribute__ ((format (printf, 3, 4)));
+extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+
+#ifdef DEBUG
+extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+        __attribute__ ((format (printf, 2, 3)));
+#else
+static inline void
+__attribute__ ((format (printf, 2, 3)))
+xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+{
+}
+#endif
+
+extern void assfail(char *expr, char *f, int l);
+
+extern void xfs_hex_dump(void *p, int length);
+
+#endif /* __XFS_MESSAGE_H */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c

index 092e16ae4d9d69615bb2c1ca48b74e8d983d5979..0081657ad985ee27417ba4b73c142cb3d43e78b7 100644 (file)
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1615,7 +1615,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
                 XFS_BUF_UNDELAYWRITE(sbp);
                 XFS_BUF_WRITE(sbp);
                 XFS_BUF_UNASYNC(sbp);
-               ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
+               ASSERT(sbp->b_target == mp->m_ddev_targp);
                 xfsbdstrat(mp, sbp);
                 error = xfs_buf_iowait(sbp);
                 if (error)
@@ -1938,7 +1938,7 @@ xfs_getsb(
                 xfs_buf_lock(bp);
         }
  
-       XFS_BUF_HOLD(bp);
+       xfs_buf_hold(bp);
         ASSERT(XFS_BUF_ISDONE(bp));
         return bp;
  }
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c

new file mode 100644 (file)

index 0000000..9a0aa76
--- /dev/null
+++ b/fs/xfs/xfs_qm.c
@@ -0,0 +1,2416 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_ialloc.h"
+#include "xfs_itable.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_bmap.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_space.h"
+#include "xfs_utils.h"
+#include "xfs_qm.h"
+#include "xfs_trace.h"
+
+/*
+ * The global quota manager. There is only one of these for the entire
+ * system, _not_ one per file system. XQM keeps track of the overall
+ * quota functionality, including maintaining the freelist and hash
+ * tables of dquots.
+ */
+struct mutex   xfs_Gqm_lock;
+struct xfs_qm  *xfs_Gqm;
+uint           ndquot;
+
+kmem_zone_t    *qm_dqzone;
+kmem_zone_t    *qm_dqtrxzone;
+
+STATIC void    xfs_qm_list_init(xfs_dqlist_t *, char *, int);
+STATIC void    xfs_qm_list_destroy(xfs_dqlist_t *);
+
+STATIC int     xfs_qm_init_quotainos(xfs_mount_t *);
+STATIC int     xfs_qm_init_quotainfo(xfs_mount_t *);
+STATIC int     xfs_qm_shake(struct shrinker *, struct shrink_control *);
+
+static struct shrinker xfs_qm_shaker = {
+       .shrink = xfs_qm_shake,
+       .seeks = DEFAULT_SEEKS,
+};
+
+/*
+ * Initialize the XQM structure.
+ * Note that there is not one quota manager per file system.
+ */
+STATIC struct xfs_qm *
+xfs_Gqm_init(void)
+{
+       xfs_dqhash_t    *udqhash, *gdqhash;
+       xfs_qm_t        *xqm;
+       size_t          hsize;
+       uint            i;
+
+       /*
+        * Initialize the dquot hash tables.
+        */
+       udqhash = kmem_zalloc_greedy(&hsize,
+                                    XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
+                                    XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
+       if (!udqhash)
+               goto out;
+
+       gdqhash = kmem_zalloc_large(hsize);
+       if (!gdqhash)
+               goto out_free_udqhash;
+
+       hsize /= sizeof(xfs_dqhash_t);
+       ndquot = hsize << 8;
+
+       xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
+       xqm->qm_dqhashmask = hsize - 1;
+       xqm->qm_usr_dqhtable = udqhash;
+       xqm->qm_grp_dqhtable = gdqhash;
+       ASSERT(xqm->qm_usr_dqhtable != NULL);
+       ASSERT(xqm->qm_grp_dqhtable != NULL);
+
+       for (i = 0; i < hsize; i++) {
+               xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
+               xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
+       }
+
+       /*
+        * Freelist of all dquots of all file systems
+        */
+       INIT_LIST_HEAD(&xqm->qm_dqfrlist);
+       xqm->qm_dqfrlist_cnt = 0;
+       mutex_init(&xqm->qm_dqfrlist_lock);
+
+       /*
+        * dquot zone. we register our own low-memory callback.
+        */
+       if (!qm_dqzone) {
+               xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
+                                               "xfs_dquots");
+               qm_dqzone = xqm->qm_dqzone;
+       } else
+               xqm->qm_dqzone = qm_dqzone;
+
+       register_shrinker(&xfs_qm_shaker);
+
+       /*
+        * The t_dqinfo portion of transactions.
+        */
+       if (!qm_dqtrxzone) {
+               xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
+                                                  "xfs_dqtrx");
+               qm_dqtrxzone = xqm->qm_dqtrxzone;
+       } else
+               xqm->qm_dqtrxzone = qm_dqtrxzone;
+
+       atomic_set(&xqm->qm_totaldquots, 0);
+       xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
+       xqm->qm_nrefs = 0;
+       return xqm;
+
+ out_free_udqhash:
+       kmem_free_large(udqhash);
+ out:
+       return NULL;
+}
+
+/*
+ * Destroy the global quota manager when its reference count goes to zero.
+ */
+STATIC void
+xfs_qm_destroy(
+       struct xfs_qm   *xqm)
+{
+       struct xfs_dquot *dqp, *n;
+       int             hsize, i;
+
+       ASSERT(xqm != NULL);
+       ASSERT(xqm->qm_nrefs == 0);
+       unregister_shrinker(&xfs_qm_shaker);
+       hsize = xqm->qm_dqhashmask + 1;
+       for (i = 0; i < hsize; i++) {
+               xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
+               xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
+       }
+       kmem_free_large(xqm->qm_usr_dqhtable);
+       kmem_free_large(xqm->qm_grp_dqhtable);
+       xqm->qm_usr_dqhtable = NULL;
+       xqm->qm_grp_dqhtable = NULL;
+       xqm->qm_dqhashmask = 0;
+
+       /* frlist cleanup */
+       mutex_lock(&xqm->qm_dqfrlist_lock);
+       list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
+               xfs_dqlock(dqp);
+               list_del_init(&dqp->q_freelist);
+               xfs_Gqm->qm_dqfrlist_cnt--;
+               xfs_dqunlock(dqp);
+               xfs_qm_dqdestroy(dqp);
+       }
+       mutex_unlock(&xqm->qm_dqfrlist_lock);
+       mutex_destroy(&xqm->qm_dqfrlist_lock);
+       kmem_free(xqm);
+}
+
+/*
+ * Called at mount time to let XQM know that another file system is
+ * starting quotas. This isn't crucial information as the individual mount
+ * structures are pretty independent, but it helps the XQM keep a
+ * global view of what's going on.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_hold_quotafs_ref(
+       struct xfs_mount *mp)
+{
+       /*
+        * Need to lock the xfs_Gqm structure for things like this. For example,
+        * the structure could disappear between the entry to this routine and
+        * a HOLD operation if not locked.
+        */
+       mutex_lock(&xfs_Gqm_lock);
+
+       if (!xfs_Gqm) {
+               xfs_Gqm = xfs_Gqm_init();
+               if (!xfs_Gqm) {
+                       mutex_unlock(&xfs_Gqm_lock);
+                       return ENOMEM;
+               }
+       }
+
+       /*
+        * We can keep a list of all filesystems with quotas mounted for
+        * debugging and statistical purposes, but ...
+        * Just take a reference and get out.
+        */
+       xfs_Gqm->qm_nrefs++;
+       mutex_unlock(&xfs_Gqm_lock);
+
+       return 0;
+}
+
+
+/*
+ * Release the reference that a filesystem took at mount time,
+ * so that we know when we need to destroy the entire quota manager.
+ */
+/* ARGSUSED */
+STATIC void
+xfs_qm_rele_quotafs_ref(
+       struct xfs_mount *mp)
+{
+       xfs_dquot_t     *dqp, *n;
+
+       ASSERT(xfs_Gqm);
+       ASSERT(xfs_Gqm->qm_nrefs > 0);
+
+       /*
+        * Go thru the freelist and destroy all inactive dquots.
+        */
+       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+
+       list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
+               xfs_dqlock(dqp);
+               if (dqp->dq_flags & XFS_DQ_INACTIVE) {
+                       ASSERT(dqp->q_mount == NULL);
+                       ASSERT(! XFS_DQ_IS_DIRTY(dqp));
+                       ASSERT(list_empty(&dqp->q_hashlist));
+                       ASSERT(list_empty(&dqp->q_mplist));
+                       list_del_init(&dqp->q_freelist);
+                       xfs_Gqm->qm_dqfrlist_cnt--;
+                       xfs_dqunlock(dqp);
+                       xfs_qm_dqdestroy(dqp);
+               } else {
+                       xfs_dqunlock(dqp);
+               }
+       }
+       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+
+       /*
+        * Destroy the entire XQM. If somebody mounts with quotaon, this'll
+        * be restarted.
+        */
+       mutex_lock(&xfs_Gqm_lock);
+       if (--xfs_Gqm->qm_nrefs == 0) {
+               xfs_qm_destroy(xfs_Gqm);
+               xfs_Gqm = NULL;
+       }
+       mutex_unlock(&xfs_Gqm_lock);
+}
+
+/*
+ * Just destroy the quotainfo structure.
+ */
+void
+xfs_qm_unmount(
+       struct xfs_mount        *mp)
+{
+       if (mp->m_quotainfo) {
+               xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
+               xfs_qm_destroy_quotainfo(mp);
+       }
+}
+
+
+/*
+ * This is called from xfs_mountfs to start quotas and initialize all
+ * necessary data structures like quotainfo.  This is also responsible for
+ * running a quotacheck as necessary.  We are guaranteed that the superblock
+ * is consistently read in at this point.
+ *
+ * If we fail here, the mount will continue with quota turned off. We don't
+ * need to inidicate success or failure at all.
+ */
+void
+xfs_qm_mount_quotas(
+       xfs_mount_t     *mp)
+{
+       int             error = 0;
+       uint            sbf;
+
+       /*
+        * If quotas on realtime volumes is not supported, we disable
+        * quotas immediately.
+        */
+       if (mp->m_sb.sb_rextents) {
+               xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
+               mp->m_qflags = 0;
+               goto write_changes;
+       }
+
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       /*
+        * Allocate the quotainfo structure inside the mount struct, and
+        * create quotainode(s), and change/rev superblock if necessary.
+        */
+       error = xfs_qm_init_quotainfo(mp);
+       if (error) {
+               /*
+                * We must turn off quotas.
+                */
+               ASSERT(mp->m_quotainfo == NULL);
+               mp->m_qflags = 0;
+               goto write_changes;
+       }
+       /*
+        * If any of the quotas are not consistent, do a quotacheck.
+        */
+       if (XFS_QM_NEED_QUOTACHECK(mp)) {
+               error = xfs_qm_quotacheck(mp);
+               if (error) {
+                       /* Quotacheck failed and disabled quotas. */
+                       return;
+               }
+       }
+       /* 
+        * If one type of quotas is off, then it will lose its
+        * quotachecked status, since we won't be doing accounting for
+        * that type anymore.
+        */
+       if (!XFS_IS_UQUOTA_ON(mp))
+               mp->m_qflags &= ~XFS_UQUOTA_CHKD;
+       if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
+               mp->m_qflags &= ~XFS_OQUOTA_CHKD;
+
+ write_changes:
+       /*
+        * We actually don't have to acquire the m_sb_lock at all.
+        * This can only be called from mount, and that's single threaded. XXX
+        */
+       spin_lock(&mp->m_sb_lock);
+       sbf = mp->m_sb.sb_qflags;
+       mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
+       spin_unlock(&mp->m_sb_lock);
+
+       if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
+               if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
+                       /*
+                        * We could only have been turning quotas off.
+                        * We aren't in very good shape actually because
+                        * the incore structures are convinced that quotas are
+                        * off, but the on disk superblock doesn't know that !
+                        */
+                       ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
+                       xfs_alert(mp, "%s: Superblock update failed!",
+                               __func__);
+               }
+       }
+
+       if (error) {
+               xfs_warn(mp, "Failed to initialize disk quotas.");
+               return;
+       }
+}
+
+/*
+ * Called from the vfsops layer.
+ */
+void
+xfs_qm_unmount_quotas(
+       xfs_mount_t     *mp)
+{
+       /*
+        * Release the dquots that root inode, et al might be holding,
+        * before we flush quotas and blow away the quotainfo structure.
+        */
+       ASSERT(mp->m_rootip);
+       xfs_qm_dqdetach(mp->m_rootip);
+       if (mp->m_rbmip)
+               xfs_qm_dqdetach(mp->m_rbmip);
+       if (mp->m_rsumip)
+               xfs_qm_dqdetach(mp->m_rsumip);
+
+       /*
+        * Release the quota inodes.
+        */
+       if (mp->m_quotainfo) {
+               if (mp->m_quotainfo->qi_uquotaip) {
+                       IRELE(mp->m_quotainfo->qi_uquotaip);
+                       mp->m_quotainfo->qi_uquotaip = NULL;
+               }
+               if (mp->m_quotainfo->qi_gquotaip) {
+                       IRELE(mp->m_quotainfo->qi_gquotaip);
+                       mp->m_quotainfo->qi_gquotaip = NULL;
+               }
+       }
+}
+
+/*
+ * Flush all dquots of the given file system to disk. The dquots are
+ * _not_ purged from memory here, just their data written to disk.
+ */
+STATIC int
+xfs_qm_dqflush_all(
+       struct xfs_mount        *mp,
+       int                     sync_mode)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       int                     recl;
+       struct xfs_dquot        *dqp;
+       int                     error;
+
+       if (!q)
+               return 0;
+again:
+       mutex_lock(&q->qi_dqlist_lock);
+       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
+               xfs_dqlock(dqp);
+               if (! XFS_DQ_IS_DIRTY(dqp)) {
+                       xfs_dqunlock(dqp);
+                       continue;
+               }
+
+               /* XXX a sentinel would be better */
+               recl = q->qi_dqreclaims;
+               if (!xfs_dqflock_nowait(dqp)) {
+                       /*
+                        * If we can't grab the flush lock then check
+                        * to see if the dquot has been flushed delayed
+                        * write.  If so, grab its buffer and send it
+                        * out immediately.  We'll be able to acquire
+                        * the flush lock when the I/O completes.
+                        */
+                       xfs_qm_dqflock_pushbuf_wait(dqp);
+               }
+               /*
+                * Let go of the mplist lock. We don't want to hold it
+                * across a disk write.
+                */
+               mutex_unlock(&q->qi_dqlist_lock);
+               error = xfs_qm_dqflush(dqp, sync_mode);
+               xfs_dqunlock(dqp);
+               if (error)
+                       return error;
+
+               mutex_lock(&q->qi_dqlist_lock);
+               if (recl != q->qi_dqreclaims) {
+                       mutex_unlock(&q->qi_dqlist_lock);
+                       /* XXX restart limit */
+                       goto again;
+               }
+       }
+
+       mutex_unlock(&q->qi_dqlist_lock);
+       /* return ! busy */
+       return 0;
+}
+/*
+ * Release the group dquot pointers the user dquots may be
+ * carrying around as a hint. mplist is locked on entry and exit.
+ */
+STATIC void
+xfs_qm_detach_gdquots(
+       struct xfs_mount        *mp)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       struct xfs_dquot        *dqp, *gdqp;
+       int                     nrecl;
+
+ again:
+       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
+       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
+               xfs_dqlock(dqp);
+               if ((gdqp = dqp->q_gdquot)) {
+                       xfs_dqlock(gdqp);
+                       dqp->q_gdquot = NULL;
+               }
+               xfs_dqunlock(dqp);
+
+               if (gdqp) {
+                       /*
+                        * Can't hold the mplist lock across a dqput.
+                        * XXXmust convert to marker based iterations here.
+                        */
+                       nrecl = q->qi_dqreclaims;
+                       mutex_unlock(&q->qi_dqlist_lock);
+                       xfs_qm_dqput(gdqp);
+
+                       mutex_lock(&q->qi_dqlist_lock);
+                       if (nrecl != q->qi_dqreclaims)
+                               goto again;
+               }
+       }
+}
+
+/*
+ * Go through all the incore dquots of this file system and take them
+ * off the mplist and hashlist, if the dquot type matches the dqtype
+ * parameter. This is used when turning off quota accounting for
+ * users and/or groups, as well as when the filesystem is unmounting.
+ */
+STATIC int
+xfs_qm_dqpurge_int(
+       struct xfs_mount        *mp,
+       uint                    flags)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       struct xfs_dquot        *dqp, *n;
+       uint                    dqtype;
+       int                     nrecl;
+       int                     nmisses;
+
+       if (!q)
+               return 0;
+
+       dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
+       dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
+       dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
+
+       mutex_lock(&q->qi_dqlist_lock);
+
+       /*
+        * In the first pass through all incore dquots of this filesystem,
+        * we release the group dquot pointers the user dquots may be
+        * carrying around as a hint. We need to do this irrespective of
+        * what's being turned off.
+        */
+       xfs_qm_detach_gdquots(mp);
+
+      again:
+       nmisses = 0;
+       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
+       /*
+        * Try to get rid of all of the unwanted dquots. The idea is to
+        * get them off mplist and hashlist, but leave them on freelist.
+        */
+       list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
+               /*
+                * It's OK to look at the type without taking dqlock here.
+                * We're holding the mplist lock here, and that's needed for
+                * a dqreclaim.
+                */
+               if ((dqp->dq_flags & dqtype) == 0)
+                       continue;
+
+               if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
+                       nrecl = q->qi_dqreclaims;
+                       mutex_unlock(&q->qi_dqlist_lock);
+                       mutex_lock(&dqp->q_hash->qh_lock);
+                       mutex_lock(&q->qi_dqlist_lock);
+
+                       /*
+                        * XXXTheoretically, we can get into a very long
+                        * ping pong game here.
+                        * No one can be adding dquots to the mplist at
+                        * this point, but somebody might be taking things off.
+                        */
+                       if (nrecl != q->qi_dqreclaims) {
+                               mutex_unlock(&dqp->q_hash->qh_lock);
+                               goto again;
+                       }
+               }
+
+               /*
+                * Take the dquot off the mplist and hashlist. It may remain on
+                * freelist in INACTIVE state.
+                */
+               nmisses += xfs_qm_dqpurge(dqp);
+       }
+       mutex_unlock(&q->qi_dqlist_lock);
+       return nmisses;
+}
+
+int
+xfs_qm_dqpurge_all(
+       xfs_mount_t     *mp,
+       uint            flags)
+{
+       int             ndquots;
+
+       /*
+        * Purge the dquot cache.
+        * None of the dquots should really be busy at this point.
+        */
+       if (mp->m_quotainfo) {
+               while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
+                       delay(ndquots * 10);
+               }
+       }
+       return 0;
+}
+
+STATIC int
+xfs_qm_dqattach_one(
+       xfs_inode_t     *ip,
+       xfs_dqid_t      id,
+       uint            type,
+       uint            doalloc,
+       xfs_dquot_t     *udqhint, /* hint */
+       xfs_dquot_t     **IO_idqpp)
+{
+       xfs_dquot_t     *dqp;
+       int             error;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       error = 0;
+
+       /*
+        * See if we already have it in the inode itself. IO_idqpp is
+        * &i_udquot or &i_gdquot. This made the code look weird, but
+        * made the logic a lot simpler.
+        */
+       dqp = *IO_idqpp;
+       if (dqp) {
+               trace_xfs_dqattach_found(dqp);
+               return 0;
+       }
+
+       /*
+        * udqhint is the i_udquot field in inode, and is non-NULL only
+        * when the type arg is group/project. Its purpose is to save a
+        * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
+        * the user dquot.
+        */
+       if (udqhint) {
+               ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
+               xfs_dqlock(udqhint);
+
+               /*
+                * No need to take dqlock to look at the id.
+                *
+                * The ID can't change until it gets reclaimed, and it won't
+                * be reclaimed as long as we have a ref from inode and we
+                * hold the ilock.
+                */
+               dqp = udqhint->q_gdquot;
+               if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
+                       xfs_dqlock(dqp);
+                       XFS_DQHOLD(dqp);
+                       ASSERT(*IO_idqpp == NULL);
+                       *IO_idqpp = dqp;
+
+                       xfs_dqunlock(dqp);
+                       xfs_dqunlock(udqhint);
+                       return 0;
+               }
+
+               /*
+                * We can't hold a dquot lock when we call the dqget code.
+                * We'll deadlock in no time, because of (not conforming to)
+                * lock ordering - the inodelock comes before any dquot lock,
+                * and we may drop and reacquire the ilock in xfs_qm_dqget().
+                */
+               xfs_dqunlock(udqhint);
+       }
+
+       /*
+        * Find the dquot from somewhere. This bumps the
+        * reference count of dquot and returns it locked.
+        * This can return ENOENT if dquot didn't exist on
+        * disk and we didn't ask it to allocate;
+        * ESRCH if quotas got turned off suddenly.
+        */
+       error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp);
+       if (error)
+               return error;
+
+       trace_xfs_dqattach_get(dqp);
+
+       /*
+        * dqget may have dropped and re-acquired the ilock, but it guarantees
+        * that the dquot returned is the one that should go in the inode.
+        */
+       *IO_idqpp = dqp;
+       xfs_dqunlock(dqp);
+       return 0;
+}
+
+
+/*
+ * Given a udquot and gdquot, attach a ptr to the group dquot in the
+ * udquot as a hint for future lookups. The idea sounds simple, but the
+ * execution isn't, because the udquot might have a group dquot attached
+ * already and getting rid of that gets us into lock ordering constraints.
+ * The process is complicated more by the fact that the dquots may or may not
+ * be locked on entry.
+ */
+STATIC void
+xfs_qm_dqattach_grouphint(
+       xfs_dquot_t     *udq,
+       xfs_dquot_t     *gdq)
+{
+       xfs_dquot_t     *tmp;
+
+       xfs_dqlock(udq);
+
+       if ((tmp = udq->q_gdquot)) {
+               if (tmp == gdq) {
+                       xfs_dqunlock(udq);
+                       return;
+               }
+
+               udq->q_gdquot = NULL;
+               /*
+                * We can't keep any dqlocks when calling dqrele,
+                * because the freelist lock comes before dqlocks.
+                */
+               xfs_dqunlock(udq);
+               /*
+                * we took a hard reference once upon a time in dqget,
+                * so give it back when the udquot no longer points at it
+                * dqput() does the unlocking of the dquot.
+                */
+               xfs_qm_dqrele(tmp);
+
+               xfs_dqlock(udq);
+               xfs_dqlock(gdq);
+
+       } else {
+               ASSERT(XFS_DQ_IS_LOCKED(udq));
+               xfs_dqlock(gdq);
+       }
+
+       ASSERT(XFS_DQ_IS_LOCKED(udq));
+       ASSERT(XFS_DQ_IS_LOCKED(gdq));
+       /*
+        * Somebody could have attached a gdquot here,
+        * when we dropped the uqlock. If so, just do nothing.
+        */
+       if (udq->q_gdquot == NULL) {
+               XFS_DQHOLD(gdq);
+               udq->q_gdquot = gdq;
+       }
+
+       xfs_dqunlock(gdq);
+       xfs_dqunlock(udq);
+}
+
+
+/*
+ * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
+ * into account.
+ * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
+ * Inode may get unlocked and relocked in here, and the caller must deal with
+ * the consequences.
+ */
+int
+xfs_qm_dqattach_locked(
+       xfs_inode_t     *ip,
+       uint            flags)
+{
+       xfs_mount_t     *mp = ip->i_mount;
+       uint            nquotas = 0;
+       int             error = 0;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) ||
+           !XFS_IS_QUOTA_ON(mp) ||
+           !XFS_NOT_DQATTACHED(mp, ip) ||
+           ip->i_ino == mp->m_sb.sb_uquotino ||
+           ip->i_ino == mp->m_sb.sb_gquotino)
+               return 0;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+       if (XFS_IS_UQUOTA_ON(mp)) {
+               error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
+                                               flags & XFS_QMOPT_DQALLOC,
+                                               NULL, &ip->i_udquot);
+               if (error)
+                       goto done;
+               nquotas++;
+       }
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       if (XFS_IS_OQUOTA_ON(mp)) {
+               error = XFS_IS_GQUOTA_ON(mp) ?
+                       xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
+                                               flags & XFS_QMOPT_DQALLOC,
+                                               ip->i_udquot, &ip->i_gdquot) :
+                       xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
+                                               flags & XFS_QMOPT_DQALLOC,
+                                               ip->i_udquot, &ip->i_gdquot);
+               /*
+                * Don't worry about the udquot that we may have
+                * attached above. It'll get detached, if not already.
+                */
+               if (error)
+                       goto done;
+               nquotas++;
+       }
+
+       /*
+        * Attach this group quota to the user quota as a hint.
+        * This WON'T, in general, result in a thrash.
+        */
+       if (nquotas == 2) {
+               ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+               ASSERT(ip->i_udquot);
+               ASSERT(ip->i_gdquot);
+
+               /*
+                * We may or may not have the i_udquot locked at this point,
+                * but this check is OK since we don't depend on the i_gdquot to
+                * be accurate 100% all the time. It is just a hint, and this
+                * will succeed in general.
+                */
+               if (ip->i_udquot->q_gdquot == ip->i_gdquot)
+                       goto done;
+               /*
+                * Attach i_gdquot to the gdquot hint inside the i_udquot.
+                */
+               xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
+       }
+
+ done:
+#ifdef DEBUG
+       if (!error) {
+               if (XFS_IS_UQUOTA_ON(mp))
+                       ASSERT(ip->i_udquot);
+               if (XFS_IS_OQUOTA_ON(mp))
+                       ASSERT(ip->i_gdquot);
+       }
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+#endif
+       return error;
+}
+
+int
+xfs_qm_dqattach(
+       struct xfs_inode        *ip,
+       uint                    flags)
+{
+       int                     error;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       error = xfs_qm_dqattach_locked(ip, flags);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       return error;
+}
+
+/*
+ * Release dquots (and their references) if any.
+ * The inode should be locked EXCL except when this's called by
+ * xfs_ireclaim.
+ */
+void
+xfs_qm_dqdetach(
+       xfs_inode_t     *ip)
+{
+       if (!(ip->i_udquot || ip->i_gdquot))
+               return;
+
+       trace_xfs_dquot_dqdetach(ip);
+
+       ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
+       ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
+       if (ip->i_udquot) {
+               xfs_qm_dqrele(ip->i_udquot);
+               ip->i_udquot = NULL;
+       }
+       if (ip->i_gdquot) {
+               xfs_qm_dqrele(ip->i_gdquot);
+               ip->i_gdquot = NULL;
+       }
+}
+
+int
+xfs_qm_sync(
+       struct xfs_mount        *mp,
+       int                     flags)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       int                     recl, restarts;
+       struct xfs_dquot        *dqp;
+       int                     error;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return 0;
+
+       restarts = 0;
+
+  again:
+       mutex_lock(&q->qi_dqlist_lock);
+       /*
+        * dqpurge_all() also takes the mplist lock and iterate thru all dquots
+        * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
+        * when we have the mplist lock, we know that dquots will be consistent
+        * as long as we have it locked.
+        */
+       if (!XFS_IS_QUOTA_ON(mp)) {
+               mutex_unlock(&q->qi_dqlist_lock);
+               return 0;
+       }
+       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
+       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
+               /*
+                * If this is vfs_sync calling, then skip the dquots that
+                * don't 'seem' to be dirty. ie. don't acquire dqlock.
+                * This is very similar to what xfs_sync does with inodes.
+                */
+               if (flags & SYNC_TRYLOCK) {
+                       if (!XFS_DQ_IS_DIRTY(dqp))
+                               continue;
+                       if (!xfs_qm_dqlock_nowait(dqp))
+                               continue;
+               } else {
+                       xfs_dqlock(dqp);
+               }
+
+               /*
+                * Now, find out for sure if this dquot is dirty or not.
+                */
+               if (! XFS_DQ_IS_DIRTY(dqp)) {
+                       xfs_dqunlock(dqp);
+                       continue;
+               }
+
+               /* XXX a sentinel would be better */
+               recl = q->qi_dqreclaims;
+               if (!xfs_dqflock_nowait(dqp)) {
+                       if (flags & SYNC_TRYLOCK) {
+                               xfs_dqunlock(dqp);
+                               continue;
+                       }
+                       /*
+                        * If we can't grab the flush lock then if the caller
+                        * really wanted us to give this our best shot, so
+                        * see if we can give a push to the buffer before we wait
+                        * on the flush lock. At this point, we know that
+                        * even though the dquot is being flushed,
+                        * it has (new) dirty data.
+                        */
+                       xfs_qm_dqflock_pushbuf_wait(dqp);
+               }
+               /*
+                * Let go of the mplist lock. We don't want to hold it
+                * across a disk write
+                */
+               mutex_unlock(&q->qi_dqlist_lock);
+               error = xfs_qm_dqflush(dqp, flags);
+               xfs_dqunlock(dqp);
+               if (error && XFS_FORCED_SHUTDOWN(mp))
+                       return 0;       /* Need to prevent umount failure */
+               else if (error)
+                       return error;
+
+               mutex_lock(&q->qi_dqlist_lock);
+               if (recl != q->qi_dqreclaims) {
+                       if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
+                               break;
+
+                       mutex_unlock(&q->qi_dqlist_lock);
+                       goto again;
+               }
+       }
+
+       mutex_unlock(&q->qi_dqlist_lock);
+       return 0;
+}
+
+/*
+ * The hash chains and the mplist use the same xfs_dqhash structure as
+ * their list head, but we can take the mplist qh_lock and one of the
+ * hash qh_locks at the same time without any problem as they aren't
+ * related.
+ */
+static struct lock_class_key xfs_quota_mplist_class;
+
+/*
+ * This initializes all the quota information that's kept in the
+ * mount structure
+ */
+STATIC int
+xfs_qm_init_quotainfo(
+       xfs_mount_t     *mp)
+{
+       xfs_quotainfo_t *qinf;
+       int             error;
+       xfs_dquot_t     *dqp;
+
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       /*
+        * Tell XQM that we exist as soon as possible.
+        */
+       if ((error = xfs_qm_hold_quotafs_ref(mp))) {
+               return error;
+       }
+
+       qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
+
+       /*
+        * See if quotainodes are setup, and if not, allocate them,
+        * and change the superblock accordingly.
+        */
+       if ((error = xfs_qm_init_quotainos(mp))) {
+               kmem_free(qinf);
+               mp->m_quotainfo = NULL;
+               return error;
+       }
+
+       INIT_LIST_HEAD(&qinf->qi_dqlist);
+       mutex_init(&qinf->qi_dqlist_lock);
+       lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
+
+       qinf->qi_dqreclaims = 0;
+
+       /* mutex used to serialize quotaoffs */
+       mutex_init(&qinf->qi_quotaofflock);
+
+       /* Precalc some constants */
+       qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
+       ASSERT(qinf->qi_dqchunklen);
+       qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
+       do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
+
+       mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
+
+       /*
+        * We try to get the limits from the superuser's limits fields.
+        * This is quite hacky, but it is standard quota practice.
+        * We look at the USR dquot with id == 0 first, but if user quotas
+        * are not enabled we goto the GRP dquot with id == 0.
+        * We don't really care to keep separate default limits for user
+        * and group quotas, at least not at this point.
+        */
+       error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
+                            XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
+                            (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
+                               XFS_DQ_PROJ),
+                            XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
+                            &dqp);
+       if (! error) {
+               xfs_disk_dquot_t        *ddqp = &dqp->q_core;
+
+               /*
+                * The warnings and timers set the grace period given to
+                * a user or group before he or she can not perform any
+                * more writing. If it is zero, a default is used.
+                */
+               qinf->qi_btimelimit = ddqp->d_btimer ?
+                       be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
+               qinf->qi_itimelimit = ddqp->d_itimer ?
+                       be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
+               qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
+                       be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
+               qinf->qi_bwarnlimit = ddqp->d_bwarns ?
+                       be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
+               qinf->qi_iwarnlimit = ddqp->d_iwarns ?
+                       be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
+               qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
+                       be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
+               qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
+               qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
+               qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
+               qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
+               qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
+               qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
+ 
+               /*
+                * We sent the XFS_QMOPT_DQSUSER flag to dqget because
+                * we don't want this dquot cached. We haven't done a
+                * quotacheck yet, and quotacheck doesn't like incore dquots.
+                */
+               xfs_qm_dqdestroy(dqp);
+       } else {
+               qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
+               qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
+               qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
+               qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
+               qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
+               qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
+       }
+
+       return 0;
+}
+
+
+/*
+ * Gets called when unmounting a filesystem or when all quotas get
+ * turned off.
+ * This purges the quota inodes, destroys locks and frees itself.
+ */
+void
+xfs_qm_destroy_quotainfo(
+       xfs_mount_t     *mp)
+{
+       xfs_quotainfo_t *qi;
+
+       qi = mp->m_quotainfo;
+       ASSERT(qi != NULL);
+       ASSERT(xfs_Gqm != NULL);
+
+       /*
+        * Release the reference that XQM kept, so that we know
+        * when the XQM structure should be freed. We cannot assume
+        * that xfs_Gqm is non-null after this point.
+        */
+       xfs_qm_rele_quotafs_ref(mp);
+
+       ASSERT(list_empty(&qi->qi_dqlist));
+       mutex_destroy(&qi->qi_dqlist_lock);
+
+       if (qi->qi_uquotaip) {
+               IRELE(qi->qi_uquotaip);
+               qi->qi_uquotaip = NULL; /* paranoia */
+       }
+       if (qi->qi_gquotaip) {
+               IRELE(qi->qi_gquotaip);
+               qi->qi_gquotaip = NULL;
+       }
+       mutex_destroy(&qi->qi_quotaofflock);
+       kmem_free(qi);
+       mp->m_quotainfo = NULL;
+}
+
+
+
+/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
+
+/* ARGSUSED */
+STATIC void
+xfs_qm_list_init(
+       xfs_dqlist_t    *list,
+       char            *str,
+       int             n)
+{
+       mutex_init(&list->qh_lock);
+       INIT_LIST_HEAD(&list->qh_list);
+       list->qh_version = 0;
+       list->qh_nelems = 0;
+}
+
+STATIC void
+xfs_qm_list_destroy(
+       xfs_dqlist_t    *list)
+{
+       mutex_destroy(&(list->qh_lock));
+}
+
+/*
+ * Create an inode and return with a reference already taken, but unlocked
+ * This is how we create quota inodes
+ */
+STATIC int
+xfs_qm_qino_alloc(
+       xfs_mount_t     *mp,
+       xfs_inode_t     **ip,
+       __int64_t       sbfields,
+       uint            flags)
+{
+       xfs_trans_t     *tp;
+       int             error;
+       int             committed;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
+       if ((error = xfs_trans_reserve(tp,
+                                     XFS_QM_QINOCREATE_SPACE_RES(mp),
+                                     XFS_CREATE_LOG_RES(mp), 0,
+                                     XFS_TRANS_PERM_LOG_RES,
+                                     XFS_CREATE_LOG_COUNT))) {
+               xfs_trans_cancel(tp, 0);
+               return error;
+       }
+
+       error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
+       if (error) {
+               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
+                                XFS_TRANS_ABORT);
+               return error;
+       }
+
+       /*
+        * Make the changes in the superblock, and log those too.
+        * sbfields arg may contain fields other than *QUOTINO;
+        * VERSIONNUM for example.
+        */
+       spin_lock(&mp->m_sb_lock);
+       if (flags & XFS_QMOPT_SBVERSION) {
+               ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
+               ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+                                  XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
+                      (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+                       XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
+
+               xfs_sb_version_addquota(&mp->m_sb);
+               mp->m_sb.sb_uquotino = NULLFSINO;
+               mp->m_sb.sb_gquotino = NULLFSINO;
+
+               /* qflags will get updated _after_ quotacheck */
+               mp->m_sb.sb_qflags = 0;
+       }
+       if (flags & XFS_QMOPT_UQUOTA)
+               mp->m_sb.sb_uquotino = (*ip)->i_ino;
+       else
+               mp->m_sb.sb_gquotino = (*ip)->i_ino;
+       spin_unlock(&mp->m_sb_lock);
+       xfs_mod_sb(tp, sbfields);
+
+       if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
+               xfs_alert(mp, "%s failed (error %d)!", __func__, error);
+               return error;
+       }
+       return 0;
+}
+
+
+STATIC void
+xfs_qm_reset_dqcounts(
+       xfs_mount_t     *mp,
+       xfs_buf_t       *bp,
+       xfs_dqid_t      id,
+       uint            type)
+{
+       xfs_disk_dquot_t        *ddq;
+       int                     j;
+
+       trace_xfs_reset_dqcounts(bp, _RET_IP_);
+
+       /*
+        * Reset all counters and timers. They'll be
+        * started afresh by xfs_qm_quotacheck.
+        */
+#ifdef DEBUG
+       j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
+       do_div(j, sizeof(xfs_dqblk_t));
+       ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
+#endif
+       ddq = bp->b_addr;
+       for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
+               /*
+                * Do a sanity check, and if needed, repair the dqblk. Don't
+                * output any warnings because it's perfectly possible to
+                * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
+                */
+               (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
+                                     "xfs_quotacheck");
+               ddq->d_bcount = 0;
+               ddq->d_icount = 0;
+               ddq->d_rtbcount = 0;
+               ddq->d_btimer = 0;
+               ddq->d_itimer = 0;
+               ddq->d_rtbtimer = 0;
+               ddq->d_bwarns = 0;
+               ddq->d_iwarns = 0;
+               ddq->d_rtbwarns = 0;
+               ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
+       }
+}
+
+STATIC int
+xfs_qm_dqiter_bufs(
+       xfs_mount_t     *mp,
+       xfs_dqid_t      firstid,
+       xfs_fsblock_t   bno,
+       xfs_filblks_t   blkcnt,
+       uint            flags)
+{
+       xfs_buf_t       *bp;
+       int             error;
+       int             type;
+
+       ASSERT(blkcnt > 0);
+       type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
+               (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
+       error = 0;
+
+       /*
+        * Blkcnt arg can be a very big number, and might even be
+        * larger than the log itself. So, we have to break it up into
+        * manageable-sized transactions.
+        * Note that we don't start a permanent transaction here; we might
+        * not be able to get a log reservation for the whole thing up front,
+        * and we don't really care to either, because we just discard
+        * everything if we were to crash in the middle of this loop.
+        */
+       while (blkcnt--) {
+               error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+                             XFS_FSB_TO_DADDR(mp, bno),
+                             mp->m_quotainfo->qi_dqchunklen, 0, &bp);
+               if (error)
+                       break;
+
+               xfs_qm_reset_dqcounts(mp, bp, firstid, type);
+               xfs_bdwrite(mp, bp);
+               /*
+                * goto the next block.
+                */
+               bno++;
+               firstid += mp->m_quotainfo->qi_dqperchunk;
+       }
+       return error;
+}
+
+/*
+ * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
+ * caller supplied function for every chunk of dquots that we find.
+ */
+STATIC int
+xfs_qm_dqiterate(
+       xfs_mount_t     *mp,
+       xfs_inode_t     *qip,
+       uint            flags)
+{
+       xfs_bmbt_irec_t         *map;
+       int                     i, nmaps;       /* number of map entries */
+       int                     error;          /* return value */
+       xfs_fileoff_t           lblkno;
+       xfs_filblks_t           maxlblkcnt;
+       xfs_dqid_t              firstid;
+       xfs_fsblock_t           rablkno;
+       xfs_filblks_t           rablkcnt;
+
+       error = 0;
+       /*
+        * This looks racy, but we can't keep an inode lock across a
+        * trans_reserve. But, this gets called during quotacheck, and that
+        * happens only at mount time which is single threaded.
+        */
+       if (qip->i_d.di_nblocks == 0)
+               return 0;
+
+       map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
+
+       lblkno = 0;
+       maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+       do {
+               nmaps = XFS_DQITER_MAP_SIZE;
+               /*
+                * We aren't changing the inode itself. Just changing
+                * some of its data. No new blocks are added here, and
+                * the inode is never added to the transaction.
+                */
+               xfs_ilock(qip, XFS_ILOCK_SHARED);
+               error = xfs_bmapi(NULL, qip, lblkno,
+                                 maxlblkcnt - lblkno,
+                                 XFS_BMAPI_METADATA,
+                                 NULL,
+                                 0, map, &nmaps, NULL);
+               xfs_iunlock(qip, XFS_ILOCK_SHARED);
+               if (error)
+                       break;
+
+               ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
+               for (i = 0; i < nmaps; i++) {
+                       ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
+                       ASSERT(map[i].br_blockcount);
+
+
+                       lblkno += map[i].br_blockcount;
+
+                       if (map[i].br_startblock == HOLESTARTBLOCK)
+                               continue;
+
+                       firstid = (xfs_dqid_t) map[i].br_startoff *
+                               mp->m_quotainfo->qi_dqperchunk;
+                       /*
+                        * Do a read-ahead on the next extent.
+                        */
+                       if ((i+1 < nmaps) &&
+                           (map[i+1].br_startblock != HOLESTARTBLOCK)) {
+                               rablkcnt =  map[i+1].br_blockcount;
+                               rablkno = map[i+1].br_startblock;
+                               while (rablkcnt--) {
+                                       xfs_buf_readahead(mp->m_ddev_targp,
+                                              XFS_FSB_TO_DADDR(mp, rablkno),
+                                              mp->m_quotainfo->qi_dqchunklen);
+                                       rablkno++;
+                               }
+                       }
+                       /*
+                        * Iterate thru all the blks in the extent and
+                        * reset the counters of all the dquots inside them.
+                        */
+                       if ((error = xfs_qm_dqiter_bufs(mp,
+                                                      firstid,
+                                                      map[i].br_startblock,
+                                                      map[i].br_blockcount,
+                                                      flags))) {
+                               break;
+                       }
+               }
+
+               if (error)
+                       break;
+       } while (nmaps > 0);
+
+       kmem_free(map);
+
+       return error;
+}
+
+/*
+ * Called by dqusage_adjust in doing a quotacheck.
+ *
+ * Given the inode, and a dquot id this updates both the incore dqout as well
+ * as the buffer copy. This is so that once the quotacheck is done, we can
+ * just log all the buffers, as opposed to logging numerous updates to
+ * individual dquots.
+ */
+STATIC int
+xfs_qm_quotacheck_dqadjust(
+       struct xfs_inode        *ip,
+       xfs_dqid_t              id,
+       uint                    type,
+       xfs_qcnt_t              nblks,
+       xfs_qcnt_t              rtblks)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_dquot        *dqp;
+       int                     error;
+
+       error = xfs_qm_dqget(mp, ip, id, type,
+                            XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
+       if (error) {
+               /*
+                * Shouldn't be able to turn off quotas here.
+                */
+               ASSERT(error != ESRCH);
+               ASSERT(error != ENOENT);
+               return error;
+       }
+
+       trace_xfs_dqadjust(dqp);
+
+       /*
+        * Adjust the inode count and the block count to reflect this inode's
+        * resource usage.
+        */
+       be64_add_cpu(&dqp->q_core.d_icount, 1);
+       dqp->q_res_icount++;
+       if (nblks) {
+               be64_add_cpu(&dqp->q_core.d_bcount, nblks);
+               dqp->q_res_bcount += nblks;
+       }
+       if (rtblks) {
+               be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
+               dqp->q_res_rtbcount += rtblks;
+       }
+
+       /*
+        * Set default limits, adjust timers (since we changed usages)
+        *
+        * There are no timers for the default values set in the root dquot.
+        */
+       if (dqp->q_core.d_id) {
+               xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
+               xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
+       }
+
+       dqp->dq_flags |= XFS_DQ_DIRTY;
+       xfs_qm_dqput(dqp);
+       return 0;
+}
+
+STATIC int
+xfs_qm_get_rtblks(
+       xfs_inode_t     *ip,
+       xfs_qcnt_t      *O_rtblks)
+{
+       xfs_filblks_t   rtblks;                 /* total rt blks */
+       xfs_extnum_t    idx;                    /* extent record index */
+       xfs_ifork_t     *ifp;                   /* inode fork pointer */
+       xfs_extnum_t    nextents;               /* number of extent entries */
+       int             error;
+
+       ASSERT(XFS_IS_REALTIME_INODE(ip));
+       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
+                       return error;
+       }
+       rtblks = 0;
+       nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+       for (idx = 0; idx < nextents; idx++)
+               rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
+       *O_rtblks = (xfs_qcnt_t)rtblks;
+       return 0;
+}
+
+/*
+ * callback routine supplied to bulkstat(). Given an inumber, find its
+ * dquots and update them to account for resources taken by that inode.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_dqusage_adjust(
+       xfs_mount_t     *mp,            /* mount point for filesystem */
+       xfs_ino_t       ino,            /* inode number to get data for */
+       void            __user *buffer, /* not used */
+       int             ubsize,         /* not used */
+       int             *ubused,        /* not used */
+       int             *res)           /* result code value */
+{
+       xfs_inode_t     *ip;
+       xfs_qcnt_t      nblks, rtblks = 0;
+       int             error;
+
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       /*
+        * rootino must have its resources accounted for, not so with the quota
+        * inodes.
+        */
+       if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
+               *res = BULKSTAT_RV_NOTHING;
+               return XFS_ERROR(EINVAL);
+       }
+
+       /*
+        * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
+        * interface expects the inode to be exclusively locked because that's
+        * the case in all other instances. It's OK that we do this because
+        * quotacheck is done only at mount time.
+        */
+       error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
+       if (error) {
+               *res = BULKSTAT_RV_NOTHING;
+               return error;
+       }
+
+       ASSERT(ip->i_delayed_blks == 0);
+
+       if (XFS_IS_REALTIME_INODE(ip)) {
+               /*
+                * Walk thru the extent list and count the realtime blocks.
+                */
+               error = xfs_qm_get_rtblks(ip, &rtblks);
+               if (error)
+                       goto error0;
+       }
+
+       nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
+
+       /*
+        * Add the (disk blocks and inode) resources occupied by this
+        * inode to its dquots. We do this adjustment in the incore dquot,
+        * and also copy the changes to its buffer.
+        * We don't care about putting these changes in a transaction
+        * envelope because if we crash in the middle of a 'quotacheck'
+        * we have to start from the beginning anyway.
+        * Once we're done, we'll log all the dquot bufs.
+        *
+        * The *QUOTA_ON checks below may look pretty racy, but quotachecks
+        * and quotaoffs don't race. (Quotachecks happen at mount time only).
+        */
+       if (XFS_IS_UQUOTA_ON(mp)) {
+               error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
+                                                  XFS_DQ_USER, nblks, rtblks);
+               if (error)
+                       goto error0;
+       }
+
+       if (XFS_IS_GQUOTA_ON(mp)) {
+               error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
+                                                  XFS_DQ_GROUP, nblks, rtblks);
+               if (error)
+                       goto error0;
+       }
+
+       if (XFS_IS_PQUOTA_ON(mp)) {
+               error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
+                                                  XFS_DQ_PROJ, nblks, rtblks);
+               if (error)
+                       goto error0;
+       }
+
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       IRELE(ip);
+       *res = BULKSTAT_RV_DIDONE;
+       return 0;
+
+error0:
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       IRELE(ip);
+       *res = BULKSTAT_RV_GIVEUP;
+       return error;
+}
+
+/*
+ * Walk thru all the filesystem inodes and construct a consistent view
+ * of the disk quota world. If the quotacheck fails, disable quotas.
+ */
+int
+xfs_qm_quotacheck(
+       xfs_mount_t     *mp)
+{
+       int             done, count, error;
+       xfs_ino_t       lastino;
+       size_t          structsz;
+       xfs_inode_t     *uip, *gip;
+       uint            flags;
+
+       count = INT_MAX;
+       structsz = 1;
+       lastino = 0;
+       flags = 0;
+
+       ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       /*
+        * There should be no cached dquots. The (simplistic) quotacheck
+        * algorithm doesn't like that.
+        */
+       ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
+
+       xfs_notice(mp, "Quotacheck needed: Please wait.");
+
+       /*
+        * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
+        * their counters to zero. We need a clean slate.
+        * We don't log our changes till later.
+        */
+       uip = mp->m_quotainfo->qi_uquotaip;
+       if (uip) {
+               error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA);
+               if (error)
+                       goto error_return;
+               flags |= XFS_UQUOTA_CHKD;
+       }
+
+       gip = mp->m_quotainfo->qi_gquotaip;
+       if (gip) {
+               error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
+                                       XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
+               if (error)
+                       goto error_return;
+               flags |= XFS_OQUOTA_CHKD;
+       }
+
+       do {
+               /*
+                * Iterate thru all the inodes in the file system,
+                * adjusting the corresponding dquot counters in core.
+                */
+               error = xfs_bulkstat(mp, &lastino, &count,
+                                    xfs_qm_dqusage_adjust,
+                                    structsz, NULL, &done);
+               if (error)
+                       break;
+
+       } while (!done);
+
+       /*
+        * We've made all the changes that we need to make incore.
+        * Flush them down to disk buffers if everything was updated
+        * successfully.
+        */
+       if (!error)
+               error = xfs_qm_dqflush_all(mp, 0);
+
+       /*
+        * We can get this error if we couldn't do a dquot allocation inside
+        * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
+        * dirty dquots that might be cached, we just want to get rid of them
+        * and turn quotaoff. The dquots won't be attached to any of the inodes
+        * at this point (because we intentionally didn't in dqget_noattach).
+        */
+       if (error) {
+               xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
+               goto error_return;
+       }
+
+       /*
+        * We didn't log anything, because if we crashed, we'll have to
+        * start the quotacheck from scratch anyway. However, we must make
+        * sure that our dquot changes are secure before we put the
+        * quotacheck'd stamp on the superblock. So, here we do a synchronous
+        * flush.
+        */
+       XFS_bflush(mp->m_ddev_targp);
+
+       /*
+        * If one type of quotas is off, then it will lose its
+        * quotachecked status, since we won't be doing accounting for
+        * that type anymore.
+        */
+       mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
+       mp->m_qflags |= flags;
+
+ error_return:
+       if (error) {
+               xfs_warn(mp,
+       "Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
+                       error);
+               /*
+                * We must turn off quotas.
+                */
+               ASSERT(mp->m_quotainfo != NULL);
+               ASSERT(xfs_Gqm != NULL);
+               xfs_qm_destroy_quotainfo(mp);
+               if (xfs_mount_reset_sbqflags(mp)) {
+                       xfs_warn(mp,
+                               "Quotacheck: Failed to reset quota flags.");
+               }
+       } else
+               xfs_notice(mp, "Quotacheck: Done.");
+       return (error);
+}
+
+/*
+ * This is called after the superblock has been read in and we're ready to
+ * iget the quota inodes.
+ */
+STATIC int
+xfs_qm_init_quotainos(
+       xfs_mount_t     *mp)
+{
+       xfs_inode_t     *uip, *gip;
+       int             error;
+       __int64_t       sbflags;
+       uint            flags;
+
+       ASSERT(mp->m_quotainfo);
+       uip = gip = NULL;
+       sbflags = 0;
+       flags = 0;
+
+       /*
+        * Get the uquota and gquota inodes
+        */
+       if (xfs_sb_version_hasquota(&mp->m_sb)) {
+               if (XFS_IS_UQUOTA_ON(mp) &&
+                   mp->m_sb.sb_uquotino != NULLFSINO) {
+                       ASSERT(mp->m_sb.sb_uquotino > 0);
+                       if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
+                                            0, 0, &uip)))
+                               return XFS_ERROR(error);
+               }
+               if (XFS_IS_OQUOTA_ON(mp) &&
+                   mp->m_sb.sb_gquotino != NULLFSINO) {
+                       ASSERT(mp->m_sb.sb_gquotino > 0);
+                       if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
+                                            0, 0, &gip))) {
+                               if (uip)
+                                       IRELE(uip);
+                               return XFS_ERROR(error);
+                       }
+               }
+       } else {
+               flags |= XFS_QMOPT_SBVERSION;
+               sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+                           XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
+       }
+
+       /*
+        * Create the two inodes, if they don't exist already. The changes
+        * made above will get added to a transaction and logged in one of
+        * the qino_alloc calls below.  If the device is readonly,
+        * temporarily switch to read-write to do this.
+        */
+       if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
+               if ((error = xfs_qm_qino_alloc(mp, &uip,
+                                             sbflags | XFS_SB_UQUOTINO,
+                                             flags | XFS_QMOPT_UQUOTA)))
+                       return XFS_ERROR(error);
+
+               flags &= ~XFS_QMOPT_SBVERSION;
+       }
+       if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
+               flags |= (XFS_IS_GQUOTA_ON(mp) ?
+                               XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
+               error = xfs_qm_qino_alloc(mp, &gip,
+                                         sbflags | XFS_SB_GQUOTINO, flags);
+               if (error) {
+                       if (uip)
+                               IRELE(uip);
+
+                       return XFS_ERROR(error);
+               }
+       }
+
+       mp->m_quotainfo->qi_uquotaip = uip;
+       mp->m_quotainfo->qi_gquotaip = gip;
+
+       return 0;
+}
+
+
+
+/*
+ * Just pop the least recently used dquot off the freelist and
+ * recycle it. The returned dquot is locked.
+ */
+STATIC xfs_dquot_t *
+xfs_qm_dqreclaim_one(void)
+{
+       xfs_dquot_t     *dqpout;
+       xfs_dquot_t     *dqp;
+       int             restarts;
+       int             startagain;
+
+       restarts = 0;
+       dqpout = NULL;
+
+       /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
+again:
+       startagain = 0;
+       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+
+       list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
+               struct xfs_mount *mp = dqp->q_mount;
+               xfs_dqlock(dqp);
+
+               /*
+                * We are racing with dqlookup here. Naturally we don't
+                * want to reclaim a dquot that lookup wants. We release the
+                * freelist lock and start over, so that lookup will grab
+                * both the dquot and the freelistlock.
+                */
+               if (dqp->dq_flags & XFS_DQ_WANT) {
+                       ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
+
+                       trace_xfs_dqreclaim_want(dqp);
+                       XQM_STATS_INC(xqmstats.xs_qm_dqwants);
+                       restarts++;
+                       startagain = 1;
+                       goto dqunlock;
+               }
+
+               /*
+                * If the dquot is inactive, we are assured that it is
+                * not on the mplist or the hashlist, and that makes our
+                * life easier.
+                */
+               if (dqp->dq_flags & XFS_DQ_INACTIVE) {
+                       ASSERT(mp == NULL);
+                       ASSERT(! XFS_DQ_IS_DIRTY(dqp));
+                       ASSERT(list_empty(&dqp->q_hashlist));
+                       ASSERT(list_empty(&dqp->q_mplist));
+                       list_del_init(&dqp->q_freelist);
+                       xfs_Gqm->qm_dqfrlist_cnt--;
+                       dqpout = dqp;
+                       XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
+                       goto dqunlock;
+               }
+
+               ASSERT(dqp->q_hash);
+               ASSERT(!list_empty(&dqp->q_mplist));
+
+               /*
+                * Try to grab the flush lock. If this dquot is in the process
+                * of getting flushed to disk, we don't want to reclaim it.
+                */
+               if (!xfs_dqflock_nowait(dqp))
+                       goto dqunlock;
+
+               /*
+                * We have the flush lock so we know that this is not in the
+                * process of being flushed. So, if this is dirty, flush it
+                * DELWRI so that we don't get a freelist infested with
+                * dirty dquots.
+                */
+               if (XFS_DQ_IS_DIRTY(dqp)) {
+                       int     error;
+
+                       trace_xfs_dqreclaim_dirty(dqp);
+
+                       /*
+                        * We flush it delayed write, so don't bother
+                        * releasing the freelist lock.
+                        */
+                       error = xfs_qm_dqflush(dqp, 0);
+                       if (error) {
+                               xfs_warn(mp, "%s: dquot %p flush failed",
+                                       __func__, dqp);
+                       }
+                       goto dqunlock;
+               }
+
+               /*
+                * We're trying to get the hashlock out of order. This races
+                * with dqlookup; so, we giveup and goto the next dquot if
+                * we couldn't get the hashlock. This way, we won't starve
+                * a dqlookup process that holds the hashlock that is
+                * waiting for the freelist lock.
+                */
+               if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
+                       restarts++;
+                       goto dqfunlock;
+               }
+
+               /*
+                * This races with dquot allocation code as well as dqflush_all
+                * and reclaim code. So, if we failed to grab the mplist lock,
+                * giveup everything and start over.
+                */
+               if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
+                       restarts++;
+                       startagain = 1;
+                       goto qhunlock;
+               }
+
+               ASSERT(dqp->q_nrefs == 0);
+               list_del_init(&dqp->q_mplist);
+               mp->m_quotainfo->qi_dquots--;
+               mp->m_quotainfo->qi_dqreclaims++;
+               list_del_init(&dqp->q_hashlist);
+               dqp->q_hash->qh_version++;
+               list_del_init(&dqp->q_freelist);
+               xfs_Gqm->qm_dqfrlist_cnt--;
+               dqpout = dqp;
+               mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+qhunlock:
+               mutex_unlock(&dqp->q_hash->qh_lock);
+dqfunlock:
+               xfs_dqfunlock(dqp);
+dqunlock:
+               xfs_dqunlock(dqp);
+               if (dqpout)
+                       break;
+               if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
+                       break;
+               if (startagain) {
+                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+                       goto again;
+               }
+       }
+       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+       return dqpout;
+}
+
+/*
+ * Traverse the freelist of dquots and attempt to reclaim a maximum of
+ * 'howmany' dquots. This operation races with dqlookup(), and attempts to
+ * favor the lookup function ...
+ */
+STATIC int
+xfs_qm_shake_freelist(
+       int     howmany)
+{
+       int             nreclaimed = 0;
+       xfs_dquot_t     *dqp;
+
+       if (howmany <= 0)
+               return 0;
+
+       while (nreclaimed < howmany) {
+               dqp = xfs_qm_dqreclaim_one();
+               if (!dqp)
+                       return nreclaimed;
+               xfs_qm_dqdestroy(dqp);
+               nreclaimed++;
+       }
+       return nreclaimed;
+}
+
+/*
+ * The kmem_shake interface is invoked when memory is running low.
+ */
+/* ARGSUSED */
+STATIC int
+xfs_qm_shake(
+       struct shrinker *shrink,
+       struct shrink_control *sc)
+{
+       int     ndqused, nfree, n;
+       gfp_t gfp_mask = sc->gfp_mask;
+
+       if (!kmem_shake_allow(gfp_mask))
+               return 0;
+       if (!xfs_Gqm)
+               return 0;
+
+       nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
+       /* incore dquots in all f/s's */
+       ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
+
+       ASSERT(ndqused >= 0);
+
+       if (nfree <= ndqused && nfree < ndquot)
+               return 0;
+
+       ndqused *= xfs_Gqm->qm_dqfree_ratio;    /* target # of free dquots */
+       n = nfree - ndqused - ndquot;           /* # over target */
+
+       return xfs_qm_shake_freelist(MAX(nfree, n));
+}
+
+
+/*------------------------------------------------------------------*/
+
+/*
+ * Return a new incore dquot. Depending on the number of
+ * dquots in the system, we either allocate a new one on the kernel heap,
+ * or reclaim a free one.
+ * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
+ * to reclaim an existing one from the freelist.
+ */
+boolean_t
+xfs_qm_dqalloc_incore(
+       xfs_dquot_t **O_dqpp)
+{
+       xfs_dquot_t     *dqp;
+
+       /*
+        * Check against high water mark to see if we want to pop
+        * a nincompoop dquot off the freelist.
+        */
+       if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
+               /*
+                * Try to recycle a dquot from the freelist.
+                */
+               if ((dqp = xfs_qm_dqreclaim_one())) {
+                       XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
+                       /*
+                        * Just zero the core here. The rest will get
+                        * reinitialized by caller. XXX we shouldn't even
+                        * do this zero ...
+                        */
+                       memset(&dqp->q_core, 0, sizeof(dqp->q_core));
+                       *O_dqpp = dqp;
+                       return B_FALSE;
+               }
+               XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
+       }
+
+       /*
+        * Allocate a brand new dquot on the kernel heap and return it
+        * to the caller to initialize.
+        */
+       ASSERT(xfs_Gqm->qm_dqzone != NULL);
+       *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
+       atomic_inc(&xfs_Gqm->qm_totaldquots);
+
+       return B_TRUE;
+}
+
+
+/*
+ * Start a transaction and write the incore superblock changes to
+ * disk. flags parameter indicates which fields have changed.
+ */
+int
+xfs_qm_write_sb_changes(
+       xfs_mount_t     *mp,
+       __int64_t       flags)
+{
+       xfs_trans_t     *tp;
+       int             error;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
+       if ((error = xfs_trans_reserve(tp, 0,
+                                     mp->m_sb.sb_sectsize + 128, 0,
+                                     0,
+                                     XFS_DEFAULT_LOG_COUNT))) {
+               xfs_trans_cancel(tp, 0);
+               return error;
+       }
+
+       xfs_mod_sb(tp, flags);
+       error = xfs_trans_commit(tp, 0);
+
+       return error;
+}
+
+
+/* --------------- utility functions for vnodeops ---------------- */
+
+
+/*
+ * Given an inode, a uid, gid and prid make sure that we have
+ * allocated relevant dquot(s) on disk, and that we won't exceed inode
+ * quotas by creating this file.
+ * This also attaches dquot(s) to the given inode after locking it,
+ * and returns the dquots corresponding to the uid and/or gid.
+ *
+ * in  : inode (unlocked)
+ * out : udquot, gdquot with references taken and unlocked
+ */
+int
+xfs_qm_vop_dqalloc(
+       struct xfs_inode        *ip,
+       uid_t                   uid,
+       gid_t                   gid,
+       prid_t                  prid,
+       uint                    flags,
+       struct xfs_dquot        **O_udqpp,
+       struct xfs_dquot        **O_gdqpp)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_dquot        *uq, *gq;
+       int                     error;
+       uint                    lockflags;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return 0;
+
+       lockflags = XFS_ILOCK_EXCL;
+       xfs_ilock(ip, lockflags);
+
+       if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
+               gid = ip->i_d.di_gid;
+
+       /*
+        * Attach the dquot(s) to this inode, doing a dquot allocation
+        * if necessary. The dquot(s) will not be locked.
+        */
+       if (XFS_NOT_DQATTACHED(mp, ip)) {
+               error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
+               if (error) {
+                       xfs_iunlock(ip, lockflags);
+                       return error;
+               }
+       }
+
+       uq = gq = NULL;
+       if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
+               if (ip->i_d.di_uid != uid) {
+                       /*
+                        * What we need is the dquot that has this uid, and
+                        * if we send the inode to dqget, the uid of the inode
+                        * takes priority over what's sent in the uid argument.
+                        * We must unlock inode here before calling dqget if
+                        * we're not sending the inode, because otherwise
+                        * we'll deadlock by doing trans_reserve while
+                        * holding ilock.
+                        */
+                       xfs_iunlock(ip, lockflags);
+                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
+                                                XFS_DQ_USER,
+                                                XFS_QMOPT_DQALLOC |
+                                                XFS_QMOPT_DOWARN,
+                                                &uq))) {
+                               ASSERT(error != ENOENT);
+                               return error;
+                       }
+                       /*
+                        * Get the ilock in the right order.
+                        */
+                       xfs_dqunlock(uq);
+                       lockflags = XFS_ILOCK_SHARED;
+                       xfs_ilock(ip, lockflags);
+               } else {
+                       /*
+                        * Take an extra reference, because we'll return
+                        * this to caller
+                        */
+                       ASSERT(ip->i_udquot);
+                       uq = ip->i_udquot;
+                       xfs_dqlock(uq);
+                       XFS_DQHOLD(uq);
+                       xfs_dqunlock(uq);
+               }
+       }
+       if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
+               if (ip->i_d.di_gid != gid) {
+                       xfs_iunlock(ip, lockflags);
+                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
+                                                XFS_DQ_GROUP,
+                                                XFS_QMOPT_DQALLOC |
+                                                XFS_QMOPT_DOWARN,
+                                                &gq))) {
+                               if (uq)
+                                       xfs_qm_dqrele(uq);
+                               ASSERT(error != ENOENT);
+                               return error;
+                       }
+                       xfs_dqunlock(gq);
+                       lockflags = XFS_ILOCK_SHARED;
+                       xfs_ilock(ip, lockflags);
+               } else {
+                       ASSERT(ip->i_gdquot);
+                       gq = ip->i_gdquot;
+                       xfs_dqlock(gq);
+                       XFS_DQHOLD(gq);
+                       xfs_dqunlock(gq);
+               }
+       } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
+               if (xfs_get_projid(ip) != prid) {
+                       xfs_iunlock(ip, lockflags);
+                       if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
+                                                XFS_DQ_PROJ,
+                                                XFS_QMOPT_DQALLOC |
+                                                XFS_QMOPT_DOWARN,
+                                                &gq))) {
+                               if (uq)
+                                       xfs_qm_dqrele(uq);
+                               ASSERT(error != ENOENT);
+                               return (error);
+                       }
+                       xfs_dqunlock(gq);
+                       lockflags = XFS_ILOCK_SHARED;
+                       xfs_ilock(ip, lockflags);
+               } else {
+                       ASSERT(ip->i_gdquot);
+                       gq = ip->i_gdquot;
+                       xfs_dqlock(gq);
+                       XFS_DQHOLD(gq);
+                       xfs_dqunlock(gq);
+               }
+       }
+       if (uq)
+               trace_xfs_dquot_dqalloc(ip);
+
+       xfs_iunlock(ip, lockflags);
+       if (O_udqpp)
+               *O_udqpp = uq;
+       else if (uq)
+               xfs_qm_dqrele(uq);
+       if (O_gdqpp)
+               *O_gdqpp = gq;
+       else if (gq)
+               xfs_qm_dqrele(gq);
+       return 0;
+}
+
+/*
+ * Actually transfer ownership, and do dquot modifications.
+ * These were already reserved.
+ */
+xfs_dquot_t *
+xfs_qm_vop_chown(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *ip,
+       xfs_dquot_t     **IO_olddq,
+       xfs_dquot_t     *newdq)
+{
+       xfs_dquot_t     *prevdq;
+       uint            bfield = XFS_IS_REALTIME_INODE(ip) ?
+                                XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
+
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
+
+       /* old dquot */
+       prevdq = *IO_olddq;
+       ASSERT(prevdq);
+       ASSERT(prevdq != newdq);
+
+       xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
+       xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
+
+       /* the sparkling new dquot */
+       xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
+       xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
+
+       /*
+        * Take an extra reference, because the inode
+        * is going to keep this dquot pointer even
+        * after the trans_commit.
+        */
+       xfs_dqlock(newdq);
+       XFS_DQHOLD(newdq);
+       xfs_dqunlock(newdq);
+       *IO_olddq = newdq;
+
+       return prevdq;
+}
+
+/*
+ * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
+ */
+int
+xfs_qm_vop_chown_reserve(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *ip,
+       xfs_dquot_t     *udqp,
+       xfs_dquot_t     *gdqp,
+       uint            flags)
+{
+       xfs_mount_t     *mp = ip->i_mount;
+       uint            delblks, blkflags, prjflags = 0;
+       xfs_dquot_t     *unresudq, *unresgdq, *delblksudq, *delblksgdq;
+       int             error;
+
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       delblks = ip->i_delayed_blks;
+       delblksudq = delblksgdq = unresudq = unresgdq = NULL;
+       blkflags = XFS_IS_REALTIME_INODE(ip) ?
+                       XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
+
+       if (XFS_IS_UQUOTA_ON(mp) && udqp &&
+           ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
+               delblksudq = udqp;
+               /*
+                * If there are delayed allocation blocks, then we have to
+                * unreserve those from the old dquot, and add them to the
+                * new dquot.
+                */
+               if (delblks) {
+                       ASSERT(ip->i_udquot);
+                       unresudq = ip->i_udquot;
+               }
+       }
+       if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
+               if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
+                    xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
+                       prjflags = XFS_QMOPT_ENOSPC;
+
+               if (prjflags ||
+                   (XFS_IS_GQUOTA_ON(ip->i_mount) &&
+                    ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
+                       delblksgdq = gdqp;
+                       if (delblks) {
+                               ASSERT(ip->i_gdquot);
+                               unresgdq = ip->i_gdquot;
+                       }
+               }
+       }
+
+       if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
+                               delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
+                               flags | blkflags | prjflags)))
+               return (error);
+
+       /*
+        * Do the delayed blks reservations/unreservations now. Since, these
+        * are done without the help of a transaction, if a reservation fails
+        * its previous reservations won't be automatically undone by trans
+        * code. So, we have to do it manually here.
+        */
+       if (delblks) {
+               /*
+                * Do the reservations first. Unreservation can't fail.
+                */
+               ASSERT(delblksudq || delblksgdq);
+               ASSERT(unresudq || unresgdq);
+               if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+                               delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
+                               flags | blkflags | prjflags)))
+                       return (error);
+               xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+                               unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
+                               blkflags);
+       }
+
+       return (0);
+}
+
+int
+xfs_qm_vop_rename_dqattach(
+       struct xfs_inode        **i_tab)
+{
+       struct xfs_mount        *mp = i_tab[0]->i_mount;
+       int                     i;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return 0;
+
+       for (i = 0; (i < 4 && i_tab[i]); i++) {
+               struct xfs_inode        *ip = i_tab[i];
+               int                     error;
+
+               /*
+                * Watch out for duplicate entries in the table.
+                */
+               if (i == 0 || ip != i_tab[i-1]) {
+                       if (XFS_NOT_DQATTACHED(mp, ip)) {
+                               error = xfs_qm_dqattach(ip, 0);
+                               if (error)
+                                       return error;
+                       }
+               }
+       }
+       return 0;
+}
+
+void
+xfs_qm_vop_create_dqattach(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       struct xfs_dquot        *udqp,
+       struct xfs_dquot        *gdqp)
+{
+       struct xfs_mount        *mp = tp->t_mountp;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return;
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+       if (udqp) {
+               xfs_dqlock(udqp);
+               XFS_DQHOLD(udqp);
+               xfs_dqunlock(udqp);
+               ASSERT(ip->i_udquot == NULL);
+               ip->i_udquot = udqp;
+               ASSERT(XFS_IS_UQUOTA_ON(mp));
+               ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
+               xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
+       }
+       if (gdqp) {
+               xfs_dqlock(gdqp);
+               XFS_DQHOLD(gdqp);
+               xfs_dqunlock(gdqp);
+               ASSERT(ip->i_gdquot == NULL);
+               ip->i_gdquot = gdqp;
+               ASSERT(XFS_IS_OQUOTA_ON(mp));
+               ASSERT((XFS_IS_GQUOTA_ON(mp) ?
+                       ip->i_d.di_gid : xfs_get_projid(ip)) ==
+                               be32_to_cpu(gdqp->q_core.d_id));
+               xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
+       }
+}
+
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h

new file mode 100644 (file)

index 0000000..43b9abe
--- /dev/null
+++ b/fs/xfs/xfs_qm.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_QM_H__
+#define __XFS_QM_H__
+
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
+#include "xfs_quota_priv.h"
+#include "xfs_qm_stats.h"
+
+struct xfs_qm;
+struct xfs_inode;
+
+extern uint            ndquot;
+extern struct mutex    xfs_Gqm_lock;
+extern struct xfs_qm   *xfs_Gqm;
+extern kmem_zone_t     *qm_dqzone;
+extern kmem_zone_t     *qm_dqtrxzone;
+
+/*
+ * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
+ * iterate over the mountpt's dquot list in one call.
+ */
+#define XFS_QM_SYNC_MAX_RESTARTS       7
+
+/*
+ * Ditto, for xfs_qm_dqreclaim_one.
+ */
+#define XFS_QM_RECLAIM_MAX_RESTARTS    4
+
+/*
+ * Ideal ratio of free to in use dquots. Quota manager makes an attempt
+ * to keep this balance.
+ */
+#define XFS_QM_DQFREE_RATIO            2
+
+/*
+ * Dquot hashtable constants/threshold values.
+ */
+#define XFS_QM_HASHSIZE_LOW            (PAGE_SIZE / sizeof(xfs_dqhash_t))
+#define XFS_QM_HASHSIZE_HIGH           ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))
+
+/*
+ * This defines the unit of allocation of dquots.
+ * Currently, it is just one file system block, and a 4K blk contains 30
+ * (136 * 30 = 4080) dquots. It's probably not worth trying to make
+ * this more dynamic.
+ * XXXsup However, if this number is changed, we have to make sure that we don't
+ * implicitly assume that we do allocations in chunks of a single filesystem
+ * block in the dquot/xqm code.
+ */
+#define XFS_DQUOT_CLUSTER_SIZE_FSB     (xfs_filblks_t)1
+
+typedef xfs_dqhash_t   xfs_dqlist_t;
+
+/*
+ * Quota Manager (global) structure. Lives only in core.
+ */
+typedef struct xfs_qm {
+       xfs_dqlist_t    *qm_usr_dqhtable;/* udquot hash table */
+       xfs_dqlist_t    *qm_grp_dqhtable;/* gdquot hash table */
+       uint             qm_dqhashmask;  /* # buckets in dq hashtab - 1 */
+       struct list_head qm_dqfrlist;    /* freelist of dquots */
+       struct mutex     qm_dqfrlist_lock;
+       int              qm_dqfrlist_cnt;
+       atomic_t         qm_totaldquots; /* total incore dquots */
+       uint             qm_nrefs;       /* file systems with quota on */
+       int              qm_dqfree_ratio;/* ratio of free to inuse dquots */
+       kmem_zone_t     *qm_dqzone;      /* dquot mem-alloc zone */
+       kmem_zone_t     *qm_dqtrxzone;   /* t_dqinfo of transactions */
+} xfs_qm_t;
+
+/*
+ * Various quota information for individual filesystems.
+ * The mount structure keeps a pointer to this.
+ */
+typedef struct xfs_quotainfo {
+       xfs_inode_t     *qi_uquotaip;    /* user quota inode */
+       xfs_inode_t     *qi_gquotaip;    /* group quota inode */
+       struct list_head qi_dqlist;      /* all dquots in filesys */
+       struct mutex     qi_dqlist_lock;
+       int              qi_dquots;
+       int              qi_dqreclaims;  /* a change here indicates
+                                           a removal in the dqlist */
+       time_t           qi_btimelimit;  /* limit for blks timer */
+       time_t           qi_itimelimit;  /* limit for inodes timer */
+       time_t           qi_rtbtimelimit;/* limit for rt blks timer */
+       xfs_qwarncnt_t   qi_bwarnlimit;  /* limit for blks warnings */
+       xfs_qwarncnt_t   qi_iwarnlimit;  /* limit for inodes warnings */
+       xfs_qwarncnt_t   qi_rtbwarnlimit;/* limit for rt blks warnings */
+       struct mutex     qi_quotaofflock;/* to serialize quotaoff */
+       xfs_filblks_t    qi_dqchunklen;  /* # BBs in a chunk of dqs */
+       uint             qi_dqperchunk;  /* # ondisk dqs in above chunk */
+       xfs_qcnt_t       qi_bhardlimit;  /* default data blk hard limit */
+       xfs_qcnt_t       qi_bsoftlimit;  /* default data blk soft limit */
+       xfs_qcnt_t       qi_ihardlimit;  /* default inode count hard limit */
+       xfs_qcnt_t       qi_isoftlimit;  /* default inode count soft limit */
+       xfs_qcnt_t       qi_rtbhardlimit;/* default realtime blk hard limit */
+       xfs_qcnt_t       qi_rtbsoftlimit;/* default realtime blk soft limit */
+} xfs_quotainfo_t;
+
+
+extern void    xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
+extern int     xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
+                       xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
+extern void    xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *);
+extern void    xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *);
+
+/*
+ * We keep the usr and grp dquots separately so that locking will be easier
+ * to do at commit time. All transactions that we know of at this point
+ * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value.
+ */
+#define XFS_QM_TRANS_MAXDQS            2
+typedef struct xfs_dquot_acct {
+       xfs_dqtrx_t     dqa_usrdquots[XFS_QM_TRANS_MAXDQS];
+       xfs_dqtrx_t     dqa_grpdquots[XFS_QM_TRANS_MAXDQS];
+} xfs_dquot_acct_t;
+
+/*
+ * Users are allowed to have a usage exceeding their softlimit for
+ * a period this long.
+ */
+#define XFS_QM_BTIMELIMIT      (7 * 24*60*60)          /* 1 week */
+#define XFS_QM_RTBTIMELIMIT    (7 * 24*60*60)          /* 1 week */
+#define XFS_QM_ITIMELIMIT      (7 * 24*60*60)          /* 1 week */
+
+#define XFS_QM_BWARNLIMIT      5
+#define XFS_QM_IWARNLIMIT      5
+#define XFS_QM_RTBWARNLIMIT    5
+
+extern void            xfs_qm_destroy_quotainfo(xfs_mount_t *);
+extern int             xfs_qm_quotacheck(xfs_mount_t *);
+extern int             xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
+
+/* dquot stuff */
+extern boolean_t       xfs_qm_dqalloc_incore(xfs_dquot_t **);
+extern int             xfs_qm_dqpurge_all(xfs_mount_t *, uint);
+extern void            xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
+
+/* quota ops */
+extern int             xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
+extern int             xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
+                                       fs_disk_quota_t *);
+extern int             xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
+                                       fs_disk_quota_t *);
+extern int             xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
+extern int             xfs_qm_scall_quotaon(xfs_mount_t *, uint);
+extern int             xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
+
+#endif /* __XFS_QM_H__ */
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c

new file mode 100644 (file)

index 0000000..a0a829a
--- /dev/null
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_qm.h"
+
+
+STATIC void
+xfs_fill_statvfs_from_dquot(
+       struct kstatfs          *statp,
+       xfs_disk_dquot_t        *dp)
+{
+       __uint64_t              limit;
+
+       limit = dp->d_blk_softlimit ?
+               be64_to_cpu(dp->d_blk_softlimit) :
+               be64_to_cpu(dp->d_blk_hardlimit);
+       if (limit && statp->f_blocks > limit) {
+               statp->f_blocks = limit;
+               statp->f_bfree = statp->f_bavail =
+                       (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ?
+                        (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0;
+       }
+
+       limit = dp->d_ino_softlimit ?
+               be64_to_cpu(dp->d_ino_softlimit) :
+               be64_to_cpu(dp->d_ino_hardlimit);
+       if (limit && statp->f_files > limit) {
+               statp->f_files = limit;
+               statp->f_ffree =
+                       (statp->f_files > be64_to_cpu(dp->d_icount)) ?
+                        (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0;
+       }
+}
+
+
+/*
+ * Directory tree accounting is implemented using project quotas, where
+ * the project identifier is inherited from parent directories.
+ * A statvfs (df, etc.) of a directory that is using project quota should
+ * return a statvfs of the project, not the entire filesystem.
+ * This makes such trees appear as if they are filesystems in themselves.
+ */
+void
+xfs_qm_statvfs(
+       xfs_inode_t             *ip,
+       struct kstatfs          *statp)
+{
+       xfs_mount_t             *mp = ip->i_mount;
+       xfs_dquot_t             *dqp;
+
+       if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
+               xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
+               xfs_qm_dqput(dqp);
+       }
+}
+
+int
+xfs_qm_newmount(
+       xfs_mount_t     *mp,
+       uint            *needquotamount,
+       uint            *quotaflags)
+{
+       uint            quotaondisk;
+       uint            uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
+
+       quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
+                               (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
+
+       if (quotaondisk) {
+               uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT;
+               pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT;
+               gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT;
+       }
+
+       /*
+        * If the device itself is read-only, we can't allow
+        * the user to change the state of quota on the mount -
+        * this would generate a transaction on the ro device,
+        * which would lead to an I/O error and shutdown
+        */
+
+       if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
+           (!uquotaondisk &&  XFS_IS_UQUOTA_ON(mp)) ||
+            (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) ||
+           (!pquotaondisk &&  XFS_IS_PQUOTA_ON(mp)) ||
+            (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
+           (!gquotaondisk &&  XFS_IS_OQUOTA_ON(mp)))  &&
+           xfs_dev_is_read_only(mp, "changing quota state")) {
+               xfs_warn(mp, "please mount with%s%s%s%s.",
+                       (!quotaondisk ? "out quota" : ""),
+                       (uquotaondisk ? " usrquota" : ""),
+                       (pquotaondisk ? " prjquota" : ""),
+                       (gquotaondisk ? " grpquota" : ""));
+               return XFS_ERROR(EPERM);
+       }
+
+       if (XFS_IS_QUOTA_ON(mp) || quotaondisk) {
+               /*
+                * Call mount_quotas at this point only if we won't have to do
+                * a quotacheck.
+                */
+               if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) {
+                       /*
+                        * If an error occurred, qm_mount_quotas code
+                        * has already disabled quotas. So, just finish
+                        * mounting, and get on with the boring life
+                        * without disk quotas.
+                        */
+                       xfs_qm_mount_quotas(mp);
+               } else {
+                       /*
+                        * Clear the quota flags, but remember them. This
+                        * is so that the quota code doesn't get invoked
+                        * before we're ready. This can happen when an
+                        * inode goes inactive and wants to free blocks,
+                        * or via xfs_log_mount_finish.
+                        */
+                       *needquotamount = B_TRUE;
+                       *quotaflags = mp->m_qflags;
+                       mp->m_qflags = 0;
+               }
+       }
+
+       return 0;
+}
+
+void __init
+xfs_qm_init(void)
+{
+       printk(KERN_INFO "SGI XFS Quota Management subsystem\n");
+       mutex_init(&xfs_Gqm_lock);
+       xfs_qm_init_procfs();
+}
+
+void __exit
+xfs_qm_exit(void)
+{
+       xfs_qm_cleanup_procfs();
+       if (qm_dqzone)
+               kmem_zone_destroy(qm_dqzone);
+       if (qm_dqtrxzone)
+               kmem_zone_destroy(qm_dqtrxzone);
+}
diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c

new file mode 100644 (file)

index 0000000..8671a0b
--- /dev/null
+++ b/fs/xfs/xfs_qm_stats.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_qm.h"
+
+struct xqmstats xqmstats;
+
+static int xqm_proc_show(struct seq_file *m, void *v)
+{
+       /* maximum; incore; ratio free to inuse; freelist */
+       seq_printf(m, "%d\t%d\t%d\t%u\n",
+                       ndquot,
+                       xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
+                       xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
+                       xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
+       return 0;
+}
+
+static int xqm_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, xqm_proc_show, NULL);
+}
+
+static const struct file_operations xqm_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = xqm_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static int xqmstat_proc_show(struct seq_file *m, void *v)
+{
+       /* quota performance statistics */
+       seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
+                       xqmstats.xs_qm_dqreclaims,
+                       xqmstats.xs_qm_dqreclaim_misses,
+                       xqmstats.xs_qm_dquot_dups,
+                       xqmstats.xs_qm_dqcachemisses,
+                       xqmstats.xs_qm_dqcachehits,
+                       xqmstats.xs_qm_dqwants,
+                       xqmstats.xs_qm_dqshake_reclaims,
+                       xqmstats.xs_qm_dqinact_reclaims);
+       return 0;
+}
+
+static int xqmstat_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, xqmstat_proc_show, NULL);
+}
+
+static const struct file_operations xqmstat_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = xqmstat_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+void
+xfs_qm_init_procfs(void)
+{
+       proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops);
+       proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops);
+}
+
+void
+xfs_qm_cleanup_procfs(void)
+{
+       remove_proc_entry("fs/xfs/xqm", NULL);
+       remove_proc_entry("fs/xfs/xqmstat", NULL);
+}
diff --git a/fs/xfs/xfs_qm_stats.h b/fs/xfs/xfs_qm_stats.h

new file mode 100644 (file)

index 0000000..5b964fc
--- /dev/null
+++ b/fs/xfs/xfs_qm_stats.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2002 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_QM_STATS_H__
+#define __XFS_QM_STATS_H__
+
+#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
+
+/*
+ * XQM global statistics
+ */
+struct xqmstats {
+       __uint32_t              xs_qm_dqreclaims;
+       __uint32_t              xs_qm_dqreclaim_misses;
+       __uint32_t              xs_qm_dquot_dups;
+       __uint32_t              xs_qm_dqcachemisses;
+       __uint32_t              xs_qm_dqcachehits;
+       __uint32_t              xs_qm_dqwants;
+       __uint32_t              xs_qm_dqshake_reclaims;
+       __uint32_t              xs_qm_dqinact_reclaims;
+};
+
+extern struct xqmstats xqmstats;
+
+# define XQM_STATS_INC(count)  ( (count)++ )
+
+extern void xfs_qm_init_procfs(void);
+extern void xfs_qm_cleanup_procfs(void);
+
+#else
+
+# define XQM_STATS_INC(count)  do { } while (0)
+
+static inline void xfs_qm_init_procfs(void) { };
+static inline void xfs_qm_cleanup_procfs(void) { };
+
+#endif
+
+#endif /* __XFS_QM_STATS_H__ */
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c

new file mode 100644 (file)

index 0000000..609246f
--- /dev/null
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -0,0 +1,906 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/capability.h>
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_qm.h"
+#include "xfs_trace.h"
+
+STATIC int     xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
+STATIC int     xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
+                                       uint);
+STATIC uint    xfs_qm_export_flags(uint);
+STATIC uint    xfs_qm_export_qtype_flags(uint);
+STATIC void    xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
+                                       fs_disk_quota_t *);
+
+
+/*
+ * Turn off quota accounting and/or enforcement for all udquots and/or
+ * gdquots. Called only at unmount time.
+ *
+ * This assumes that there are no dquots of this file system cached
+ * incore, and modifies the ondisk dquot directly. Therefore, for example,
+ * it is an error to call this twice, without purging the cache.
+ */
+int
+xfs_qm_scall_quotaoff(
+       xfs_mount_t             *mp,
+       uint                    flags)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       uint                    dqtype;
+       int                     error;
+       uint                    inactivate_flags;
+       xfs_qoff_logitem_t      *qoffstart;
+       int                     nculprits;
+
+       /*
+        * No file system can have quotas enabled on disk but not in core.
+        * Note that quota utilities (like quotaoff) _expect_
+        * errno == EEXIST here.
+        */
+       if ((mp->m_qflags & flags) == 0)
+               return XFS_ERROR(EEXIST);
+       error = 0;
+
+       flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
+
+       /*
+        * We don't want to deal with two quotaoffs messing up each other,
+        * so we're going to serialize it. quotaoff isn't exactly a performance
+        * critical thing.
+        * If quotaoff, then we must be dealing with the root filesystem.
+        */
+       ASSERT(q);
+       mutex_lock(&q->qi_quotaofflock);
+
+       /*
+        * If we're just turning off quota enforcement, change mp and go.
+        */
+       if ((flags & XFS_ALL_QUOTA_ACCT) == 0) {
+               mp->m_qflags &= ~(flags);
+
+               spin_lock(&mp->m_sb_lock);
+               mp->m_sb.sb_qflags = mp->m_qflags;
+               spin_unlock(&mp->m_sb_lock);
+               mutex_unlock(&q->qi_quotaofflock);
+
+               /* XXX what to do if error ? Revert back to old vals incore ? */
+               error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
+               return (error);
+       }
+
+       dqtype = 0;
+       inactivate_flags = 0;
+       /*
+        * If accounting is off, we must turn enforcement off, clear the
+        * quota 'CHKD' certificate to make it known that we have to
+        * do a quotacheck the next time this quota is turned on.
+        */
+       if (flags & XFS_UQUOTA_ACCT) {
+               dqtype |= XFS_QMOPT_UQUOTA;
+               flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD);
+               inactivate_flags |= XFS_UQUOTA_ACTIVE;
+       }
+       if (flags & XFS_GQUOTA_ACCT) {
+               dqtype |= XFS_QMOPT_GQUOTA;
+               flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
+               inactivate_flags |= XFS_GQUOTA_ACTIVE;
+       } else if (flags & XFS_PQUOTA_ACCT) {
+               dqtype |= XFS_QMOPT_PQUOTA;
+               flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
+               inactivate_flags |= XFS_PQUOTA_ACTIVE;
+       }
+
+       /*
+        * Nothing to do?  Don't complain. This happens when we're just
+        * turning off quota enforcement.
+        */
+       if ((mp->m_qflags & flags) == 0)
+               goto out_unlock;
+
+       /*
+        * Write the LI_QUOTAOFF log record, and do SB changes atomically,
+        * and synchronously. If we fail to write, we should abort the
+        * operation as it cannot be recovered safely if we crash.
+        */
+       error = xfs_qm_log_quotaoff(mp, &qoffstart, flags);
+       if (error)
+               goto out_unlock;
+
+       /*
+        * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
+        * to take care of the race between dqget and quotaoff. We don't take
+        * any special locks to reset these bits. All processes need to check
+        * these bits *after* taking inode lock(s) to see if the particular
+        * quota type is in the process of being turned off. If *ACTIVE, it is
+        * guaranteed that all dquot structures and all quotainode ptrs will all
+        * stay valid as long as that inode is kept locked.
+        *
+        * There is no turning back after this.
+        */
+       mp->m_qflags &= ~inactivate_flags;
+
+       /*
+        * Give back all the dquot reference(s) held by inodes.
+        * Here we go thru every single incore inode in this file system, and
+        * do a dqrele on the i_udquot/i_gdquot that it may have.
+        * Essentially, as long as somebody has an inode locked, this guarantees
+        * that quotas will not be turned off. This is handy because in a
+        * transaction once we lock the inode(s) and check for quotaon, we can
+        * depend on the quota inodes (and other things) being valid as long as
+        * we keep the lock(s).
+        */
+       xfs_qm_dqrele_all_inodes(mp, flags);
+
+       /*
+        * Next we make the changes in the quota flag in the mount struct.
+        * This isn't protected by a particular lock directly, because we
+        * don't want to take a mrlock every time we depend on quotas being on.
+        */
+       mp->m_qflags &= ~(flags);
+
+       /*
+        * Go through all the dquots of this file system and purge them,
+        * according to what was turned off. We may not be able to get rid
+        * of all dquots, because dquots can have temporary references that
+        * are not attached to inodes. eg. xfs_setattr, xfs_create.
+        * So, if we couldn't purge all the dquots from the filesystem,
+        * we can't get rid of the incore data structures.
+        */
+       while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype)))
+               delay(10 * nculprits);
+
+       /*
+        * Transactions that had started before ACTIVE state bit was cleared
+        * could have logged many dquots, so they'd have higher LSNs than
+        * the first QUOTAOFF log record does. If we happen to crash when
+        * the tail of the log has gone past the QUOTAOFF record, but
+        * before the last dquot modification, those dquots __will__
+        * recover, and that's not good.
+        *
+        * So, we have QUOTAOFF start and end logitems; the start
+        * logitem won't get overwritten until the end logitem appears...
+        */
+       error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
+       if (error) {
+               /* We're screwed now. Shutdown is the only option. */
+               xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+               goto out_unlock;
+       }
+
+       /*
+        * If quotas is completely disabled, close shop.
+        */
+       if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) ||
+           ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) {
+               mutex_unlock(&q->qi_quotaofflock);
+               xfs_qm_destroy_quotainfo(mp);
+               return (0);
+       }
+
+       /*
+        * Release our quotainode references if we don't need them anymore.
+        */
+       if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) {
+               IRELE(q->qi_uquotaip);
+               q->qi_uquotaip = NULL;
+       }
+       if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) {
+               IRELE(q->qi_gquotaip);
+               q->qi_gquotaip = NULL;
+       }
+
+out_unlock:
+       mutex_unlock(&q->qi_quotaofflock);
+       return error;
+}
+
+STATIC int
+xfs_qm_scall_trunc_qfile(
+       struct xfs_mount        *mp,
+       xfs_ino_t               ino)
+{
+       struct xfs_inode        *ip;
+       struct xfs_trans        *tp;
+       int                     error;
+
+       if (ino == NULLFSINO)
+               return 0;
+
+       error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
+       if (error)
+               return error;
+
+       xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
+       error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
+                                 XFS_TRANS_PERM_LOG_RES,
+                                 XFS_ITRUNCATE_LOG_COUNT);
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+               goto out_put;
+       }
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip);
+
+       error = xfs_itruncate_data(&tp, ip, 0);
+       if (error) {
+               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
+                                    XFS_TRANS_ABORT);
+               goto out_unlock;
+       }
+
+       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+
+out_unlock:
+       xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+out_put:
+       IRELE(ip);
+       return error;
+}
+
+int
+xfs_qm_scall_trunc_qfiles(
+       xfs_mount_t     *mp,
+       uint            flags)
+{
+       int             error = 0, error2 = 0;
+
+       if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
+               xfs_debug(mp, "%s: flags=%x m_qflags=%x\n",
+                       __func__, flags, mp->m_qflags);
+               return XFS_ERROR(EINVAL);
+       }
+
+       if (flags & XFS_DQ_USER)
+               error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino);
+       if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ))
+               error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
+
+       return error ? error : error2;
+}
+
+/*
+ * Switch on (a given) quota enforcement for a filesystem.  This takes
+ * effect immediately.
+ * (Switching on quota accounting must be done at mount time.)
+ */
+int
+xfs_qm_scall_quotaon(
+       xfs_mount_t     *mp,
+       uint            flags)
+{
+       int             error;
+       uint            qf;
+       __int64_t       sbflags;
+
+       flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
+       /*
+        * Switching on quota accounting must be done at mount time.
+        */
+       flags &= ~(XFS_ALL_QUOTA_ACCT);
+
+       sbflags = 0;
+
+       if (flags == 0) {
+               xfs_debug(mp, "%s: zero flags, m_qflags=%x\n",
+                       __func__, mp->m_qflags);
+               return XFS_ERROR(EINVAL);
+       }
+
+       /* No fs can turn on quotas with a delayed effect */
+       ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0);
+
+       /*
+        * Can't enforce without accounting. We check the superblock
+        * qflags here instead of m_qflags because rootfs can have
+        * quota acct on ondisk without m_qflags' knowing.
+        */
+       if (((flags & XFS_UQUOTA_ACCT) == 0 &&
+           (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
+           (flags & XFS_UQUOTA_ENFD))
+           ||
+           ((flags & XFS_PQUOTA_ACCT) == 0 &&
+           (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&
+           (flags & XFS_GQUOTA_ACCT) == 0 &&
+           (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
+           (flags & XFS_OQUOTA_ENFD))) {
+               xfs_debug(mp,
+                       "%s: Can't enforce without acct, flags=%x sbflags=%x\n",
+                       __func__, flags, mp->m_sb.sb_qflags);
+               return XFS_ERROR(EINVAL);
+       }
+       /*
+        * If everything's up to-date incore, then don't waste time.
+        */
+       if ((mp->m_qflags & flags) == flags)
+               return XFS_ERROR(EEXIST);
+
+       /*
+        * Change sb_qflags on disk but not incore mp->qflags
+        * if this is the root filesystem.
+        */
+       spin_lock(&mp->m_sb_lock);
+       qf = mp->m_sb.sb_qflags;
+       mp->m_sb.sb_qflags = qf | flags;
+       spin_unlock(&mp->m_sb_lock);
+
+       /*
+        * There's nothing to change if it's the same.
+        */
+       if ((qf & flags) == flags && sbflags == 0)
+               return XFS_ERROR(EEXIST);
+       sbflags |= XFS_SB_QFLAGS;
+
+       if ((error = xfs_qm_write_sb_changes(mp, sbflags)))
+               return (error);
+       /*
+        * If we aren't trying to switch on quota enforcement, we are done.
+        */
+       if  (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) !=
+            (mp->m_qflags & XFS_UQUOTA_ACCT)) ||
+            ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) !=
+            (mp->m_qflags & XFS_PQUOTA_ACCT)) ||
+            ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
+            (mp->m_qflags & XFS_GQUOTA_ACCT)) ||
+           (flags & XFS_ALL_QUOTA_ENFD) == 0)
+               return (0);
+
+       if (! XFS_IS_QUOTA_RUNNING(mp))
+               return XFS_ERROR(ESRCH);
+
+       /*
+        * Switch on quota enforcement in core.
+        */
+       mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
+       mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
+       mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
+
+       return (0);
+}
+
+
+/*
+ * Return quota status information, such as uquota-off, enforcements, etc.
+ */
+int
+xfs_qm_scall_getqstat(
+       struct xfs_mount        *mp,
+       struct fs_quota_stat    *out)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       struct xfs_inode        *uip, *gip;
+       boolean_t               tempuqip, tempgqip;
+
+       uip = gip = NULL;
+       tempuqip = tempgqip = B_FALSE;
+       memset(out, 0, sizeof(fs_quota_stat_t));
+
+       out->qs_version = FS_QSTAT_VERSION;
+       if (!xfs_sb_version_hasquota(&mp->m_sb)) {
+               out->qs_uquota.qfs_ino = NULLFSINO;
+               out->qs_gquota.qfs_ino = NULLFSINO;
+               return (0);
+       }
+       out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
+                                                       (XFS_ALL_QUOTA_ACCT|
+                                                        XFS_ALL_QUOTA_ENFD));
+       out->qs_pad = 0;
+       out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
+       out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
+
+       if (q) {
+               uip = q->qi_uquotaip;
+               gip = q->qi_gquotaip;
+       }
+       if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
+               if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
+                                       0, 0, &uip) == 0)
+                       tempuqip = B_TRUE;
+       }
+       if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
+               if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
+                                       0, 0, &gip) == 0)
+                       tempgqip = B_TRUE;
+       }
+       if (uip) {
+               out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
+               out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
+               if (tempuqip)
+                       IRELE(uip);
+       }
+       if (gip) {
+               out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
+               out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
+               if (tempgqip)
+                       IRELE(gip);
+       }
+       if (q) {
+               out->qs_incoredqs = q->qi_dquots;
+               out->qs_btimelimit = q->qi_btimelimit;
+               out->qs_itimelimit = q->qi_itimelimit;
+               out->qs_rtbtimelimit = q->qi_rtbtimelimit;
+               out->qs_bwarnlimit = q->qi_bwarnlimit;
+               out->qs_iwarnlimit = q->qi_iwarnlimit;
+       }
+       return 0;
+}
+
+#define XFS_DQ_MASK \
+       (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK)
+
+/*
+ * Adjust quota limits, and start/stop timers accordingly.
+ */
+int
+xfs_qm_scall_setqlim(
+       xfs_mount_t             *mp,
+       xfs_dqid_t              id,
+       uint                    type,
+       fs_disk_quota_t         *newlim)
+{
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       xfs_disk_dquot_t        *ddq;
+       xfs_dquot_t             *dqp;
+       xfs_trans_t             *tp;
+       int                     error;
+       xfs_qcnt_t              hard, soft;
+
+       if (newlim->d_fieldmask & ~XFS_DQ_MASK)
+               return EINVAL;
+       if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
+               return 0;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
+       if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
+                                     0, 0, XFS_DEFAULT_LOG_COUNT))) {
+               xfs_trans_cancel(tp, 0);
+               return (error);
+       }
+
+       /*
+        * We don't want to race with a quotaoff so take the quotaoff lock.
+        * (We don't hold an inode lock, so there's nothing else to stop
+        * a quotaoff from happening). (XXXThis doesn't currently happen
+        * because we take the vfslock before calling xfs_qm_sysent).
+        */
+       mutex_lock(&q->qi_quotaofflock);
+
+       /*
+        * Get the dquot (locked), and join it to the transaction.
+        * Allocate the dquot if this doesn't exist.
+        */
+       if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
+               xfs_trans_cancel(tp, XFS_TRANS_ABORT);
+               ASSERT(error != ENOENT);
+               goto out_unlock;
+       }
+       xfs_trans_dqjoin(tp, dqp);
+       ddq = &dqp->q_core;
+
+       /*
+        * Make sure that hardlimits are >= soft limits before changing.
+        */
+       hard = (newlim->d_fieldmask & FS_DQ_BHARD) ?
+               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) :
+                       be64_to_cpu(ddq->d_blk_hardlimit);
+       soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ?
+               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) :
+                       be64_to_cpu(ddq->d_blk_softlimit);
+       if (hard == 0 || hard >= soft) {
+               ddq->d_blk_hardlimit = cpu_to_be64(hard);
+               ddq->d_blk_softlimit = cpu_to_be64(soft);
+               if (id == 0) {
+                       q->qi_bhardlimit = hard;
+                       q->qi_bsoftlimit = soft;
+               }
+       } else {
+               xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft);
+       }
+       hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
+               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
+                       be64_to_cpu(ddq->d_rtb_hardlimit);
+       soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ?
+               (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) :
+                       be64_to_cpu(ddq->d_rtb_softlimit);
+       if (hard == 0 || hard >= soft) {
+               ddq->d_rtb_hardlimit = cpu_to_be64(hard);
+               ddq->d_rtb_softlimit = cpu_to_be64(soft);
+               if (id == 0) {
+                       q->qi_rtbhardlimit = hard;
+                       q->qi_rtbsoftlimit = soft;
+               }
+       } else {
+               xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
+       }
+
+       hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
+               (xfs_qcnt_t) newlim->d_ino_hardlimit :
+                       be64_to_cpu(ddq->d_ino_hardlimit);
+       soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ?
+               (xfs_qcnt_t) newlim->d_ino_softlimit :
+                       be64_to_cpu(ddq->d_ino_softlimit);
+       if (hard == 0 || hard >= soft) {
+               ddq->d_ino_hardlimit = cpu_to_be64(hard);
+               ddq->d_ino_softlimit = cpu_to_be64(soft);
+               if (id == 0) {
+                       q->qi_ihardlimit = hard;
+                       q->qi_isoftlimit = soft;
+               }
+       } else {
+               xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft);
+       }
+
+       /*
+        * Update warnings counter(s) if requested
+        */
+       if (newlim->d_fieldmask & FS_DQ_BWARNS)
+               ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns);
+       if (newlim->d_fieldmask & FS_DQ_IWARNS)
+               ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns);
+       if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
+               ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns);
+
+       if (id == 0) {
+               /*
+                * Timelimits for the super user set the relative time
+                * the other users can be over quota for this file system.
+                * If it is zero a default is used.  Ditto for the default
+                * soft and hard limit values (already done, above), and
+                * for warnings.
+                */
+               if (newlim->d_fieldmask & FS_DQ_BTIMER) {
+                       q->qi_btimelimit = newlim->d_btimer;
+                       ddq->d_btimer = cpu_to_be32(newlim->d_btimer);
+               }
+               if (newlim->d_fieldmask & FS_DQ_ITIMER) {
+                       q->qi_itimelimit = newlim->d_itimer;
+                       ddq->d_itimer = cpu_to_be32(newlim->d_itimer);
+               }
+               if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
+                       q->qi_rtbtimelimit = newlim->d_rtbtimer;
+                       ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer);
+               }
+               if (newlim->d_fieldmask & FS_DQ_BWARNS)
+                       q->qi_bwarnlimit = newlim->d_bwarns;
+               if (newlim->d_fieldmask & FS_DQ_IWARNS)
+                       q->qi_iwarnlimit = newlim->d_iwarns;
+               if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
+                       q->qi_rtbwarnlimit = newlim->d_rtbwarns;
+       } else {
+               /*
+                * If the user is now over quota, start the timelimit.
+                * The user will not be 'warned'.
+                * Note that we keep the timers ticking, whether enforcement
+                * is on or off. We don't really want to bother with iterating
+                * over all ondisk dquots and turning the timers on/off.
+                */
+               xfs_qm_adjust_dqtimers(mp, ddq);
+       }
+       dqp->dq_flags |= XFS_DQ_DIRTY;
+       xfs_trans_log_dquot(tp, dqp);
+
+       error = xfs_trans_commit(tp, 0);
+       xfs_qm_dqrele(dqp);
+
+ out_unlock:
+       mutex_unlock(&q->qi_quotaofflock);
+       return error;
+}
+
+int
+xfs_qm_scall_getquota(
+       xfs_mount_t     *mp,
+       xfs_dqid_t      id,
+       uint            type,
+       fs_disk_quota_t *out)
+{
+       xfs_dquot_t     *dqp;
+       int             error;
+
+       /*
+        * Try to get the dquot. We don't want it allocated on disk, so
+        * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
+        * exist, we'll get ENOENT back.
+        */
+       if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) {
+               return (error);
+       }
+
+       /*
+        * If everything's NULL, this dquot doesn't quite exist as far as
+        * our utility programs are concerned.
+        */
+       if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+               xfs_qm_dqput(dqp);
+               return XFS_ERROR(ENOENT);
+       }
+       /*
+        * Convert the disk dquot to the exportable format
+        */
+       xfs_qm_export_dquot(mp, &dqp->q_core, out);
+       xfs_qm_dqput(dqp);
+       return (error ? XFS_ERROR(EFAULT) : 0);
+}
+
+
+STATIC int
+xfs_qm_log_quotaoff_end(
+       xfs_mount_t             *mp,
+       xfs_qoff_logitem_t      *startqoff,
+       uint                    flags)
+{
+       xfs_trans_t             *tp;
+       int                     error;
+       xfs_qoff_logitem_t      *qoffi;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
+
+       if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2,
+                                     0, 0, XFS_DEFAULT_LOG_COUNT))) {
+               xfs_trans_cancel(tp, 0);
+               return (error);
+       }
+
+       qoffi = xfs_trans_get_qoff_item(tp, startqoff,
+                                       flags & XFS_ALL_QUOTA_ACCT);
+       xfs_trans_log_quotaoff_item(tp, qoffi);
+
+       /*
+        * We have to make sure that the transaction is secure on disk before we
+        * return and actually stop quota accounting. So, make it synchronous.
+        * We don't care about quotoff's performance.
+        */
+       xfs_trans_set_sync(tp);
+       error = xfs_trans_commit(tp, 0);
+       return (error);
+}
+
+
+STATIC int
+xfs_qm_log_quotaoff(
+       xfs_mount_t            *mp,
+       xfs_qoff_logitem_t     **qoffstartp,
+       uint                   flags)
+{
+       xfs_trans_t            *tp;
+       int                     error;
+       xfs_qoff_logitem_t     *qoffi=NULL;
+       uint                    oldsbqflag=0;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
+       if ((error = xfs_trans_reserve(tp, 0,
+                                     sizeof(xfs_qoff_logitem_t) * 2 +
+                                     mp->m_sb.sb_sectsize + 128,
+                                     0,
+                                     0,
+                                     XFS_DEFAULT_LOG_COUNT))) {
+               goto error0;
+       }
+
+       qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
+       xfs_trans_log_quotaoff_item(tp, qoffi);
+
+       spin_lock(&mp->m_sb_lock);
+       oldsbqflag = mp->m_sb.sb_qflags;
+       mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
+       spin_unlock(&mp->m_sb_lock);
+
+       xfs_mod_sb(tp, XFS_SB_QFLAGS);
+
+       /*
+        * We have to make sure that the transaction is secure on disk before we
+        * return and actually stop quota accounting. So, make it synchronous.
+        * We don't care about quotoff's performance.
+        */
+       xfs_trans_set_sync(tp);
+       error = xfs_trans_commit(tp, 0);
+
+error0:
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               /*
+                * No one else is modifying sb_qflags, so this is OK.
+                * We still hold the quotaofflock.
+                */
+               spin_lock(&mp->m_sb_lock);
+               mp->m_sb.sb_qflags = oldsbqflag;
+               spin_unlock(&mp->m_sb_lock);
+       }
+       *qoffstartp = qoffi;
+       return (error);
+}
+
+
+/*
+ * Translate an internal style on-disk-dquot to the exportable format.
+ * The main differences are that the counters/limits are all in Basic
+ * Blocks (BBs) instead of the internal FSBs, and all on-disk data has
+ * to be converted to the native endianness.
+ */
+STATIC void
+xfs_qm_export_dquot(
+       xfs_mount_t             *mp,
+       xfs_disk_dquot_t        *src,
+       struct fs_disk_quota    *dst)
+{
+       memset(dst, 0, sizeof(*dst));
+       dst->d_version = FS_DQUOT_VERSION;  /* different from src->d_version */
+       dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags);
+       dst->d_id = be32_to_cpu(src->d_id);
+       dst->d_blk_hardlimit =
+               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit));
+       dst->d_blk_softlimit =
+               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit));
+       dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit);
+       dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit);
+       dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount));
+       dst->d_icount = be64_to_cpu(src->d_icount);
+       dst->d_btimer = be32_to_cpu(src->d_btimer);
+       dst->d_itimer = be32_to_cpu(src->d_itimer);
+       dst->d_iwarns = be16_to_cpu(src->d_iwarns);
+       dst->d_bwarns = be16_to_cpu(src->d_bwarns);
+       dst->d_rtb_hardlimit =
+               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit));
+       dst->d_rtb_softlimit =
+               XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit));
+       dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount));
+       dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer);
+       dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns);
+
+       /*
+        * Internally, we don't reset all the timers when quota enforcement
+        * gets turned off. No need to confuse the user level code,
+        * so return zeroes in that case.
+        */
+       if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) ||
+           (!XFS_IS_OQUOTA_ENFORCED(mp) &&
+                       (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) {
+               dst->d_btimer = 0;
+               dst->d_itimer = 0;
+               dst->d_rtbtimer = 0;
+       }
+
+#ifdef DEBUG
+       if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) ||
+            (XFS_IS_OQUOTA_ENFORCED(mp) &&
+                       (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
+           dst->d_id != 0) {
+               if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
+                   (dst->d_blk_softlimit > 0)) {
+                       ASSERT(dst->d_btimer != 0);
+               }
+               if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) &&
+                   (dst->d_ino_softlimit > 0)) {
+                       ASSERT(dst->d_itimer != 0);
+               }
+       }
+#endif
+}
+
+STATIC uint
+xfs_qm_export_qtype_flags(
+       uint flags)
+{
+       /*
+        * Can't be more than one, or none.
+        */
+       ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) !=
+               (FS_PROJ_QUOTA | FS_USER_QUOTA));
+       ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) !=
+               (FS_PROJ_QUOTA | FS_GROUP_QUOTA));
+       ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) !=
+               (FS_USER_QUOTA | FS_GROUP_QUOTA));
+       ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0);
+
+       return (flags & XFS_DQ_USER) ?
+               FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
+                       FS_PROJ_QUOTA : FS_GROUP_QUOTA;
+}
+
+STATIC uint
+xfs_qm_export_flags(
+       uint flags)
+{
+       uint uflags;
+
+       uflags = 0;
+       if (flags & XFS_UQUOTA_ACCT)
+               uflags |= FS_QUOTA_UDQ_ACCT;
+       if (flags & XFS_PQUOTA_ACCT)
+               uflags |= FS_QUOTA_PDQ_ACCT;
+       if (flags & XFS_GQUOTA_ACCT)
+               uflags |= FS_QUOTA_GDQ_ACCT;
+       if (flags & XFS_UQUOTA_ENFD)
+               uflags |= FS_QUOTA_UDQ_ENFD;
+       if (flags & (XFS_OQUOTA_ENFD)) {
+               uflags |= (flags & XFS_GQUOTA_ACCT) ?
+                       FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD;
+       }
+       return (uflags);
+}
+
+
+STATIC int
+xfs_dqrele_inode(
+       struct xfs_inode        *ip,
+       struct xfs_perag        *pag,
+       int                     flags)
+{
+       /* skip quota inodes */
+       if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
+           ip == ip->i_mount->m_quotainfo->qi_gquotaip) {
+               ASSERT(ip->i_udquot == NULL);
+               ASSERT(ip->i_gdquot == NULL);
+               return 0;
+       }
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
+               xfs_qm_dqrele(ip->i_udquot);
+               ip->i_udquot = NULL;
+       }
+       if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+               xfs_qm_dqrele(ip->i_gdquot);
+               ip->i_gdquot = NULL;
+       }
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return 0;
+}
+
+
+/*
+ * Go thru all the inodes in the file system, releasing their dquots.
+ *
+ * Note that the mount structure gets modified to indicate that quotas are off
+ * AFTER this, in the case of quotaoff.
+ */
+void
+xfs_qm_dqrele_all_inodes(
+       struct xfs_mount *mp,
+       uint             flags)
+{
+       ASSERT(mp->m_quotainfo);
+       xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
+}
diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h

new file mode 100644 (file)

index 0000000..94a3d92
--- /dev/null
+++ b/fs/xfs/xfs_quota_priv.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_QUOTA_PRIV_H__
+#define __XFS_QUOTA_PRIV_H__
+
+/*
+ * Number of bmaps that we ask from bmapi when doing a quotacheck.
+ * We make this restriction to keep the memory usage to a minimum.
+ */
+#define XFS_DQITER_MAP_SIZE    10
+
+/*
+ * Hash into a bucket in the dquot hash table, based on <mp, id>.
+ */
+#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
+                                (__psunsigned_t)(id)) & \
+                               (xfs_Gqm->qm_dqhashmask - 1))
+#define XFS_DQ_HASH(mp, id, type)   (type == XFS_DQ_USER ? \
+                                    (xfs_Gqm->qm_usr_dqhtable + \
+                                     XFS_DQ_HASHVAL(mp, id)) : \
+                                    (xfs_Gqm->qm_grp_dqhtable + \
+                                     XFS_DQ_HASHVAL(mp, id)))
+#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
+       !dqp->q_core.d_blk_hardlimit && \
+       !dqp->q_core.d_blk_softlimit && \
+       !dqp->q_core.d_rtb_hardlimit && \
+       !dqp->q_core.d_rtb_softlimit && \
+       !dqp->q_core.d_ino_hardlimit && \
+       !dqp->q_core.d_ino_softlimit && \
+       !dqp->q_core.d_bcount && \
+       !dqp->q_core.d_rtbcount && \
+       !dqp->q_core.d_icount)
+
+#define DQFLAGTO_TYPESTR(d)    (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
+                                (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
+                                (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
+
+#endif /* __XFS_QUOTA_PRIV_H__ */
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c

new file mode 100644 (file)

index 0000000..7e76f53
--- /dev/null
+++ b/fs/xfs/xfs_quotaops.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2008, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_quota.h"
+#include "xfs_trans.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_qm.h"
+#include <linux/quota.h>
+
+
+STATIC int
+xfs_quota_type(int type)
+{
+       switch (type) {
+       case USRQUOTA:
+               return XFS_DQ_USER;
+       case GRPQUOTA:
+               return XFS_DQ_GROUP;
+       default:
+               return XFS_DQ_PROJ;
+       }
+}
+
+STATIC int
+xfs_fs_get_xstate(
+       struct super_block      *sb,
+       struct fs_quota_stat    *fqs)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       if (!XFS_IS_QUOTA_RUNNING(mp))
+               return -ENOSYS;
+       return -xfs_qm_scall_getqstat(mp, fqs);
+}
+
+STATIC int
+xfs_fs_set_xstate(
+       struct super_block      *sb,
+       unsigned int            uflags,
+       int                     op)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+       unsigned int            flags = 0;
+
+       if (sb->s_flags & MS_RDONLY)
+               return -EROFS;
+       if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
+               return -ENOSYS;
+
+       if (uflags & FS_QUOTA_UDQ_ACCT)
+               flags |= XFS_UQUOTA_ACCT;
+       if (uflags & FS_QUOTA_PDQ_ACCT)
+               flags |= XFS_PQUOTA_ACCT;
+       if (uflags & FS_QUOTA_GDQ_ACCT)
+               flags |= XFS_GQUOTA_ACCT;
+       if (uflags & FS_QUOTA_UDQ_ENFD)
+               flags |= XFS_UQUOTA_ENFD;
+       if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD))
+               flags |= XFS_OQUOTA_ENFD;
+
+       switch (op) {
+       case Q_XQUOTAON:
+               return -xfs_qm_scall_quotaon(mp, flags);
+       case Q_XQUOTAOFF:
+               if (!XFS_IS_QUOTA_ON(mp))
+                       return -EINVAL;
+               return -xfs_qm_scall_quotaoff(mp, flags);
+       case Q_XQUOTARM:
+               if (XFS_IS_QUOTA_ON(mp))
+                       return -EINVAL;
+               return -xfs_qm_scall_trunc_qfiles(mp, flags);
+       }
+
+       return -EINVAL;
+}
+
+STATIC int
+xfs_fs_get_dqblk(
+       struct super_block      *sb,
+       int                     type,
+       qid_t                   id,
+       struct fs_disk_quota    *fdq)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       if (!XFS_IS_QUOTA_RUNNING(mp))
+               return -ENOSYS;
+       if (!XFS_IS_QUOTA_ON(mp))
+               return -ESRCH;
+
+       return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq);
+}
+
+STATIC int
+xfs_fs_set_dqblk(
+       struct super_block      *sb,
+       int                     type,
+       qid_t                   id,
+       struct fs_disk_quota    *fdq)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       if (sb->s_flags & MS_RDONLY)
+               return -EROFS;
+       if (!XFS_IS_QUOTA_RUNNING(mp))
+               return -ENOSYS;
+       if (!XFS_IS_QUOTA_ON(mp))
+               return -ESRCH;
+
+       return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
+}
+
+const struct quotactl_ops xfs_quotactl_operations = {
+       .get_xstate             = xfs_fs_get_xstate,
+       .set_xstate             = xfs_fs_set_xstate,
+       .get_dqblk              = xfs_fs_get_dqblk,
+       .set_dqblk              = xfs_fs_set_dqblk,
+};
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c

index 8f76fdff4f4688484fac811a23f97cf23f91d23a..35561a511b578c6adf7d7cad469f3c6a71c5f612 100644 (file)
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -168,7 +168,7 @@ error_cancel:
                                 xfs_trans_cancel(tp, cancelflags);
                                 goto error;
                         }
-                       memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize);
+                       memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
                         xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
                         /*
                          * Commit the transaction.
@@ -883,7 +883,7 @@ xfs_rtbuf_get(
         if (error) {
                 return error;
         }
-       ASSERT(bp && !XFS_BUF_GETERROR(bp));
+       ASSERT(!xfs_buf_geterror(bp));
         *bpp = bp;
         return 0;
  }
@@ -943,7 +943,7 @@ xfs_rtcheck_range(
         if (error) {
                 return error;
         }
-       bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+       bufp = bp->b_addr;
         /*
          * Compute the starting word's address, and starting bit.
          */
@@ -994,7 +994,7 @@ xfs_rtcheck_range(
                         if (error) {
                                 return error;
                         }
-                       b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       b = bufp = bp->b_addr;
                         word = 0;
                 } else {
                         /*
@@ -1040,7 +1040,7 @@ xfs_rtcheck_range(
                         if (error) {
                                 return error;
                         }
-                       b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       b = bufp = bp->b_addr;
                         word = 0;
                 } else {
                         /*
@@ -1158,7 +1158,7 @@ xfs_rtfind_back(
         if (error) {
                 return error;
         }
-       bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+       bufp = bp->b_addr;
         /*
          * Get the first word's index & point to it.
          */
@@ -1210,7 +1210,7 @@ xfs_rtfind_back(
                         if (error) {
                                 return error;
                         }
-                       bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       bufp = bp->b_addr;
                         word = XFS_BLOCKWMASK(mp);
                         b = &bufp[word];
                 } else {
@@ -1256,7 +1256,7 @@ xfs_rtfind_back(
                         if (error) {
                                 return error;
                         }
-                       bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       bufp = bp->b_addr;
                         word = XFS_BLOCKWMASK(mp);
                         b = &bufp[word];
                 } else {
@@ -1333,7 +1333,7 @@ xfs_rtfind_forw(
         if (error) {
                 return error;
         }
-       bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+       bufp = bp->b_addr;
         /*
          * Get the first word's index & point to it.
          */
@@ -1384,7 +1384,7 @@ xfs_rtfind_forw(
                         if (error) {
                                 return error;
                         }
-                       b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       b = bufp = bp->b_addr;
                         word = 0;
                 } else {
                         /*
@@ -1429,7 +1429,7 @@ xfs_rtfind_forw(
                         if (error) {
                                 return error;
                         }
-                       b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       b = bufp = bp->b_addr;
                         word = 0;
                 } else {
                         /*
@@ -1649,7 +1649,7 @@ xfs_rtmodify_range(
         if (error) {
                 return error;
         }
-       bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+       bufp = bp->b_addr;
         /*
          * Compute the starting word's address, and starting bit.
          */
@@ -1694,7 +1694,7 @@ xfs_rtmodify_range(
                         if (error) {
                                 return error;
                         }
-                       first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       first = b = bufp = bp->b_addr;
                         word = 0;
                 } else {
                         /*
@@ -1734,7 +1734,7 @@ xfs_rtmodify_range(
                         if (error) {
                                 return error;
                         }
-                       first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+                       first = b = bufp = bp->b_addr;
                         word = 0;
                 } else {
                         /*
@@ -1832,8 +1832,8 @@ xfs_rtmodify_summary(
          */
         sp = XFS_SUMPTR(mp, bp, so);
         *sp += delta;
-       xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)XFS_BUF_PTR(bp)),
-               (uint)((char *)sp - (char *)XFS_BUF_PTR(bp) + sizeof(*sp) - 1));
+       xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr),
+               (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1));
         return 0;
  }
  
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h

index 09e1f4f35e971c0e1edf326cfeca9d21f67a6953..f7f3a359c1c5a238afd4884b19f4a74363e36287 100644 (file)
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -47,7 +47,7 @@ struct xfs_trans;
  #define        XFS_SUMOFFSTOBLOCK(mp,s)        \
         (((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog)
  #define        XFS_SUMPTR(mp,bp,so)    \
-       ((xfs_suminfo_t *)((char *)XFS_BUF_PTR(bp) + \
+       ((xfs_suminfo_t *)((bp)->b_addr + \
                 (((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp))))
  
  #define        XFS_BITTOBLOCK(mp,bi)   ((bi) >> (mp)->m_blkbit_log)
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c

index d6d6fdfe9422b47545de1bbeb9e6713c67be160b..c96a8a05ac038ef153f008936e8b9f153c693d1e 100644 (file)
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -104,9 +104,9 @@ xfs_ioerror_alert(
         xfs_alert(mp,
                  "I/O error occurred: meta-data dev %s block 0x%llx"
                  "       (\"%s\") error %d buf count %zd",
-               XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
+               xfs_buf_target_name(bp->b_target),
                 (__uint64_t)blkno, func,
-               XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
+               bp->b_error, XFS_BUF_COUNT(bp));
  }
  
  /*
@@ -137,8 +137,8 @@ xfs_read_buf(
         bp = xfs_buf_read(target, blkno, len, flags);
         if (!bp)
                 return XFS_ERROR(EIO);
-       error = XFS_BUF_GETERROR(bp);
-       if (bp && !error && !XFS_FORCED_SHUTDOWN(mp)) {
+       error = bp->b_error;
+       if (!error && !XFS_FORCED_SHUTDOWN(mp)) {
                 *bpp = bp;
         } else {
                 *bpp = NULL;
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h

index 1eb2ba586814c36d6dbbb348d24510bea61e1fab..cb6ae715814a8f87026b26f701abe419aa3ed6c7 100644 (file)
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -509,7 +509,7 @@ static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
  
  #define XFS_SB_DADDR           ((xfs_daddr_t)0) /* daddr in filesystem/ag */
  #define        XFS_SB_BLOCK(mp)        XFS_HDR_BLOCK(mp, XFS_SB_DADDR)
-#define XFS_BUF_TO_SBP(bp)     ((xfs_dsb_t *)XFS_BUF_PTR(bp))
+#define XFS_BUF_TO_SBP(bp)     ((xfs_dsb_t *)((bp)->b_addr))
  
  #define        XFS_HDR_BLOCK(mp,d)     ((xfs_agblock_t)XFS_BB_TO_FSBT(mp,d))
  #define        XFS_DADDR_TO_FSB(mp,d)  XFS_AGB_TO_FSB(mp, \
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c

new file mode 100644 (file)

index 0000000..76fdc58
--- /dev/null
+++ b/fs/xfs/xfs_stats.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include <linux/proc_fs.h>
+
+DEFINE_PER_CPU(struct xfsstats, xfsstats);
+
+static int xfs_stat_proc_show(struct seq_file *m, void *v)
+{
+       int             c, i, j, val;
+       __uint64_t      xs_xstrat_bytes = 0;
+       __uint64_t      xs_write_bytes = 0;
+       __uint64_t      xs_read_bytes = 0;
+
+       static const struct xstats_entry {
+               char    *desc;
+               int     endpoint;
+       } xstats[] = {
+               { "extent_alloc",       XFSSTAT_END_EXTENT_ALLOC        },
+               { "abt",                XFSSTAT_END_ALLOC_BTREE         },
+               { "blk_map",            XFSSTAT_END_BLOCK_MAPPING       },
+               { "bmbt",               XFSSTAT_END_BLOCK_MAP_BTREE     },
+               { "dir",                XFSSTAT_END_DIRECTORY_OPS       },
+               { "trans",              XFSSTAT_END_TRANSACTIONS        },
+               { "ig",                 XFSSTAT_END_INODE_OPS           },
+               { "log",                XFSSTAT_END_LOG_OPS             },
+               { "push_ail",           XFSSTAT_END_TAIL_PUSHING        },
+               { "xstrat",             XFSSTAT_END_WRITE_CONVERT       },
+               { "rw",                 XFSSTAT_END_READ_WRITE_OPS      },
+               { "attr",               XFSSTAT_END_ATTRIBUTE_OPS       },
+               { "icluster",           XFSSTAT_END_INODE_CLUSTER       },
+               { "vnodes",             XFSSTAT_END_VNODE_OPS           },
+               { "buf",                XFSSTAT_END_BUF                 },
+               { "abtb2",              XFSSTAT_END_ABTB_V2             },
+               { "abtc2",              XFSSTAT_END_ABTC_V2             },
+               { "bmbt2",              XFSSTAT_END_BMBT_V2             },
+               { "ibt2",               XFSSTAT_END_IBT_V2              },
+       };
+
+       /* Loop over all stats groups */
+       for (i=j = 0; i < ARRAY_SIZE(xstats); i++) {
+               seq_printf(m, "%s", xstats[i].desc);
+               /* inner loop does each group */
+               while (j < xstats[i].endpoint) {
+                       val = 0;
+                       /* sum over all cpus */
+                       for_each_possible_cpu(c)
+                               val += *(((__u32*)&per_cpu(xfsstats, c) + j));
+                       seq_printf(m, " %u", val);
+                       j++;
+               }
+               seq_putc(m, '\n');
+       }
+       /* extra precision counters */
+       for_each_possible_cpu(i) {
+               xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
+               xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
+               xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
+       }
+
+       seq_printf(m, "xpc %Lu %Lu %Lu\n",
+                       xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
+       seq_printf(m, "debug %u\n",
+#if defined(DEBUG)
+               1);
+#else
+               0);
+#endif
+       return 0;
+}
+
+static int xfs_stat_proc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, xfs_stat_proc_show, NULL);
+}
+
+static const struct file_operations xfs_stat_proc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = xfs_stat_proc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+int
+xfs_init_procfs(void)
+{
+       if (!proc_mkdir("fs/xfs", NULL))
+               goto out;
+
+       if (!proc_create("fs/xfs/stat", 0, NULL,
+                        &xfs_stat_proc_fops))
+               goto out_remove_entry;
+       return 0;
+
+ out_remove_entry:
+       remove_proc_entry("fs/xfs", NULL);
+ out:
+       return -ENOMEM;
+}
+
+void
+xfs_cleanup_procfs(void)
+{
+       remove_proc_entry("fs/xfs/stat", NULL);
+       remove_proc_entry("fs/xfs", NULL);
+}
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h

new file mode 100644 (file)

index 0000000..736854b
--- /dev/null
+++ b/fs/xfs/xfs_stats.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_STATS_H__
+#define __XFS_STATS_H__
+
+
+#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
+
+#include <linux/percpu.h>
+
+/*
+ * XFS global statistics
+ */
+struct xfsstats {
+# define XFSSTAT_END_EXTENT_ALLOC      4
+       __uint32_t              xs_allocx;
+       __uint32_t              xs_allocb;
+       __uint32_t              xs_freex;
+       __uint32_t              xs_freeb;
+# define XFSSTAT_END_ALLOC_BTREE       (XFSSTAT_END_EXTENT_ALLOC+4)
+       __uint32_t              xs_abt_lookup;
+       __uint32_t              xs_abt_compare;
+       __uint32_t              xs_abt_insrec;
+       __uint32_t              xs_abt_delrec;
+# define XFSSTAT_END_BLOCK_MAPPING     (XFSSTAT_END_ALLOC_BTREE+7)
+       __uint32_t              xs_blk_mapr;
+       __uint32_t              xs_blk_mapw;
+       __uint32_t              xs_blk_unmap;
+       __uint32_t              xs_add_exlist;
+       __uint32_t              xs_del_exlist;
+       __uint32_t              xs_look_exlist;
+       __uint32_t              xs_cmp_exlist;
+# define XFSSTAT_END_BLOCK_MAP_BTREE   (XFSSTAT_END_BLOCK_MAPPING+4)
+       __uint32_t              xs_bmbt_lookup;
+       __uint32_t              xs_bmbt_compare;
+       __uint32_t              xs_bmbt_insrec;
+       __uint32_t              xs_bmbt_delrec;
+# define XFSSTAT_END_DIRECTORY_OPS     (XFSSTAT_END_BLOCK_MAP_BTREE+4)
+       __uint32_t              xs_dir_lookup;
+       __uint32_t              xs_dir_create;
+       __uint32_t              xs_dir_remove;
+       __uint32_t              xs_dir_getdents;
+# define XFSSTAT_END_TRANSACTIONS      (XFSSTAT_END_DIRECTORY_OPS+3)
+       __uint32_t              xs_trans_sync;
+       __uint32_t              xs_trans_async;
+       __uint32_t              xs_trans_empty;
+# define XFSSTAT_END_INODE_OPS         (XFSSTAT_END_TRANSACTIONS+7)
+       __uint32_t              xs_ig_attempts;
+       __uint32_t              xs_ig_found;
+       __uint32_t              xs_ig_frecycle;
+       __uint32_t              xs_ig_missed;
+       __uint32_t              xs_ig_dup;
+       __uint32_t              xs_ig_reclaims;
+       __uint32_t              xs_ig_attrchg;
+# define XFSSTAT_END_LOG_OPS           (XFSSTAT_END_INODE_OPS+5)
+       __uint32_t              xs_log_writes;
+       __uint32_t              xs_log_blocks;
+       __uint32_t              xs_log_noiclogs;
+       __uint32_t              xs_log_force;
+       __uint32_t              xs_log_force_sleep;
+# define XFSSTAT_END_TAIL_PUSHING      (XFSSTAT_END_LOG_OPS+10)
+       __uint32_t              xs_try_logspace;
+       __uint32_t              xs_sleep_logspace;
+       __uint32_t              xs_push_ail;
+       __uint32_t              xs_push_ail_success;
+       __uint32_t              xs_push_ail_pushbuf;
+       __uint32_t              xs_push_ail_pinned;
+       __uint32_t              xs_push_ail_locked;
+       __uint32_t              xs_push_ail_flushing;
+       __uint32_t              xs_push_ail_restarts;
+       __uint32_t              xs_push_ail_flush;
+# define XFSSTAT_END_WRITE_CONVERT     (XFSSTAT_END_TAIL_PUSHING+2)
+       __uint32_t              xs_xstrat_quick;
+       __uint32_t              xs_xstrat_split;
+# define XFSSTAT_END_READ_WRITE_OPS    (XFSSTAT_END_WRITE_CONVERT+2)
+       __uint32_t              xs_write_calls;
+       __uint32_t              xs_read_calls;
+# define XFSSTAT_END_ATTRIBUTE_OPS     (XFSSTAT_END_READ_WRITE_OPS+4)
+       __uint32_t              xs_attr_get;
+       __uint32_t              xs_attr_set;
+       __uint32_t              xs_attr_remove;
+       __uint32_t              xs_attr_list;
+# define XFSSTAT_END_INODE_CLUSTER     (XFSSTAT_END_ATTRIBUTE_OPS+3)
+       __uint32_t              xs_iflush_count;
+       __uint32_t              xs_icluster_flushcnt;
+       __uint32_t              xs_icluster_flushinode;
+# define XFSSTAT_END_VNODE_OPS         (XFSSTAT_END_INODE_CLUSTER+8)
+       __uint32_t              vn_active;      /* # vnodes not on free lists */
+       __uint32_t              vn_alloc;       /* # times vn_alloc called */
+       __uint32_t              vn_get;         /* # times vn_get called */
+       __uint32_t              vn_hold;        /* # times vn_hold called */
+       __uint32_t              vn_rele;        /* # times vn_rele called */
+       __uint32_t              vn_reclaim;     /* # times vn_reclaim called */
+       __uint32_t              vn_remove;      /* # times vn_remove called */
+       __uint32_t              vn_free;        /* # times vn_free called */
+#define XFSSTAT_END_BUF                        (XFSSTAT_END_VNODE_OPS+9)
+       __uint32_t              xb_get;
+       __uint32_t              xb_create;
+       __uint32_t              xb_get_locked;
+       __uint32_t              xb_get_locked_waited;
+       __uint32_t              xb_busy_locked;
+       __uint32_t              xb_miss_locked;
+       __uint32_t              xb_page_retries;
+       __uint32_t              xb_page_found;
+       __uint32_t              xb_get_read;
+/* Version 2 btree counters */
+#define XFSSTAT_END_ABTB_V2            (XFSSTAT_END_BUF+15)
+       __uint32_t              xs_abtb_2_lookup;
+       __uint32_t              xs_abtb_2_compare;
+       __uint32_t              xs_abtb_2_insrec;
+       __uint32_t              xs_abtb_2_delrec;
+       __uint32_t              xs_abtb_2_newroot;
+       __uint32_t              xs_abtb_2_killroot;
+       __uint32_t              xs_abtb_2_increment;
+       __uint32_t              xs_abtb_2_decrement;
+       __uint32_t              xs_abtb_2_lshift;
+       __uint32_t              xs_abtb_2_rshift;
+       __uint32_t              xs_abtb_2_split;
+       __uint32_t              xs_abtb_2_join;
+       __uint32_t              xs_abtb_2_alloc;
+       __uint32_t              xs_abtb_2_free;
+       __uint32_t              xs_abtb_2_moves;
+#define XFSSTAT_END_ABTC_V2            (XFSSTAT_END_ABTB_V2+15)
+       __uint32_t              xs_abtc_2_lookup;
+       __uint32_t              xs_abtc_2_compare;
+       __uint32_t              xs_abtc_2_insrec;
+       __uint32_t              xs_abtc_2_delrec;
+       __uint32_t              xs_abtc_2_newroot;
+       __uint32_t              xs_abtc_2_killroot;
+       __uint32_t              xs_abtc_2_increment;
+       __uint32_t              xs_abtc_2_decrement;
+       __uint32_t              xs_abtc_2_lshift;
+       __uint32_t              xs_abtc_2_rshift;
+       __uint32_t              xs_abtc_2_split;
+       __uint32_t              xs_abtc_2_join;
+       __uint32_t              xs_abtc_2_alloc;
+       __uint32_t              xs_abtc_2_free;
+       __uint32_t              xs_abtc_2_moves;
+#define XFSSTAT_END_BMBT_V2            (XFSSTAT_END_ABTC_V2+15)
+       __uint32_t              xs_bmbt_2_lookup;
+       __uint32_t              xs_bmbt_2_compare;
+       __uint32_t              xs_bmbt_2_insrec;
+       __uint32_t              xs_bmbt_2_delrec;
+       __uint32_t              xs_bmbt_2_newroot;
+       __uint32_t              xs_bmbt_2_killroot;
+       __uint32_t              xs_bmbt_2_increment;
+       __uint32_t              xs_bmbt_2_decrement;
+       __uint32_t              xs_bmbt_2_lshift;
+       __uint32_t              xs_bmbt_2_rshift;
+       __uint32_t              xs_bmbt_2_split;
+       __uint32_t              xs_bmbt_2_join;
+       __uint32_t              xs_bmbt_2_alloc;
+       __uint32_t              xs_bmbt_2_free;
+       __uint32_t              xs_bmbt_2_moves;
+#define XFSSTAT_END_IBT_V2             (XFSSTAT_END_BMBT_V2+15)
+       __uint32_t              xs_ibt_2_lookup;
+       __uint32_t              xs_ibt_2_compare;
+       __uint32_t              xs_ibt_2_insrec;
+       __uint32_t              xs_ibt_2_delrec;
+       __uint32_t              xs_ibt_2_newroot;
+       __uint32_t              xs_ibt_2_killroot;
+       __uint32_t              xs_ibt_2_increment;
+       __uint32_t              xs_ibt_2_decrement;
+       __uint32_t              xs_ibt_2_lshift;
+       __uint32_t              xs_ibt_2_rshift;
+       __uint32_t              xs_ibt_2_split;
+       __uint32_t              xs_ibt_2_join;
+       __uint32_t              xs_ibt_2_alloc;
+       __uint32_t              xs_ibt_2_free;
+       __uint32_t              xs_ibt_2_moves;
+/* Extra precision counters */
+       __uint64_t              xs_xstrat_bytes;
+       __uint64_t              xs_write_bytes;
+       __uint64_t              xs_read_bytes;
+};
+
+DECLARE_PER_CPU(struct xfsstats, xfsstats);
+
+/*
+ * We don't disable preempt, not too worried about poking the
+ * wrong CPU's stat for now (also aggregated before reporting).
+ */
+#define XFS_STATS_INC(v)       (per_cpu(xfsstats, current_cpu()).v++)
+#define XFS_STATS_DEC(v)       (per_cpu(xfsstats, current_cpu()).v--)
+#define XFS_STATS_ADD(v, inc)  (per_cpu(xfsstats, current_cpu()).v += (inc))
+
+extern int xfs_init_procfs(void);
+extern void xfs_cleanup_procfs(void);
+
+
+#else  /* !CONFIG_PROC_FS */
+
+# define XFS_STATS_INC(count)
+# define XFS_STATS_DEC(count)
+# define XFS_STATS_ADD(count, inc)
+
+static inline int xfs_init_procfs(void)
+{
+       return 0;
+}
+
+static inline void xfs_cleanup_procfs(void)
+{
+}
+
+#endif /* !CONFIG_PROC_FS */
+
+#endif /* __XFS_STATS_H__ */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c

new file mode 100644 (file)

index 0000000..2366c54
--- /dev/null
+++ b/fs/xfs/xfs_super.c
@@ -0,0 +1,1759 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xfs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_fsops.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_utils.h"
+#include "xfs_vnodeops.h"
+#include "xfs_log_priv.h"
+#include "xfs_trans_priv.h"
+#include "xfs_filestream.h"
+#include "xfs_da_btree.h"
+#include "xfs_extfree_item.h"
+#include "xfs_mru_cache.h"
+#include "xfs_inode_item.h"
+#include "xfs_sync.h"
+#include "xfs_trace.h"
+
+#include <linux/namei.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/mount.h>
+#include <linux/mempool.h>
+#include <linux/writeback.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/parser.h>
+
+static const struct super_operations xfs_super_operations;
+static kmem_zone_t *xfs_ioend_zone;
+mempool_t *xfs_ioend_pool;
+
+#define MNTOPT_LOGBUFS "logbufs"       /* number of XFS log buffers */
+#define MNTOPT_LOGBSIZE        "logbsize"      /* size of XFS log buffers */
+#define MNTOPT_LOGDEV  "logdev"        /* log device */
+#define MNTOPT_RTDEV   "rtdev"         /* realtime I/O device */
+#define MNTOPT_BIOSIZE "biosize"       /* log2 of preferred buffered io size */
+#define MNTOPT_WSYNC   "wsync"         /* safe-mode nfs compatible mount */
+#define MNTOPT_NOALIGN "noalign"       /* turn off stripe alignment */
+#define MNTOPT_SWALLOC "swalloc"       /* turn on stripe width allocation */
+#define MNTOPT_SUNIT   "sunit"         /* data volume stripe unit */
+#define MNTOPT_SWIDTH  "swidth"        /* data volume stripe width */
+#define MNTOPT_NOUUID  "nouuid"        /* ignore filesystem UUID */
+#define MNTOPT_MTPT    "mtpt"          /* filesystem mount point */
+#define MNTOPT_GRPID   "grpid"         /* group-ID from parent directory */
+#define MNTOPT_NOGRPID "nogrpid"       /* group-ID from current process */
+#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
+#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
+#define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
+#define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
+#define MNTOPT_BARRIER "barrier"       /* use writer barriers for log write and
+                                        * unwritten extent conversion */
+#define MNTOPT_NOBARRIER "nobarrier"   /* .. disable */
+#define MNTOPT_64BITINODE   "inode64"  /* inodes can be allocated anywhere */
+#define MNTOPT_IKEEP   "ikeep"         /* do not free empty inode clusters */
+#define MNTOPT_NOIKEEP "noikeep"       /* free empty inode clusters */
+#define MNTOPT_LARGEIO    "largeio"    /* report large I/O sizes in stat() */
+#define MNTOPT_NOLARGEIO   "nolargeio" /* do not report large I/O sizes
+                                        * in stat(). */
+#define MNTOPT_ATTR2   "attr2"         /* do use attr2 attribute format */
+#define MNTOPT_NOATTR2 "noattr2"       /* do not use attr2 attribute format */
+#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
+#define MNTOPT_QUOTA   "quota"         /* disk quotas (user) */
+#define MNTOPT_NOQUOTA "noquota"       /* no quotas */
+#define MNTOPT_USRQUOTA        "usrquota"      /* user quota enabled */
+#define MNTOPT_GRPQUOTA        "grpquota"      /* group quota enabled */
+#define MNTOPT_PRJQUOTA        "prjquota"      /* project quota enabled */
+#define MNTOPT_UQUOTA  "uquota"        /* user quota (IRIX variant) */
+#define MNTOPT_GQUOTA  "gquota"        /* group quota (IRIX variant) */
+#define MNTOPT_PQUOTA  "pquota"        /* project quota (IRIX variant) */
+#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
+#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
+#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
+#define MNTOPT_QUOTANOENF  "qnoenforce"        /* same as uqnoenforce */
+#define MNTOPT_DELAYLOG    "delaylog"  /* Delayed logging enabled */
+#define MNTOPT_NODELAYLOG  "nodelaylog"        /* Delayed logging disabled */
+#define MNTOPT_DISCARD    "discard"    /* Discard unused blocks */
+#define MNTOPT_NODISCARD   "nodiscard" /* Do not discard unused blocks */
+
+/*
+ * Table driven mount option parser.
+ *
+ * Currently only used for remount, but it will be used for mount
+ * in the future, too.
+ */
+enum {
+       Opt_barrier, Opt_nobarrier, Opt_err
+};
+
+static const match_table_t tokens = {
+       {Opt_barrier, "barrier"},
+       {Opt_nobarrier, "nobarrier"},
+       {Opt_err, NULL}
+};
+
+
+STATIC unsigned long
+suffix_strtoul(char *s, char **endp, unsigned int base)
+{
+       int     last, shift_left_factor = 0;
+       char    *value = s;
+
+       last = strlen(value) - 1;
+       if (value[last] == 'K' || value[last] == 'k') {
+               shift_left_factor = 10;
+               value[last] = '\0';
+       }
+       if (value[last] == 'M' || value[last] == 'm') {
+               shift_left_factor = 20;
+               value[last] = '\0';
+       }
+       if (value[last] == 'G' || value[last] == 'g') {
+               shift_left_factor = 30;
+               value[last] = '\0';
+       }
+
+       return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
+}
+
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock has _not_ yet been read in.
+ *
+ * Note that this function leaks the various device name allocations on
+ * failure.  The caller takes care of them.
+ */
+STATIC int
+xfs_parseargs(
+       struct xfs_mount        *mp,
+       char                    *options)
+{
+       struct super_block      *sb = mp->m_super;
+       char                    *this_char, *value, *eov;
+       int                     dsunit = 0;
+       int                     dswidth = 0;
+       int                     iosize = 0;
+       __uint8_t               iosizelog = 0;
+
+       /*
+        * set up the mount name first so all the errors will refer to the
+        * correct device.
+        */
+       mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
+       if (!mp->m_fsname)
+               return ENOMEM;
+       mp->m_fsname_len = strlen(mp->m_fsname) + 1;
+
+       /*
+        * Copy binary VFS mount flags we are interested in.
+        */
+       if (sb->s_flags & MS_RDONLY)
+               mp->m_flags |= XFS_MOUNT_RDONLY;
+       if (sb->s_flags & MS_DIRSYNC)
+               mp->m_flags |= XFS_MOUNT_DIRSYNC;
+       if (sb->s_flags & MS_SYNCHRONOUS)
+               mp->m_flags |= XFS_MOUNT_WSYNC;
+
+       /*
+        * Set some default flags that could be cleared by the mount option
+        * parsing.
+        */
+       mp->m_flags |= XFS_MOUNT_BARRIER;
+       mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+       mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+       mp->m_flags |= XFS_MOUNT_DELAYLOG;
+
+       /*
+        * These can be overridden by the mount option parsing.
+        */
+       mp->m_logbufs = -1;
+       mp->m_logbsize = -1;
+
+       if (!options)
+               goto done;
+
+       while ((this_char = strsep(&options, ",")) != NULL) {
+               if (!*this_char)
+                       continue;
+               if ((value = strchr(this_char, '=')) != NULL)
+                       *value++ = 0;
+
+               if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       mp->m_logbufs = simple_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       mp->m_logbsize = suffix_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+                       if (!mp->m_logname)
+                               return ENOMEM;
+               } else if (!strcmp(this_char, MNTOPT_MTPT)) {
+                       xfs_warn(mp, "%s option not allowed on this system",
+                               this_char);
+                       return EINVAL;
+               } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+                       if (!mp->m_rtname)
+                               return ENOMEM;
+               } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       iosize = simple_strtoul(value, &eov, 10);
+                       iosizelog = ffs(iosize) - 1;
+               } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       iosize = suffix_strtoul(value, &eov, 10);
+                       iosizelog = ffs(iosize) - 1;
+               } else if (!strcmp(this_char, MNTOPT_GRPID) ||
+                          !strcmp(this_char, MNTOPT_BSDGROUPS)) {
+                       mp->m_flags |= XFS_MOUNT_GRPID;
+               } else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
+                          !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
+                       mp->m_flags &= ~XFS_MOUNT_GRPID;
+               } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
+                       mp->m_flags |= XFS_MOUNT_WSYNC;
+               } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
+                       mp->m_flags |= XFS_MOUNT_NORECOVERY;
+               } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
+                       mp->m_flags |= XFS_MOUNT_NOALIGN;
+               } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
+                       mp->m_flags |= XFS_MOUNT_SWALLOC;
+               } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       dsunit = simple_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
+                       if (!value || !*value) {
+                               xfs_warn(mp, "%s option requires an argument",
+                                       this_char);
+                               return EINVAL;
+                       }
+                       dswidth = simple_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
+                       mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
+#if !XFS_BIG_INUMS
+                       xfs_warn(mp, "%s option not allowed on this system",
+                               this_char);
+                       return EINVAL;
+#endif
+               } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
+                       mp->m_flags |= XFS_MOUNT_NOUUID;
+               } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
+                       mp->m_flags |= XFS_MOUNT_BARRIER;
+               } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
+                       mp->m_flags &= ~XFS_MOUNT_BARRIER;
+               } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
+                       mp->m_flags |= XFS_MOUNT_IKEEP;
+               } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
+                       mp->m_flags &= ~XFS_MOUNT_IKEEP;
+               } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
+                       mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
+               } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
+                       mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
+               } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
+                       mp->m_flags |= XFS_MOUNT_ATTR2;
+               } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
+                       mp->m_flags &= ~XFS_MOUNT_ATTR2;
+                       mp->m_flags |= XFS_MOUNT_NOATTR2;
+               } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
+                       mp->m_flags |= XFS_MOUNT_FILESTREAMS;
+               } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
+                       mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+                                         XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+                                         XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+                                         XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
+               } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
+                          !strcmp(this_char, MNTOPT_UQUOTA) ||
+                          !strcmp(this_char, MNTOPT_USRQUOTA)) {
+                       mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+                                        XFS_UQUOTA_ENFD);
+               } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
+                          !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
+                       mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+                       mp->m_qflags &= ~XFS_UQUOTA_ENFD;
+               } else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
+                          !strcmp(this_char, MNTOPT_PRJQUOTA)) {
+                       mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+                                        XFS_OQUOTA_ENFD);
+               } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
+                       mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+                       mp->m_qflags &= ~XFS_OQUOTA_ENFD;
+               } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
+                          !strcmp(this_char, MNTOPT_GRPQUOTA)) {
+                       mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+                                        XFS_OQUOTA_ENFD);
+               } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
+                       mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+                       mp->m_qflags &= ~XFS_OQUOTA_ENFD;
+               } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
+                       mp->m_flags |= XFS_MOUNT_DELAYLOG;
+               } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
+                       mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
+                       xfs_warn(mp,
+       "nodelaylog is deprecated and will be removed in Linux 3.3");
+               } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
+                       mp->m_flags |= XFS_MOUNT_DISCARD;
+               } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
+                       mp->m_flags &= ~XFS_MOUNT_DISCARD;
+               } else if (!strcmp(this_char, "ihashsize")) {
+                       xfs_warn(mp,
+       "ihashsize no longer used, option is deprecated.");
+               } else if (!strcmp(this_char, "osyncisdsync")) {
+                       xfs_warn(mp,
+       "osyncisdsync has no effect, option is deprecated.");
+               } else if (!strcmp(this_char, "osyncisosync")) {
+                       xfs_warn(mp,
+       "osyncisosync has no effect, option is deprecated.");
+               } else if (!strcmp(this_char, "irixsgid")) {
+                       xfs_warn(mp,
+       "irixsgid is now a sysctl(2) variable, option is deprecated.");
+               } else {
+                       xfs_warn(mp, "unknown mount option [%s].", this_char);
+                       return EINVAL;
+               }
+       }
+
+       /*
+        * no recovery flag requires a read-only mount
+        */
+       if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
+           !(mp->m_flags & XFS_MOUNT_RDONLY)) {
+               xfs_warn(mp, "no-recovery mounts must be read-only.");
+               return EINVAL;
+       }
+
+       if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
+               xfs_warn(mp,
+       "sunit and swidth options incompatible with the noalign option");
+               return EINVAL;
+       }
+
+       if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
+           !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
+               xfs_warn(mp,
+       "the discard option is incompatible with the nodelaylog option");
+               return EINVAL;
+       }
+
+#ifndef CONFIG_XFS_QUOTA
+       if (XFS_IS_QUOTA_RUNNING(mp)) {
+               xfs_warn(mp, "quota support not available in this kernel.");
+               return EINVAL;
+       }
+#endif
+
+       if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
+           (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
+               xfs_warn(mp, "cannot mount with both project and group quota");
+               return EINVAL;
+       }
+
+       if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
+               xfs_warn(mp, "sunit and swidth must be specified together");
+               return EINVAL;
+       }
+
+       if (dsunit && (dswidth % dsunit != 0)) {
+               xfs_warn(mp,
+       "stripe width (%d) must be a multiple of the stripe unit (%d)",
+                       dswidth, dsunit);
+               return EINVAL;
+       }
+
+done:
+       if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
+               /*
+                * At this point the superblock has not been read
+                * in, therefore we do not know the block size.
+                * Before the mount call ends we will convert
+                * these to FSBs.
+                */
+               if (dsunit) {
+                       mp->m_dalign = dsunit;
+                       mp->m_flags |= XFS_MOUNT_RETERR;
+               }
+
+               if (dswidth)
+                       mp->m_swidth = dswidth;
+       }
+
+       if (mp->m_logbufs != -1 &&
+           mp->m_logbufs != 0 &&
+           (mp->m_logbufs < XLOG_MIN_ICLOGS ||
+            mp->m_logbufs > XLOG_MAX_ICLOGS)) {
+               xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
+                       mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
+               return XFS_ERROR(EINVAL);
+       }
+       if (mp->m_logbsize != -1 &&
+           mp->m_logbsize !=  0 &&
+           (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
+            mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
+            !is_power_of_2(mp->m_logbsize))) {
+               xfs_warn(mp,
+                       "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
+                       mp->m_logbsize);
+               return XFS_ERROR(EINVAL);
+       }
+
+       if (iosizelog) {
+               if (iosizelog > XFS_MAX_IO_LOG ||
+                   iosizelog < XFS_MIN_IO_LOG) {
+                       xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
+                               iosizelog, XFS_MIN_IO_LOG,
+                               XFS_MAX_IO_LOG);
+                       return XFS_ERROR(EINVAL);
+               }
+
+               mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
+               mp->m_readio_log = iosizelog;
+               mp->m_writeio_log = iosizelog;
+       }
+
+       return 0;
+}
+
+struct proc_xfs_info {
+       int     flag;
+       char    *str;
+};
+
+STATIC int
+xfs_showargs(
+       struct xfs_mount        *mp,
+       struct seq_file         *m)
+{
+       static struct proc_xfs_info xfs_info_set[] = {
+               /* the few simple ones we can get from the mount struct */
+               { XFS_MOUNT_IKEEP,              "," MNTOPT_IKEEP },
+               { XFS_MOUNT_WSYNC,              "," MNTOPT_WSYNC },
+               { XFS_MOUNT_NOALIGN,            "," MNTOPT_NOALIGN },
+               { XFS_MOUNT_SWALLOC,            "," MNTOPT_SWALLOC },
+               { XFS_MOUNT_NOUUID,             "," MNTOPT_NOUUID },
+               { XFS_MOUNT_NORECOVERY,         "," MNTOPT_NORECOVERY },
+               { XFS_MOUNT_ATTR2,              "," MNTOPT_ATTR2 },
+               { XFS_MOUNT_FILESTREAMS,        "," MNTOPT_FILESTREAM },
+               { XFS_MOUNT_GRPID,              "," MNTOPT_GRPID },
+               { XFS_MOUNT_DELAYLOG,           "," MNTOPT_DELAYLOG },
+               { XFS_MOUNT_DISCARD,            "," MNTOPT_DISCARD },
+               { 0, NULL }
+       };
+       static struct proc_xfs_info xfs_info_unset[] = {
+               /* the few simple ones we can get from the mount struct */
+               { XFS_MOUNT_COMPAT_IOSIZE,      "," MNTOPT_LARGEIO },
+               { XFS_MOUNT_BARRIER,            "," MNTOPT_NOBARRIER },
+               { XFS_MOUNT_SMALL_INUMS,        "," MNTOPT_64BITINODE },
+               { 0, NULL }
+       };
+       struct proc_xfs_info    *xfs_infop;
+
+       for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
+               if (mp->m_flags & xfs_infop->flag)
+                       seq_puts(m, xfs_infop->str);
+       }
+       for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
+               if (!(mp->m_flags & xfs_infop->flag))
+                       seq_puts(m, xfs_infop->str);
+       }
+
+       if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
+               seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
+                               (int)(1 << mp->m_writeio_log) >> 10);
+
+       if (mp->m_logbufs > 0)
+               seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
+       if (mp->m_logbsize > 0)
+               seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
+
+       if (mp->m_logname)
+               seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
+       if (mp->m_rtname)
+               seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
+
+       if (mp->m_dalign > 0)
+               seq_printf(m, "," MNTOPT_SUNIT "=%d",
+                               (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
+       if (mp->m_swidth > 0)
+               seq_printf(m, "," MNTOPT_SWIDTH "=%d",
+                               (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
+
+       if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
+               seq_puts(m, "," MNTOPT_USRQUOTA);
+       else if (mp->m_qflags & XFS_UQUOTA_ACCT)
+               seq_puts(m, "," MNTOPT_UQUOTANOENF);
+
+       /* Either project or group quotas can be active, not both */
+
+       if (mp->m_qflags & XFS_PQUOTA_ACCT) {
+               if (mp->m_qflags & XFS_OQUOTA_ENFD)
+                       seq_puts(m, "," MNTOPT_PRJQUOTA);
+               else
+                       seq_puts(m, "," MNTOPT_PQUOTANOENF);
+       } else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
+               if (mp->m_qflags & XFS_OQUOTA_ENFD)
+                       seq_puts(m, "," MNTOPT_GRPQUOTA);
+               else
+                       seq_puts(m, "," MNTOPT_GQUOTANOENF);
+       }
+
+       if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
+               seq_puts(m, "," MNTOPT_NOQUOTA);
+
+       return 0;
+}
+__uint64_t
+xfs_max_file_offset(
+       unsigned int            blockshift)
+{
+       unsigned int            pagefactor = 1;
+       unsigned int            bitshift = BITS_PER_LONG - 1;
+
+       /* Figure out maximum filesize, on Linux this can depend on
+        * the filesystem blocksize (on 32 bit platforms).
+        * __block_write_begin does this in an [unsigned] long...
+        *      page->index << (PAGE_CACHE_SHIFT - bbits)
+        * So, for page sized blocks (4K on 32 bit platforms),
+        * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
+        *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
+        * but for smaller blocksizes it is less (bbits = log2 bsize).
+        * Note1: get_block_t takes a long (implicit cast from above)
+        * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
+        * can optionally convert the [unsigned] long from above into
+        * an [unsigned] long long.
+        */
+
+#if BITS_PER_LONG == 32
+# if defined(CONFIG_LBDAF)
+       ASSERT(sizeof(sector_t) == 8);
+       pagefactor = PAGE_CACHE_SIZE;
+       bitshift = BITS_PER_LONG;
+# else
+       pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
+# endif
+#endif
+
+       return (((__uint64_t)pagefactor) << bitshift) - 1;
+}
+
+STATIC int
+xfs_blkdev_get(
+       xfs_mount_t             *mp,
+       const char              *name,
+       struct block_device     **bdevp)
+{
+       int                     error = 0;
+
+       *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
+                                   mp);
+       if (IS_ERR(*bdevp)) {
+               error = PTR_ERR(*bdevp);
+               xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error);
+       }
+
+       return -error;
+}
+
+STATIC void
+xfs_blkdev_put(
+       struct block_device     *bdev)
+{
+       if (bdev)
+               blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+}
+
+void
+xfs_blkdev_issue_flush(
+       xfs_buftarg_t           *buftarg)
+{
+       blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
+}
+
+STATIC void
+xfs_close_devices(
+       struct xfs_mount        *mp)
+{
+       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+               struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
+               xfs_free_buftarg(mp, mp->m_logdev_targp);
+               xfs_blkdev_put(logdev);
+       }
+       if (mp->m_rtdev_targp) {
+               struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
+               xfs_free_buftarg(mp, mp->m_rtdev_targp);
+               xfs_blkdev_put(rtdev);
+       }
+       xfs_free_buftarg(mp, mp->m_ddev_targp);
+}
+
+/*
+ * The file system configurations are:
+ *     (1) device (partition) with data and internal log
+ *     (2) logical volume with data and log subvolumes.
+ *     (3) logical volume with data, log, and realtime subvolumes.
+ *
+ * We only have to handle opening the log and realtime volumes here if
+ * they are present.  The data subvolume has already been opened by
+ * get_sb_bdev() and is stored in sb->s_bdev.
+ */
+STATIC int
+xfs_open_devices(
+       struct xfs_mount        *mp)
+{
+       struct block_device     *ddev = mp->m_super->s_bdev;
+       struct block_device     *logdev = NULL, *rtdev = NULL;
+       int                     error;
+
+       /*
+        * Open real time and log devices - order is important.
+        */
+       if (mp->m_logname) {
+               error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
+               if (error)
+                       goto out;
+       }
+
+       if (mp->m_rtname) {
+               error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
+               if (error)
+                       goto out_close_logdev;
+
+               if (rtdev == ddev || rtdev == logdev) {
+                       xfs_warn(mp,
+       "Cannot mount filesystem with identical rtdev and ddev/logdev.");
+                       error = EINVAL;
+                       goto out_close_rtdev;
+               }
+       }
+
+       /*
+        * Setup xfs_mount buffer target pointers
+        */
+       error = ENOMEM;
+       mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
+       if (!mp->m_ddev_targp)
+               goto out_close_rtdev;
+
+       if (rtdev) {
+               mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
+                                                       mp->m_fsname);
+               if (!mp->m_rtdev_targp)
+                       goto out_free_ddev_targ;
+       }
+
+       if (logdev && logdev != ddev) {
+               mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
+                                                       mp->m_fsname);
+               if (!mp->m_logdev_targp)
+                       goto out_free_rtdev_targ;
+       } else {
+               mp->m_logdev_targp = mp->m_ddev_targp;
+       }
+
+       return 0;
+
+ out_free_rtdev_targ:
+       if (mp->m_rtdev_targp)
+               xfs_free_buftarg(mp, mp->m_rtdev_targp);
+ out_free_ddev_targ:
+       xfs_free_buftarg(mp, mp->m_ddev_targp);
+ out_close_rtdev:
+       if (rtdev)
+               xfs_blkdev_put(rtdev);
+ out_close_logdev:
+       if (logdev && logdev != ddev)
+               xfs_blkdev_put(logdev);
+ out:
+       return error;
+}
+
+/*
+ * Setup xfs_mount buffer target pointers based on superblock
+ */
+STATIC int
+xfs_setup_devices(
+       struct xfs_mount        *mp)
+{
+       int                     error;
+
+       error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
+                                   mp->m_sb.sb_sectsize);
+       if (error)
+               return error;
+
+       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
+               unsigned int    log_sector_size = BBSIZE;
+
+               if (xfs_sb_version_hassector(&mp->m_sb))
+                       log_sector_size = mp->m_sb.sb_logsectsize;
+               error = xfs_setsize_buftarg(mp->m_logdev_targp,
+                                           mp->m_sb.sb_blocksize,
+                                           log_sector_size);
+               if (error)
+                       return error;
+       }
+       if (mp->m_rtdev_targp) {
+               error = xfs_setsize_buftarg(mp->m_rtdev_targp,
+                                           mp->m_sb.sb_blocksize,
+                                           mp->m_sb.sb_sectsize);
+               if (error)
+                       return error;
+       }
+
+       return 0;
+}
+
+/* Catch misguided souls that try to use this interface on XFS */
+STATIC struct inode *
+xfs_fs_alloc_inode(
+       struct super_block      *sb)
+{
+       BUG();
+       return NULL;
+}
+
+/*
+ * Now that the generic code is guaranteed not to be accessing
+ * the linux inode, we can reclaim the inode.
+ */
+STATIC void
+xfs_fs_destroy_inode(
+       struct inode            *inode)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+
+       trace_xfs_destroy_inode(ip);
+
+       XFS_STATS_INC(vn_reclaim);
+
+       /* bad inode, get out here ASAP */
+       if (is_bad_inode(inode))
+               goto out_reclaim;
+
+       xfs_ioend_wait(ip);
+
+       ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+
+       /*
+        * We should never get here with one of the reclaim flags already set.
+        */
+       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+       ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
+
+       /*
+        * We always use background reclaim here because even if the
+        * inode is clean, it still may be under IO and hence we have
+        * to take the flush lock. The background reclaim path handles
+        * this more efficiently than we can here, so simply let background
+        * reclaim tear down all inodes.
+        */
+out_reclaim:
+       xfs_inode_set_reclaim_tag(ip);
+}
+
+/*
+ * Slab object creation initialisation for the XFS inode.
+ * This covers only the idempotent fields in the XFS inode;
+ * all other fields need to be initialised on allocation
+ * from the slab. This avoids the need to repeatedly initialise
+ * fields in the xfs inode that left in the initialise state
+ * when freeing the inode.
+ */
+STATIC void
+xfs_fs_inode_init_once(
+       void                    *inode)
+{
+       struct xfs_inode        *ip = inode;
+
+       memset(ip, 0, sizeof(struct xfs_inode));
+
+       /* vfs inode */
+       inode_init_once(VFS_I(ip));
+
+       /* xfs inode */
+       atomic_set(&ip->i_iocount, 0);
+       atomic_set(&ip->i_pincount, 0);
+       spin_lock_init(&ip->i_flags_lock);
+       init_waitqueue_head(&ip->i_ipin_wait);
+       /*
+        * Because we want to use a counting completion, complete
+        * the flush completion once to allow a single access to
+        * the flush completion without blocking.
+        */
+       init_completion(&ip->i_flush);
+       complete(&ip->i_flush);
+
+       mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+                    "xfsino", ip->i_ino);
+}
+
+/*
+ * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
+ * we catch unlogged VFS level updates to the inode.
+ *
+ * We need the barrier() to maintain correct ordering between unlogged
+ * updates and the transaction commit code that clears the i_update_core
+ * field. This requires all updates to be completed before marking the
+ * inode dirty.
+ */
+STATIC void
+xfs_fs_dirty_inode(
+       struct inode    *inode,
+       int             flags)
+{
+       barrier();
+       XFS_I(inode)->i_update_core = 1;
+}
+
+STATIC int
+xfs_log_inode(
+       struct xfs_inode        *ip)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       int                     error;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+       error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               return error;
+       }
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
+       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+       return xfs_trans_commit(tp, 0);
+}
+
+STATIC int
+xfs_fs_write_inode(
+       struct inode            *inode,
+       struct writeback_control *wbc)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       int                     error = EAGAIN;
+
+       trace_xfs_write_inode(ip);
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -XFS_ERROR(EIO);
+       if (!ip->i_update_core)
+               return 0;
+
+       if (wbc->sync_mode == WB_SYNC_ALL) {
+               /*
+                * Make sure the inode has made it it into the log.  Instead
+                * of forcing it all the way to stable storage using a
+                * synchronous transaction we let the log force inside the
+                * ->sync_fs call do that for thus, which reduces the number
+                * of synchronous log foces dramatically.
+                */
+               xfs_ioend_wait(ip);
+               error = xfs_log_inode(ip);
+               if (error)
+                       goto out;
+               return 0;
+       } else {
+               /*
+                * We make this non-blocking if the inode is contended, return
+                * EAGAIN to indicate to the caller that they did not succeed.
+                * This prevents the flush path from blocking on inodes inside
+                * another operation right now, they get caught later by
+                * xfs_sync.
+                */
+               if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
+                       goto out;
+
+               if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
+                       goto out_unlock;
+
+               /*
+                * Now we have the flush lock and the inode is not pinned, we
+                * can check if the inode is really clean as we know that
+                * there are no pending transaction completions, it is not
+                * waiting on the delayed write queue and there is no IO in
+                * progress.
+                */
+               if (xfs_inode_clean(ip)) {
+                       xfs_ifunlock(ip);
+                       error = 0;
+                       goto out_unlock;
+               }
+               error = xfs_iflush(ip, SYNC_TRYLOCK);
+       }
+
+ out_unlock:
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ out:
+       /*
+        * if we failed to write out the inode then mark
+        * it dirty again so we'll try again later.
+        */
+       if (error)
+               xfs_mark_inode_dirty_sync(ip);
+       return -error;
+}
+
+STATIC void
+xfs_fs_evict_inode(
+       struct inode            *inode)
+{
+       xfs_inode_t             *ip = XFS_I(inode);
+
+       trace_xfs_evict_inode(ip);
+
+       truncate_inode_pages(&inode->i_data, 0);
+       end_writeback(inode);
+       XFS_STATS_INC(vn_rele);
+       XFS_STATS_INC(vn_remove);
+       XFS_STATS_DEC(vn_active);
+
+       /*
+        * The iolock is used by the file system to coordinate reads,
+        * writes, and block truncates.  Up to this point the lock
+        * protected concurrent accesses by users of the inode.  But
+        * from here forward we're doing some final processing of the
+        * inode because we're done with it, and although we reuse the
+        * iolock for protection it is really a distinct lock class
+        * (in the lockdep sense) from before.  To keep lockdep happy
+        * (and basically indicate what we are doing), we explicitly
+        * re-init the iolock here.
+        */
+       ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+       mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
+       lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
+                       &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
+
+       xfs_inactive(ip);
+}
+
+STATIC void
+xfs_free_fsname(
+       struct xfs_mount        *mp)
+{
+       kfree(mp->m_fsname);
+       kfree(mp->m_rtname);
+       kfree(mp->m_logname);
+}
+
+STATIC void
+xfs_fs_put_super(
+       struct super_block      *sb)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       xfs_syncd_stop(mp);
+
+       /*
+        * Blow away any referenced inode in the filestreams cache.
+        * This can and will cause log traffic as inodes go inactive
+        * here.
+        */
+       xfs_filestream_unmount(mp);
+
+       XFS_bflush(mp->m_ddev_targp);
+
+       xfs_unmountfs(mp);
+       xfs_freesb(mp);
+       xfs_icsb_destroy_counters(mp);
+       xfs_close_devices(mp);
+       xfs_free_fsname(mp);
+       kfree(mp);
+}
+
+STATIC int
+xfs_fs_sync_fs(
+       struct super_block      *sb,
+       int                     wait)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+       int                     error;
+
+       /*
+        * Not much we can do for the first async pass.  Writing out the
+        * superblock would be counter-productive as we are going to redirty
+        * when writing out other data and metadata (and writing out a single
+        * block is quite fast anyway).
+        *
+        * Try to asynchronously kick off quota syncing at least.
+        */
+       if (!wait) {
+               xfs_qm_sync(mp, SYNC_TRYLOCK);
+               return 0;
+       }
+
+       error = xfs_quiesce_data(mp);
+       if (error)
+               return -error;
+
+       if (laptop_mode) {
+               /*
+                * The disk must be active because we're syncing.
+                * We schedule xfssyncd now (now that the disk is
+                * active) instead of later (when it might not be).
+                */
+               flush_delayed_work_sync(&mp->m_sync_work);
+       }
+
+       return 0;
+}
+
+STATIC int
+xfs_fs_statfs(
+       struct dentry           *dentry,
+       struct kstatfs          *statp)
+{
+       struct xfs_mount        *mp = XFS_M(dentry->d_sb);
+       xfs_sb_t                *sbp = &mp->m_sb;
+       struct xfs_inode        *ip = XFS_I(dentry->d_inode);
+       __uint64_t              fakeinos, id;
+       xfs_extlen_t            lsize;
+       __int64_t               ffree;
+
+       statp->f_type = XFS_SB_MAGIC;
+       statp->f_namelen = MAXNAMELEN - 1;
+
+       id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
+       statp->f_fsid.val[0] = (u32)id;
+       statp->f_fsid.val[1] = (u32)(id >> 32);
+
+       xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+
+       spin_lock(&mp->m_sb_lock);
+       statp->f_bsize = sbp->sb_blocksize;
+       lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
+       statp->f_blocks = sbp->sb_dblocks - lsize;
+       statp->f_bfree = statp->f_bavail =
+                               sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+       fakeinos = statp->f_bfree << sbp->sb_inopblog;
+       statp->f_files =
+           MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
+       if (mp->m_maxicount)
+               statp->f_files = min_t(typeof(statp->f_files),
+                                       statp->f_files,
+                                       mp->m_maxicount);
+
+       /* make sure statp->f_ffree does not underflow */
+       ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+       statp->f_ffree = max_t(__int64_t, ffree, 0);
+
+       spin_unlock(&mp->m_sb_lock);
+
+       if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
+           ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
+                             (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
+               xfs_qm_statvfs(ip, statp);
+       return 0;
+}
+
+STATIC void
+xfs_save_resvblks(struct xfs_mount *mp)
+{
+       __uint64_t resblks = 0;
+
+       mp->m_resblks_save = mp->m_resblks;
+       xfs_reserve_blocks(mp, &resblks, NULL);
+}
+
+STATIC void
+xfs_restore_resvblks(struct xfs_mount *mp)
+{
+       __uint64_t resblks;
+
+       if (mp->m_resblks_save) {
+               resblks = mp->m_resblks_save;
+               mp->m_resblks_save = 0;
+       } else
+               resblks = xfs_default_resblks(mp);
+
+       xfs_reserve_blocks(mp, &resblks, NULL);
+}
+
+STATIC int
+xfs_fs_remount(
+       struct super_block      *sb,
+       int                     *flags,
+       char                    *options)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+       substring_t             args[MAX_OPT_ARGS];
+       char                    *p;
+       int                     error;
+
+       while ((p = strsep(&options, ",")) != NULL) {
+               int token;
+
+               if (!*p)
+                       continue;
+
+               token = match_token(p, tokens, args);
+               switch (token) {
+               case Opt_barrier:
+                       mp->m_flags |= XFS_MOUNT_BARRIER;
+                       break;
+               case Opt_nobarrier:
+                       mp->m_flags &= ~XFS_MOUNT_BARRIER;
+                       break;
+               default:
+                       /*
+                        * Logically we would return an error here to prevent
+                        * users from believing they might have changed
+                        * mount options using remount which can't be changed.
+                        *
+                        * But unfortunately mount(8) adds all options from
+                        * mtab and fstab to the mount arguments in some cases
+                        * so we can't blindly reject options, but have to
+                        * check for each specified option if it actually
+                        * differs from the currently set option and only
+                        * reject it if that's the case.
+                        *
+                        * Until that is implemented we return success for
+                        * every remount request, and silently ignore all
+                        * options that we can't actually change.
+                        */
+#if 0
+                       xfs_info(mp,
+               "mount option \"%s\" not supported for remount\n", p);
+                       return -EINVAL;
+#else
+                       break;
+#endif
+               }
+       }
+
+       /* ro -> rw */
+       if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
+               mp->m_flags &= ~XFS_MOUNT_RDONLY;
+
+               /*
+                * If this is the first remount to writeable state we
+                * might have some superblock changes to update.
+                */
+               if (mp->m_update_flags) {
+                       error = xfs_mount_log_sb(mp, mp->m_update_flags);
+                       if (error) {
+                               xfs_warn(mp, "failed to write sb changes");
+                               return error;
+                       }
+                       mp->m_update_flags = 0;
+               }
+
+               /*
+                * Fill out the reserve pool if it is empty. Use the stashed
+                * value if it is non-zero, otherwise go with the default.
+                */
+               xfs_restore_resvblks(mp);
+       }
+
+       /* rw -> ro */
+       if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
+               /*
+                * After we have synced the data but before we sync the
+                * metadata, we need to free up the reserve block pool so that
+                * the used block count in the superblock on disk is correct at
+                * the end of the remount. Stash the current reserve pool size
+                * so that if we get remounted rw, we can return it to the same
+                * size.
+                */
+
+               xfs_quiesce_data(mp);
+               xfs_save_resvblks(mp);
+               xfs_quiesce_attr(mp);
+               mp->m_flags |= XFS_MOUNT_RDONLY;
+       }
+
+       return 0;
+}
+
+/*
+ * Second stage of a freeze. The data is already frozen so we only
+ * need to take care of the metadata. Once that's done write a dummy
+ * record to dirty the log in case of a crash while frozen.
+ */
+STATIC int
+xfs_fs_freeze(
+       struct super_block      *sb)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       xfs_save_resvblks(mp);
+       xfs_quiesce_attr(mp);
+       return -xfs_fs_log_dummy(mp);
+}
+
+STATIC int
+xfs_fs_unfreeze(
+       struct super_block      *sb)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       xfs_restore_resvblks(mp);
+       return 0;
+}
+
+STATIC int
+xfs_fs_show_options(
+       struct seq_file         *m,
+       struct vfsmount         *mnt)
+{
+       return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
+}
+
+/*
+ * This function fills in xfs_mount_t fields based on mount args.
+ * Note: the superblock _has_ now been read in.
+ */
+STATIC int
+xfs_finish_flags(
+       struct xfs_mount        *mp)
+{
+       int                     ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
+
+       /* Fail a mount where the logbuf is smaller than the log stripe */
+       if (xfs_sb_version_haslogv2(&mp->m_sb)) {
+               if (mp->m_logbsize <= 0 &&
+                   mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
+                       mp->m_logbsize = mp->m_sb.sb_logsunit;
+               } else if (mp->m_logbsize > 0 &&
+                          mp->m_logbsize < mp->m_sb.sb_logsunit) {
+                       xfs_warn(mp,
+               "logbuf size must be greater than or equal to log stripe size");
+                       return XFS_ERROR(EINVAL);
+               }
+       } else {
+               /* Fail a mount if the logbuf is larger than 32K */
+               if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
+                       xfs_warn(mp,
+               "logbuf size for version 1 logs must be 16K or 32K");
+                       return XFS_ERROR(EINVAL);
+               }
+       }
+
+       /*
+        * mkfs'ed attr2 will turn on attr2 mount unless explicitly
+        * told by noattr2 to turn it off
+        */
+       if (xfs_sb_version_hasattr2(&mp->m_sb) &&
+           !(mp->m_flags & XFS_MOUNT_NOATTR2))
+               mp->m_flags |= XFS_MOUNT_ATTR2;
+
+       /*
+        * prohibit r/w mounts of read-only filesystems
+        */
+       if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
+               xfs_warn(mp,
+                       "cannot mount a read-only filesystem as read-write");
+               return XFS_ERROR(EROFS);
+       }
+
+       return 0;
+}
+
+STATIC int
+xfs_fs_fill_super(
+       struct super_block      *sb,
+       void                    *data,
+       int                     silent)
+{
+       struct inode            *root;
+       struct xfs_mount        *mp = NULL;
+       int                     flags = 0, error = ENOMEM;
+
+       mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
+       if (!mp)
+               goto out;
+
+       spin_lock_init(&mp->m_sb_lock);
+       mutex_init(&mp->m_growlock);
+       atomic_set(&mp->m_active_trans, 0);
+
+       mp->m_super = sb;
+       sb->s_fs_info = mp;
+
+       error = xfs_parseargs(mp, (char *)data);
+       if (error)
+               goto out_free_fsname;
+
+       sb_min_blocksize(sb, BBSIZE);
+       sb->s_xattr = xfs_xattr_handlers;
+       sb->s_export_op = &xfs_export_operations;
+#ifdef CONFIG_XFS_QUOTA
+       sb->s_qcop = &xfs_quotactl_operations;
+#endif
+       sb->s_op = &xfs_super_operations;
+
+       if (silent)
+               flags |= XFS_MFSI_QUIET;
+
+       error = xfs_open_devices(mp);
+       if (error)
+               goto out_free_fsname;
+
+       error = xfs_icsb_init_counters(mp);
+       if (error)
+               goto out_close_devices;
+
+       error = xfs_readsb(mp, flags);
+       if (error)
+               goto out_destroy_counters;
+
+       error = xfs_finish_flags(mp);
+       if (error)
+               goto out_free_sb;
+
+       error = xfs_setup_devices(mp);
+       if (error)
+               goto out_free_sb;
+
+       error = xfs_filestream_mount(mp);
+       if (error)
+               goto out_free_sb;
+
+       /*
+        * we must configure the block size in the superblock before we run the
+        * full mount process as the mount process can lookup and cache inodes.
+        * For the same reason we must also initialise the syncd and register
+        * the inode cache shrinker so that inodes can be reclaimed during
+        * operations like a quotacheck that iterate all inodes in the
+        * filesystem.
+        */
+       sb->s_magic = XFS_SB_MAGIC;
+       sb->s_blocksize = mp->m_sb.sb_blocksize;
+       sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
+       sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
+       sb->s_time_gran = 1;
+       set_posix_acl_flag(sb);
+
+       error = xfs_mountfs(mp);
+       if (error)
+               goto out_filestream_unmount;
+
+       error = xfs_syncd_init(mp);
+       if (error)
+               goto out_unmount;
+
+       root = igrab(VFS_I(mp->m_rootip));
+       if (!root) {
+               error = ENOENT;
+               goto out_syncd_stop;
+       }
+       if (is_bad_inode(root)) {
+               error = EINVAL;
+               goto out_syncd_stop;
+       }
+       sb->s_root = d_alloc_root(root);
+       if (!sb->s_root) {
+               error = ENOMEM;
+               goto out_iput;
+       }
+
+       return 0;
+
+ out_filestream_unmount:
+       xfs_filestream_unmount(mp);
+ out_free_sb:
+       xfs_freesb(mp);
+ out_destroy_counters:
+       xfs_icsb_destroy_counters(mp);
+ out_close_devices:
+       xfs_close_devices(mp);
+ out_free_fsname:
+       xfs_free_fsname(mp);
+       kfree(mp);
+ out:
+       return -error;
+
+ out_iput:
+       iput(root);
+ out_syncd_stop:
+       xfs_syncd_stop(mp);
+ out_unmount:
+       /*
+        * Blow away any referenced inode in the filestreams cache.
+        * This can and will cause log traffic as inodes go inactive
+        * here.
+        */
+       xfs_filestream_unmount(mp);
+
+       XFS_bflush(mp->m_ddev_targp);
+
+       xfs_unmountfs(mp);
+       goto out_free_sb;
+}
+
+STATIC struct dentry *
+xfs_fs_mount(
+       struct file_system_type *fs_type,
+       int                     flags,
+       const char              *dev_name,
+       void                    *data)
+{
+       return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
+}
+
+static int
+xfs_fs_nr_cached_objects(
+       struct super_block      *sb)
+{
+       return xfs_reclaim_inodes_count(XFS_M(sb));
+}
+
+static void
+xfs_fs_free_cached_objects(
+       struct super_block      *sb,
+       int                     nr_to_scan)
+{
+       xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
+}
+
+static const struct super_operations xfs_super_operations = {
+       .alloc_inode            = xfs_fs_alloc_inode,
+       .destroy_inode          = xfs_fs_destroy_inode,
+       .dirty_inode            = xfs_fs_dirty_inode,
+       .write_inode            = xfs_fs_write_inode,
+       .evict_inode            = xfs_fs_evict_inode,
+       .put_super              = xfs_fs_put_super,
+       .sync_fs                = xfs_fs_sync_fs,
+       .freeze_fs              = xfs_fs_freeze,
+       .unfreeze_fs            = xfs_fs_unfreeze,
+       .statfs                 = xfs_fs_statfs,
+       .remount_fs             = xfs_fs_remount,
+       .show_options           = xfs_fs_show_options,
+       .nr_cached_objects      = xfs_fs_nr_cached_objects,
+       .free_cached_objects    = xfs_fs_free_cached_objects,
+};
+
+static struct file_system_type xfs_fs_type = {
+       .owner                  = THIS_MODULE,
+       .name                   = "xfs",
+       .mount                  = xfs_fs_mount,
+       .kill_sb                = kill_block_super,
+       .fs_flags               = FS_REQUIRES_DEV,
+};
+
+STATIC int __init
+xfs_init_zones(void)
+{
+
+       xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
+       if (!xfs_ioend_zone)
+               goto out;
+
+       xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
+                                                 xfs_ioend_zone);
+       if (!xfs_ioend_pool)
+               goto out_destroy_ioend_zone;
+
+       xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
+                                               "xfs_log_ticket");
+       if (!xfs_log_ticket_zone)
+               goto out_destroy_ioend_pool;
+
+       xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
+                                               "xfs_bmap_free_item");
+       if (!xfs_bmap_free_item_zone)
+               goto out_destroy_log_ticket_zone;
+
+       xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
+                                               "xfs_btree_cur");
+       if (!xfs_btree_cur_zone)
+               goto out_destroy_bmap_free_item_zone;
+
+       xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
+                                               "xfs_da_state");
+       if (!xfs_da_state_zone)
+               goto out_destroy_btree_cur_zone;
+
+       xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
+       if (!xfs_dabuf_zone)
+               goto out_destroy_da_state_zone;
+
+       xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
+       if (!xfs_ifork_zone)
+               goto out_destroy_dabuf_zone;
+
+       xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
+       if (!xfs_trans_zone)
+               goto out_destroy_ifork_zone;
+
+       xfs_log_item_desc_zone =
+               kmem_zone_init(sizeof(struct xfs_log_item_desc),
+                              "xfs_log_item_desc");
+       if (!xfs_log_item_desc_zone)
+               goto out_destroy_trans_zone;
+
+       /*
+        * The size of the zone allocated buf log item is the maximum
+        * size possible under XFS.  This wastes a little bit of memory,
+        * but it is much faster.
+        */
+       xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
+                               (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
+                                 NBWORD) * sizeof(int))), "xfs_buf_item");
+       if (!xfs_buf_item_zone)
+               goto out_destroy_log_item_desc_zone;
+
+       xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
+                       ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
+                                sizeof(xfs_extent_t))), "xfs_efd_item");
+       if (!xfs_efd_zone)
+               goto out_destroy_buf_item_zone;
+
+       xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
+                       ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
+                               sizeof(xfs_extent_t))), "xfs_efi_item");
+       if (!xfs_efi_zone)
+               goto out_destroy_efd_zone;
+
+       xfs_inode_zone =
+               kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
+                       KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
+                       xfs_fs_inode_init_once);
+       if (!xfs_inode_zone)
+               goto out_destroy_efi_zone;
+
+       xfs_ili_zone =
+               kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
+                                       KM_ZONE_SPREAD, NULL);
+       if (!xfs_ili_zone)
+               goto out_destroy_inode_zone;
+
+       return 0;
+
+ out_destroy_inode_zone:
+       kmem_zone_destroy(xfs_inode_zone);
+ out_destroy_efi_zone:
+       kmem_zone_destroy(xfs_efi_zone);
+ out_destroy_efd_zone:
+       kmem_zone_destroy(xfs_efd_zone);
+ out_destroy_buf_item_zone:
+       kmem_zone_destroy(xfs_buf_item_zone);
+ out_destroy_log_item_desc_zone:
+       kmem_zone_destroy(xfs_log_item_desc_zone);
+ out_destroy_trans_zone:
+       kmem_zone_destroy(xfs_trans_zone);
+ out_destroy_ifork_zone:
+       kmem_zone_destroy(xfs_ifork_zone);
+ out_destroy_dabuf_zone:
+       kmem_zone_destroy(xfs_dabuf_zone);
+ out_destroy_da_state_zone:
+       kmem_zone_destroy(xfs_da_state_zone);
+ out_destroy_btree_cur_zone:
+       kmem_zone_destroy(xfs_btree_cur_zone);
+ out_destroy_bmap_free_item_zone:
+       kmem_zone_destroy(xfs_bmap_free_item_zone);
+ out_destroy_log_ticket_zone:
+       kmem_zone_destroy(xfs_log_ticket_zone);
+ out_destroy_ioend_pool:
+       mempool_destroy(xfs_ioend_pool);
+ out_destroy_ioend_zone:
+       kmem_zone_destroy(xfs_ioend_zone);
+ out:
+       return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_zones(void)
+{
+       kmem_zone_destroy(xfs_ili_zone);
+       kmem_zone_destroy(xfs_inode_zone);
+       kmem_zone_destroy(xfs_efi_zone);
+       kmem_zone_destroy(xfs_efd_zone);
+       kmem_zone_destroy(xfs_buf_item_zone);
+       kmem_zone_destroy(xfs_log_item_desc_zone);
+       kmem_zone_destroy(xfs_trans_zone);
+       kmem_zone_destroy(xfs_ifork_zone);
+       kmem_zone_destroy(xfs_dabuf_zone);
+       kmem_zone_destroy(xfs_da_state_zone);
+       kmem_zone_destroy(xfs_btree_cur_zone);
+       kmem_zone_destroy(xfs_bmap_free_item_zone);
+       kmem_zone_destroy(xfs_log_ticket_zone);
+       mempool_destroy(xfs_ioend_pool);
+       kmem_zone_destroy(xfs_ioend_zone);
+
+}
+
+STATIC int __init
+xfs_init_workqueues(void)
+{
+       /*
+        * max_active is set to 8 to give enough concurency to allow
+        * multiple work operations on each CPU to run. This allows multiple
+        * filesystems to be running sync work concurrently, and scales with
+        * the number of CPUs in the system.
+        */
+       xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
+       if (!xfs_syncd_wq)
+               goto out;
+
+       xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
+       if (!xfs_ail_wq)
+               goto out_destroy_syncd;
+
+       return 0;
+
+out_destroy_syncd:
+       destroy_workqueue(xfs_syncd_wq);
+out:
+       return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_workqueues(void)
+{
+       destroy_workqueue(xfs_ail_wq);
+       destroy_workqueue(xfs_syncd_wq);
+}
+
+STATIC int __init
+init_xfs_fs(void)
+{
+       int                     error;
+
+       printk(KERN_INFO XFS_VERSION_STRING " with "
+                        XFS_BUILD_OPTIONS " enabled\n");
+
+       xfs_ioend_init();
+       xfs_dir_startup();
+
+       error = xfs_init_zones();
+       if (error)
+               goto out;
+
+       error = xfs_init_workqueues();
+       if (error)
+               goto out_destroy_zones;
+
+       error = xfs_mru_cache_init();
+       if (error)
+               goto out_destroy_wq;
+
+       error = xfs_filestream_init();
+       if (error)
+               goto out_mru_cache_uninit;
+
+       error = xfs_buf_init();
+       if (error)
+               goto out_filestream_uninit;
+
+       error = xfs_init_procfs();
+       if (error)
+               goto out_buf_terminate;
+
+       error = xfs_sysctl_register();
+       if (error)
+               goto out_cleanup_procfs;
+
+       vfs_initquota();
+
+       error = register_filesystem(&xfs_fs_type);
+       if (error)
+               goto out_sysctl_unregister;
+       return 0;
+
+ out_sysctl_unregister:
+       xfs_sysctl_unregister();
+ out_cleanup_procfs:
+       xfs_cleanup_procfs();
+ out_buf_terminate:
+       xfs_buf_terminate();
+ out_filestream_uninit:
+       xfs_filestream_uninit();
+ out_mru_cache_uninit:
+       xfs_mru_cache_uninit();
+ out_destroy_wq:
+       xfs_destroy_workqueues();
+ out_destroy_zones:
+       xfs_destroy_zones();
+ out:
+       return error;
+}
+
+STATIC void __exit
+exit_xfs_fs(void)
+{
+       vfs_exitquota();
+       unregister_filesystem(&xfs_fs_type);
+       xfs_sysctl_unregister();
+       xfs_cleanup_procfs();
+       xfs_buf_terminate();
+       xfs_filestream_uninit();
+       xfs_mru_cache_uninit();
+       xfs_destroy_workqueues();
+       xfs_destroy_zones();
+}
+
+module_init(init_xfs_fs);
+module_exit(exit_xfs_fs);
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
+MODULE_LICENSE("GPL");
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h

new file mode 100644 (file)

index 0000000..50a3266
--- /dev/null
+++ b/fs/xfs/xfs_super.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SUPER_H__
+#define __XFS_SUPER_H__
+
+#include <linux/exportfs.h>
+
+#ifdef CONFIG_XFS_QUOTA
+extern void xfs_qm_init(void);
+extern void xfs_qm_exit(void);
+# define vfs_initquota()       xfs_qm_init()
+# define vfs_exitquota()       xfs_qm_exit()
+#else
+# define vfs_initquota()       do { } while (0)
+# define vfs_exitquota()       do { } while (0)
+#endif
+
+#ifdef CONFIG_XFS_POSIX_ACL
+# define XFS_ACL_STRING                "ACLs, "
+# define set_posix_acl_flag(sb)        ((sb)->s_flags |= MS_POSIXACL)
+#else
+# define XFS_ACL_STRING
+# define set_posix_acl_flag(sb)        do { } while (0)
+#endif
+
+#define XFS_SECURITY_STRING    "security attributes, "
+
+#ifdef CONFIG_XFS_RT
+# define XFS_REALTIME_STRING   "realtime, "
+#else
+# define XFS_REALTIME_STRING
+#endif
+
+#if XFS_BIG_BLKNOS
+# if XFS_BIG_INUMS
+#  define XFS_BIGFS_STRING     "large block/inode numbers, "
+# else
+#  define XFS_BIGFS_STRING     "large block numbers, "
+# endif
+#else
+# define XFS_BIGFS_STRING
+#endif
+
+#ifdef DEBUG
+# define XFS_DBG_STRING                "debug"
+#else
+# define XFS_DBG_STRING                "no debug"
+#endif
+
+#define XFS_VERSION_STRING     "SGI XFS"
+#define XFS_BUILD_OPTIONS      XFS_ACL_STRING \
+                               XFS_SECURITY_STRING \
+                               XFS_REALTIME_STRING \
+                               XFS_BIGFS_STRING \
+                               XFS_DBG_STRING /* DBG must be last */
+
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_buftarg;
+struct block_device;
+
+extern __uint64_t xfs_max_file_offset(unsigned int);
+
+extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
+
+extern const struct export_operations xfs_export_operations;
+extern const struct xattr_handler *xfs_xattr_handlers[];
+extern const struct quotactl_ops xfs_quotactl_operations;
+
+#define XFS_M(sb)              ((struct xfs_mount *)((sb)->s_fs_info))
+
+#endif /* __XFS_SUPER_H__ */
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c

new file mode 100644 (file)

index 0000000..4604f90
--- /dev/null
+++ b/fs/xfs/xfs_sync.c
@@ -0,0 +1,1065 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_dinode.h"
+#include "xfs_error.h"
+#include "xfs_filestream.h"
+#include "xfs_vnodeops.h"
+#include "xfs_inode_item.h"
+#include "xfs_quota.h"
+#include "xfs_trace.h"
+#include "xfs_fsops.h"
+
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+
+struct workqueue_struct        *xfs_syncd_wq;  /* sync workqueue */
+
+/*
+ * The inode lookup is done in batches to keep the amount of lock traffic and
+ * radix tree lookups to a minimum. The batch size is a trade off between
+ * lookup reduction and stack usage. This is in the reclaim path, so we can't
+ * be too greedy.
+ */
+#define XFS_LOOKUP_BATCH       32
+
+STATIC int
+xfs_inode_ag_walk_grab(
+       struct xfs_inode        *ip)
+{
+       struct inode            *inode = VFS_I(ip);
+
+       ASSERT(rcu_read_lock_held());
+
+       /*
+        * check for stale RCU freed inode
+        *
+        * If the inode has been reallocated, it doesn't matter if it's not in
+        * the AG we are walking - we are walking for writeback, so if it
+        * passes all the "valid inode" checks and is dirty, then we'll write
+        * it back anyway.  If it has been reallocated and still being
+        * initialised, the XFS_INEW check below will catch it.
+        */
+       spin_lock(&ip->i_flags_lock);
+       if (!ip->i_ino)
+               goto out_unlock_noent;
+
+       /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
+       if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
+               goto out_unlock_noent;
+       spin_unlock(&ip->i_flags_lock);
+
+       /* nothing to sync during shutdown */
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return EFSCORRUPTED;
+
+       /* If we can't grab the inode, it must on it's way to reclaim. */
+       if (!igrab(inode))
+               return ENOENT;
+
+       if (is_bad_inode(inode)) {
+               IRELE(ip);
+               return ENOENT;
+       }
+
+       /* inode is valid */
+       return 0;
+
+out_unlock_noent:
+       spin_unlock(&ip->i_flags_lock);
+       return ENOENT;
+}
+
+STATIC int
+xfs_inode_ag_walk(
+       struct xfs_mount        *mp,
+       struct xfs_perag        *pag,
+       int                     (*execute)(struct xfs_inode *ip,
+                                          struct xfs_perag *pag, int flags),
+       int                     flags)
+{
+       uint32_t                first_index;
+       int                     last_error = 0;
+       int                     skipped;
+       int                     done;
+       int                     nr_found;
+
+restart:
+       done = 0;
+       skipped = 0;
+       first_index = 0;
+       nr_found = 0;
+       do {
+               struct xfs_inode *batch[XFS_LOOKUP_BATCH];
+               int             error = 0;
+               int             i;
+
+               rcu_read_lock();
+               nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+                                       (void **)batch, first_index,
+                                       XFS_LOOKUP_BATCH);
+               if (!nr_found) {
+                       rcu_read_unlock();
+                       break;
+               }
+
+               /*
+                * Grab the inodes before we drop the lock. if we found
+                * nothing, nr == 0 and the loop will be skipped.
+                */
+               for (i = 0; i < nr_found; i++) {
+                       struct xfs_inode *ip = batch[i];
+
+                       if (done || xfs_inode_ag_walk_grab(ip))
+                               batch[i] = NULL;
+
+                       /*
+                        * Update the index for the next lookup. Catch
+                        * overflows into the next AG range which can occur if
+                        * we have inodes in the last block of the AG and we
+                        * are currently pointing to the last inode.
+                        *
+                        * Because we may see inodes that are from the wrong AG
+                        * due to RCU freeing and reallocation, only update the
+                        * index if it lies in this AG. It was a race that lead
+                        * us to see this inode, so another lookup from the
+                        * same index will not find it again.
+                        */
+                       if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
+                               continue;
+                       first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+                       if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+                               done = 1;
+               }
+
+               /* unlock now we've grabbed the inodes. */
+               rcu_read_unlock();
+
+               for (i = 0; i < nr_found; i++) {
+                       if (!batch[i])
+                               continue;
+                       error = execute(batch[i], pag, flags);
+                       IRELE(batch[i]);
+                       if (error == EAGAIN) {
+                               skipped++;
+                               continue;
+                       }
+                       if (error && last_error != EFSCORRUPTED)
+                               last_error = error;
+               }
+
+               /* bail out if the filesystem is corrupted.  */
+               if (error == EFSCORRUPTED)
+                       break;
+
+               cond_resched();
+
+       } while (nr_found && !done);
+
+       if (skipped) {
+               delay(1);
+               goto restart;
+       }
+       return last_error;
+}
+
+int
+xfs_inode_ag_iterator(
+       struct xfs_mount        *mp,
+       int                     (*execute)(struct xfs_inode *ip,
+                                          struct xfs_perag *pag, int flags),
+       int                     flags)
+{
+       struct xfs_perag        *pag;
+       int                     error = 0;
+       int                     last_error = 0;
+       xfs_agnumber_t          ag;
+
+       ag = 0;
+       while ((pag = xfs_perag_get(mp, ag))) {
+               ag = pag->pag_agno + 1;
+               error = xfs_inode_ag_walk(mp, pag, execute, flags);
+               xfs_perag_put(pag);
+               if (error) {
+                       last_error = error;
+                       if (error == EFSCORRUPTED)
+                               break;
+               }
+       }
+       return XFS_ERROR(last_error);
+}
+
+STATIC int
+xfs_sync_inode_data(
+       struct xfs_inode        *ip,
+       struct xfs_perag        *pag,
+       int                     flags)
+{
+       struct inode            *inode = VFS_I(ip);
+       struct address_space *mapping = inode->i_mapping;
+       int                     error = 0;
+
+       if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+               goto out_wait;
+
+       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+               if (flags & SYNC_TRYLOCK)
+                       goto out_wait;
+               xfs_ilock(ip, XFS_IOLOCK_SHARED);
+       }
+
+       error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
+                               0 : XBF_ASYNC, FI_NONE);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+ out_wait:
+       if (flags & SYNC_WAIT)
+               xfs_ioend_wait(ip);
+       return error;
+}
+
+STATIC int
+xfs_sync_inode_attr(
+       struct xfs_inode        *ip,
+       struct xfs_perag        *pag,
+       int                     flags)
+{
+       int                     error = 0;
+
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+       if (xfs_inode_clean(ip))
+               goto out_unlock;
+       if (!xfs_iflock_nowait(ip)) {
+               if (!(flags & SYNC_WAIT))
+                       goto out_unlock;
+               xfs_iflock(ip);
+       }
+
+       if (xfs_inode_clean(ip)) {
+               xfs_ifunlock(ip);
+               goto out_unlock;
+       }
+
+       error = xfs_iflush(ip, flags);
+
+       /*
+        * We don't want to try again on non-blocking flushes that can't run
+        * again immediately. If an inode really must be written, then that's
+        * what the SYNC_WAIT flag is for.
+        */
+       if (error == EAGAIN) {
+               ASSERT(!(flags & SYNC_WAIT));
+               error = 0;
+       }
+
+ out_unlock:
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+       return error;
+}
+
+/*
+ * Write out pagecache data for the whole filesystem.
+ */
+STATIC int
+xfs_sync_data(
+       struct xfs_mount        *mp,
+       int                     flags)
+{
+       int                     error;
+
+       ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
+
+       error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
+       if (error)
+               return XFS_ERROR(error);
+
+       xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
+       return 0;
+}
+
+/*
+ * Write out inode metadata (attributes) for the whole filesystem.
+ */
+STATIC int
+xfs_sync_attr(
+       struct xfs_mount        *mp,
+       int                     flags)
+{
+       ASSERT((flags & ~SYNC_WAIT) == 0);
+
+       return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
+}
+
+STATIC int
+xfs_sync_fsdata(
+       struct xfs_mount        *mp)
+{
+       struct xfs_buf          *bp;
+
+       /*
+        * If the buffer is pinned then push on the log so we won't get stuck
+        * waiting in the write for someone, maybe ourselves, to flush the log.
+        *
+        * Even though we just pushed the log above, we did not have the
+        * superblock buffer locked at that point so it can become pinned in
+        * between there and here.
+        */
+       bp = xfs_getsb(mp, 0);
+       if (xfs_buf_ispinned(bp))
+               xfs_log_force(mp, 0);
+
+       return xfs_bwrite(mp, bp);
+}
+
+/*
+ * When remounting a filesystem read-only or freezing the filesystem, we have
+ * two phases to execute. This first phase is syncing the data before we
+ * quiesce the filesystem, and the second is flushing all the inodes out after
+ * we've waited for all the transactions created by the first phase to
+ * complete. The second phase ensures that the inodes are written to their
+ * location on disk rather than just existing in transactions in the log. This
+ * means after a quiesce there is no log replay required to write the inodes to
+ * disk (this is the main difference between a sync and a quiesce).
+ */
+/*
+ * First stage of freeze - no writers will make progress now we are here,
+ * so we flush delwri and delalloc buffers here, then wait for all I/O to
+ * complete.  Data is frozen at that point. Metadata is not frozen,
+ * transactions can still occur here so don't bother flushing the buftarg
+ * because it'll just get dirty again.
+ */
+int
+xfs_quiesce_data(
+       struct xfs_mount        *mp)
+{
+       int                     error, error2 = 0;
+
+       xfs_qm_sync(mp, SYNC_TRYLOCK);
+       xfs_qm_sync(mp, SYNC_WAIT);
+
+       /* force out the newly dirtied log buffers */
+       xfs_log_force(mp, XFS_LOG_SYNC);
+
+       /* write superblock and hoover up shutdown errors */
+       error = xfs_sync_fsdata(mp);
+
+       /* make sure all delwri buffers are written out */
+       xfs_flush_buftarg(mp->m_ddev_targp, 1);
+
+       /* mark the log as covered if needed */
+       if (xfs_log_need_covered(mp))
+               error2 = xfs_fs_log_dummy(mp);
+
+       /* flush data-only devices */
+       if (mp->m_rtdev_targp)
+               XFS_bflush(mp->m_rtdev_targp);
+
+       return error ? error : error2;
+}
+
+STATIC void
+xfs_quiesce_fs(
+       struct xfs_mount        *mp)
+{
+       int     count = 0, pincount;
+
+       xfs_reclaim_inodes(mp, 0);
+       xfs_flush_buftarg(mp->m_ddev_targp, 0);
+
+       /*
+        * This loop must run at least twice.  The first instance of the loop
+        * will flush most meta data but that will generate more meta data
+        * (typically directory updates).  Which then must be flushed and
+        * logged before we can write the unmount record. We also so sync
+        * reclaim of inodes to catch any that the above delwri flush skipped.
+        */
+       do {
+               xfs_reclaim_inodes(mp, SYNC_WAIT);
+               xfs_sync_attr(mp, SYNC_WAIT);
+               pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
+               if (!pincount) {
+                       delay(50);
+                       count++;
+               }
+       } while (count < 2);
+}
+
+/*
+ * Second stage of a quiesce. The data is already synced, now we have to take
+ * care of the metadata. New transactions are already blocked, so we need to
+ * wait for any remaining transactions to drain out before proceeding.
+ */
+void
+xfs_quiesce_attr(
+       struct xfs_mount        *mp)
+{
+       int     error = 0;
+
+       /* wait for all modifications to complete */
+       while (atomic_read(&mp->m_active_trans) > 0)
+               delay(100);
+
+       /* flush inodes and push all remaining buffers out to disk */
+       xfs_quiesce_fs(mp);
+
+       /*
+        * Just warn here till VFS can correctly support
+        * read-only remount without racing.
+        */
+       WARN_ON(atomic_read(&mp->m_active_trans) != 0);
+
+       /* Push the superblock and write an unmount record */
+       error = xfs_log_sbcount(mp);
+       if (error)
+               xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
+                               "Frozen image may not be consistent.");
+       xfs_log_unmount_write(mp);
+       xfs_unmountfs_writesb(mp);
+}
+
+static void
+xfs_syncd_queue_sync(
+       struct xfs_mount        *mp)
+{
+       queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
+                               msecs_to_jiffies(xfs_syncd_centisecs * 10));
+}
+
+/*
+ * Every sync period we need to unpin all items, reclaim inodes and sync
+ * disk quotas.  We might need to cover the log to indicate that the
+ * filesystem is idle and not frozen.
+ */
+STATIC void
+xfs_sync_worker(
+       struct work_struct *work)
+{
+       struct xfs_mount *mp = container_of(to_delayed_work(work),
+                                       struct xfs_mount, m_sync_work);
+       int             error;
+
+       if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+               /* dgc: errors ignored here */
+               if (mp->m_super->s_frozen == SB_UNFROZEN &&
+                   xfs_log_need_covered(mp))
+                       error = xfs_fs_log_dummy(mp);
+               else
+                       xfs_log_force(mp, 0);
+               error = xfs_qm_sync(mp, SYNC_TRYLOCK);
+
+               /* start pushing all the metadata that is currently dirty */
+               xfs_ail_push_all(mp->m_ail);
+       }
+
+       /* queue us up again */
+       xfs_syncd_queue_sync(mp);
+}
+
+/*
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs syncd work default of 30s. Perhaps this should have it's own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_syncd_queue_reclaim(
+       struct xfs_mount        *mp)
+{
+
+       /*
+        * We can have inodes enter reclaim after we've shut down the syncd
+        * workqueue during unmount, so don't allow reclaim work to be queued
+        * during unmount.
+        */
+       if (!(mp->m_super->s_flags & MS_ACTIVE))
+               return;
+
+       rcu_read_lock();
+       if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+               queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
+                       msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
+       }
+       rcu_read_unlock();
+}
+
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+STATIC void
+xfs_reclaim_worker(
+       struct work_struct *work)
+{
+       struct xfs_mount *mp = container_of(to_delayed_work(work),
+                                       struct xfs_mount, m_reclaim_work);
+
+       xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+       xfs_syncd_queue_reclaim(mp);
+}
+
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations.  At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room.
+ *
+ * Queue a new data flush if there isn't one already in progress and
+ * wait for completion of the flush. This means that we only ever have one
+ * inode flush in progress no matter how many ENOSPC events are occurring and
+ * so will prevent the system from bogging down due to every concurrent
+ * ENOSPC event scanning all the active inodes in the system for writeback.
+ */
+void
+xfs_flush_inodes(
+       struct xfs_inode        *ip)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+
+       queue_work(xfs_syncd_wq, &mp->m_flush_work);
+       flush_work_sync(&mp->m_flush_work);
+}
+
+STATIC void
+xfs_flush_worker(
+       struct work_struct *work)
+{
+       struct xfs_mount *mp = container_of(work,
+                                       struct xfs_mount, m_flush_work);
+
+       xfs_sync_data(mp, SYNC_TRYLOCK);
+       xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
+}
+
+int
+xfs_syncd_init(
+       struct xfs_mount        *mp)
+{
+       INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
+       INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
+       INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+
+       xfs_syncd_queue_sync(mp);
+       xfs_syncd_queue_reclaim(mp);
+
+       return 0;
+}
+
+void
+xfs_syncd_stop(
+       struct xfs_mount        *mp)
+{
+       cancel_delayed_work_sync(&mp->m_sync_work);
+       cancel_delayed_work_sync(&mp->m_reclaim_work);
+       cancel_work_sync(&mp->m_flush_work);
+}
+
+void
+__xfs_inode_set_reclaim_tag(
+       struct xfs_perag        *pag,
+       struct xfs_inode        *ip)
+{
+       radix_tree_tag_set(&pag->pag_ici_root,
+                          XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
+                          XFS_ICI_RECLAIM_TAG);
+
+       if (!pag->pag_ici_reclaimable) {
+               /* propagate the reclaim tag up into the perag radix tree */
+               spin_lock(&ip->i_mount->m_perag_lock);
+               radix_tree_tag_set(&ip->i_mount->m_perag_tree,
+                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+                               XFS_ICI_RECLAIM_TAG);
+               spin_unlock(&ip->i_mount->m_perag_lock);
+
+               /* schedule periodic background inode reclaim */
+               xfs_syncd_queue_reclaim(ip->i_mount);
+
+               trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
+                                                       -1, _RET_IP_);
+       }
+       pag->pag_ici_reclaimable++;
+}
+
+/*
+ * We set the inode flag atomically with the radix tree tag.
+ * Once we get tag lookups on the radix tree, this inode flag
+ * can go away.
+ */
+void
+xfs_inode_set_reclaim_tag(
+       xfs_inode_t     *ip)
+{
+       struct xfs_mount *mp = ip->i_mount;
+       struct xfs_perag *pag;
+
+       pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+       spin_lock(&pag->pag_ici_lock);
+       spin_lock(&ip->i_flags_lock);
+       __xfs_inode_set_reclaim_tag(pag, ip);
+       __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+       spin_unlock(&ip->i_flags_lock);
+       spin_unlock(&pag->pag_ici_lock);
+       xfs_perag_put(pag);
+}
+
+STATIC void
+__xfs_inode_clear_reclaim(
+       xfs_perag_t     *pag,
+       xfs_inode_t     *ip)
+{
+       pag->pag_ici_reclaimable--;
+       if (!pag->pag_ici_reclaimable) {
+               /* clear the reclaim tag from the perag radix tree */
+               spin_lock(&ip->i_mount->m_perag_lock);
+               radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
+                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+                               XFS_ICI_RECLAIM_TAG);
+               spin_unlock(&ip->i_mount->m_perag_lock);
+               trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
+                                                       -1, _RET_IP_);
+       }
+}
+
+void
+__xfs_inode_clear_reclaim_tag(
+       xfs_mount_t     *mp,
+       xfs_perag_t     *pag,
+       xfs_inode_t     *ip)
+{
+       radix_tree_tag_clear(&pag->pag_ici_root,
+                       XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+       __xfs_inode_clear_reclaim(pag, ip);
+}
+
+/*
+ * Grab the inode for reclaim exclusively.
+ * Return 0 if we grabbed it, non-zero otherwise.
+ */
+STATIC int
+xfs_reclaim_inode_grab(
+       struct xfs_inode        *ip,
+       int                     flags)
+{
+       ASSERT(rcu_read_lock_held());
+
+       /* quick check for stale RCU freed inode */
+       if (!ip->i_ino)
+               return 1;
+
+       /*
+        * do some unlocked checks first to avoid unnecessary lock traffic.
+        * The first is a flush lock check, the second is a already in reclaim
+        * check. Only do these checks if we are not going to block on locks.
+        */
+       if ((flags & SYNC_TRYLOCK) &&
+           (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
+               return 1;
+       }
+
+       /*
+        * The radix tree lock here protects a thread in xfs_iget from racing
+        * with us starting reclaim on the inode.  Once we have the
+        * XFS_IRECLAIM flag set it will not touch us.
+        *
+        * Due to RCU lookup, we may find inodes that have been freed and only
+        * have XFS_IRECLAIM set.  Indeed, we may see reallocated inodes that
+        * aren't candidates for reclaim at all, so we must check the
+        * XFS_IRECLAIMABLE is set first before proceeding to reclaim.
+        */
+       spin_lock(&ip->i_flags_lock);
+       if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
+           __xfs_iflags_test(ip, XFS_IRECLAIM)) {
+               /* not a reclaim candidate. */
+               spin_unlock(&ip->i_flags_lock);
+               return 1;
+       }
+       __xfs_iflags_set(ip, XFS_IRECLAIM);
+       spin_unlock(&ip->i_flags_lock);
+       return 0;
+}
+
+/*
+ * Inodes in different states need to be treated differently, and the return
+ * value of xfs_iflush is not sufficient to get this right. The following table
+ * lists the inode states and the reclaim actions necessary for non-blocking
+ * reclaim:
+ *
+ *
+ *     inode state          iflush ret         required action
+ *      ---------------      ----------         ---------------
+ *     bad                     -               reclaim
+ *     shutdown                EIO             unpin and reclaim
+ *     clean, unpinned         0               reclaim
+ *     stale, unpinned         0               reclaim
+ *     clean, pinned(*)        0               requeue
+ *     stale, pinned           EAGAIN          requeue
+ *     dirty, delwri ok        0               requeue
+ *     dirty, delwri blocked   EAGAIN          requeue
+ *     dirty, sync flush       0               reclaim
+ *
+ * (*) dgc: I don't think the clean, pinned state is possible but it gets
+ * handled anyway given the order of checks implemented.
+ *
+ * As can be seen from the table, the return value of xfs_iflush() is not
+ * sufficient to correctly decide the reclaim action here. The checks in
+ * xfs_iflush() might look like duplicates, but they are not.
+ *
+ * Also, because we get the flush lock first, we know that any inode that has
+ * been flushed delwri has had the flush completed by the time we check that
+ * the inode is clean. The clean inode check needs to be done before flushing
+ * the inode delwri otherwise we would loop forever requeuing clean inodes as
+ * we cannot tell apart a successful delwri flush and a clean inode from the
+ * return value of xfs_iflush().
+ *
+ * Note that because the inode is flushed delayed write by background
+ * writeback, the flush lock may already be held here and waiting on it can
+ * result in very long latencies. Hence for sync reclaims, where we wait on the
+ * flush lock, the caller should push out delayed write inodes first before
+ * trying to reclaim them to minimise the amount of time spent waiting. For
+ * background relaim, we just requeue the inode for the next pass.
+ *
+ * Hence the order of actions after gaining the locks should be:
+ *     bad             => reclaim
+ *     shutdown        => unpin and reclaim
+ *     pinned, delwri  => requeue
+ *     pinned, sync    => unpin
+ *     stale           => reclaim
+ *     clean           => reclaim
+ *     dirty, delwri   => flush and requeue
+ *     dirty, sync     => flush, wait and reclaim
+ */
+STATIC int
+xfs_reclaim_inode(
+       struct xfs_inode        *ip,
+       struct xfs_perag        *pag,
+       int                     sync_mode)
+{
+       int     error;
+
+restart:
+       error = 0;
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       if (!xfs_iflock_nowait(ip)) {
+               if (!(sync_mode & SYNC_WAIT))
+                       goto out;
+               xfs_iflock(ip);
+       }
+
+       if (is_bad_inode(VFS_I(ip)))
+               goto reclaim;
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+               xfs_iunpin_wait(ip);
+               goto reclaim;
+       }
+       if (xfs_ipincount(ip)) {
+               if (!(sync_mode & SYNC_WAIT)) {
+                       xfs_ifunlock(ip);
+                       goto out;
+               }
+               xfs_iunpin_wait(ip);
+       }
+       if (xfs_iflags_test(ip, XFS_ISTALE))
+               goto reclaim;
+       if (xfs_inode_clean(ip))
+               goto reclaim;
+
+       /*
+        * Now we have an inode that needs flushing.
+        *
+        * We do a nonblocking flush here even if we are doing a SYNC_WAIT
+        * reclaim as we can deadlock with inode cluster removal.
+        * xfs_ifree_cluster() can lock the inode buffer before it locks the
+        * ip->i_lock, and we are doing the exact opposite here. As a result,
+        * doing a blocking xfs_itobp() to get the cluster buffer will result
+        * in an ABBA deadlock with xfs_ifree_cluster().
+        *
+        * As xfs_ifree_cluser() must gather all inodes that are active in the
+        * cache to mark them stale, if we hit this case we don't actually want
+        * to do IO here - we want the inode marked stale so we can simply
+        * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
+        * just unlock the inode, back off and try again. Hopefully the next
+        * pass through will see the stale flag set on the inode.
+        */
+       error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
+       if (sync_mode & SYNC_WAIT) {
+               if (error == EAGAIN) {
+                       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                       /* backoff longer than in xfs_ifree_cluster */
+                       delay(2);
+                       goto restart;
+               }
+               xfs_iflock(ip);
+               goto reclaim;
+       }
+
+       /*
+        * When we have to flush an inode but don't have SYNC_WAIT set, we
+        * flush the inode out using a delwri buffer and wait for the next
+        * call into reclaim to find it in a clean state instead of waiting for
+        * it now. We also don't return errors here - if the error is transient
+        * then the next reclaim pass will flush the inode, and if the error
+        * is permanent then the next sync reclaim will reclaim the inode and
+        * pass on the error.
+        */
+       if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+               xfs_warn(ip->i_mount,
+                       "inode 0x%llx background reclaim flush failed with %d",
+                       (long long)ip->i_ino, error);
+       }
+out:
+       xfs_iflags_clear(ip, XFS_IRECLAIM);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       /*
+        * We could return EAGAIN here to make reclaim rescan the inode tree in
+        * a short while. However, this just burns CPU time scanning the tree
+        * waiting for IO to complete and xfssyncd never goes back to the idle
+        * state. Instead, return 0 to let the next scheduled background reclaim
+        * attempt to reclaim the inode again.
+        */
+       return 0;
+
+reclaim:
+       xfs_ifunlock(ip);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+       XFS_STATS_INC(xs_ig_reclaims);
+       /*
+        * Remove the inode from the per-AG radix tree.
+        *
+        * Because radix_tree_delete won't complain even if the item was never
+        * added to the tree assert that it's been there before to catch
+        * problems with the inode life time early on.
+        */
+       spin_lock(&pag->pag_ici_lock);
+       if (!radix_tree_delete(&pag->pag_ici_root,
+                               XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
+               ASSERT(0);
+       __xfs_inode_clear_reclaim(pag, ip);
+       spin_unlock(&pag->pag_ici_lock);
+
+       /*
+        * Here we do an (almost) spurious inode lock in order to coordinate
+        * with inode cache radix tree lookups.  This is because the lookup
+        * can reference the inodes in the cache without taking references.
+        *
+        * We make that OK here by ensuring that we wait until the inode is
+        * unlocked after the lookup before we go ahead and free it.  We get
+        * both the ilock and the iolock because the code may need to drop the
+        * ilock one but will still hold the iolock.
+        */
+       xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+       xfs_qm_dqdetach(ip);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+
+       xfs_inode_free(ip);
+       return error;
+
+}
+
+/*
+ * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
+ * corrupted, we still want to try to reclaim all the inodes. If we don't,
+ * then a shut down during filesystem unmount reclaim walk leak all the
+ * unreclaimed inodes.
+ */
+int
+xfs_reclaim_inodes_ag(
+       struct xfs_mount        *mp,
+       int                     flags,
+       int                     *nr_to_scan)
+{
+       struct xfs_perag        *pag;
+       int                     error = 0;
+       int                     last_error = 0;
+       xfs_agnumber_t          ag;
+       int                     trylock = flags & SYNC_TRYLOCK;
+       int                     skipped;
+
+restart:
+       ag = 0;
+       skipped = 0;
+       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+               unsigned long   first_index = 0;
+               int             done = 0;
+               int             nr_found = 0;
+
+               ag = pag->pag_agno + 1;
+
+               if (trylock) {
+                       if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
+                               skipped++;
+                               xfs_perag_put(pag);
+                               continue;
+                       }
+                       first_index = pag->pag_ici_reclaim_cursor;
+               } else
+                       mutex_lock(&pag->pag_ici_reclaim_lock);
+
+               do {
+                       struct xfs_inode *batch[XFS_LOOKUP_BATCH];
+                       int     i;
+
+                       rcu_read_lock();
+                       nr_found = radix_tree_gang_lookup_tag(
+                                       &pag->pag_ici_root,
+                                       (void **)batch, first_index,
+                                       XFS_LOOKUP_BATCH,
+                                       XFS_ICI_RECLAIM_TAG);
+                       if (!nr_found) {
+                               done = 1;
+                               rcu_read_unlock();
+                               break;
+                       }
+
+                       /*
+                        * Grab the inodes before we drop the lock. if we found
+                        * nothing, nr == 0 and the loop will be skipped.
+                        */
+                       for (i = 0; i < nr_found; i++) {
+                               struct xfs_inode *ip = batch[i];
+
+                               if (done || xfs_reclaim_inode_grab(ip, flags))
+                                       batch[i] = NULL;
+
+                               /*
+                                * Update the index for the next lookup. Catch
+                                * overflows into the next AG range which can
+                                * occur if we have inodes in the last block of
+                                * the AG and we are currently pointing to the
+                                * last inode.
+                                *
+                                * Because we may see inodes that are from the
+                                * wrong AG due to RCU freeing and
+                                * reallocation, only update the index if it
+                                * lies in this AG. It was a race that lead us
+                                * to see this inode, so another lookup from
+                                * the same index will not find it again.
+                                */
+                               if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
+                                                               pag->pag_agno)
+                                       continue;
+                               first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+                               if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+                                       done = 1;
+                       }
+
+                       /* unlock now we've grabbed the inodes. */
+                       rcu_read_unlock();
+
+                       for (i = 0; i < nr_found; i++) {
+                               if (!batch[i])
+                                       continue;
+                               error = xfs_reclaim_inode(batch[i], pag, flags);
+                               if (error && last_error != EFSCORRUPTED)
+                                       last_error = error;
+                       }
+
+                       *nr_to_scan -= XFS_LOOKUP_BATCH;
+
+                       cond_resched();
+
+               } while (nr_found && !done && *nr_to_scan > 0);
+
+               if (trylock && !done)
+                       pag->pag_ici_reclaim_cursor = first_index;
+               else
+                       pag->pag_ici_reclaim_cursor = 0;
+               mutex_unlock(&pag->pag_ici_reclaim_lock);
+               xfs_perag_put(pag);
+       }
+
+       /*
+        * if we skipped any AG, and we still have scan count remaining, do
+        * another pass this time using blocking reclaim semantics (i.e
+        * waiting on the reclaim locks and ignoring the reclaim cursors). This
+        * ensure that when we get more reclaimers than AGs we block rather
+        * than spin trying to execute reclaim.
+        */
+       if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
+               trylock = 0;
+               goto restart;
+       }
+       return XFS_ERROR(last_error);
+}
+
+int
+xfs_reclaim_inodes(
+       xfs_mount_t     *mp,
+       int             mode)
+{
+       int             nr_to_scan = INT_MAX;
+
+       return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
+}
+
+/*
+ * Scan a certain number of inodes for reclaim.
+ *
+ * When called we make sure that there is a background (fast) inode reclaim in
+ * progress, while we will throttle the speed of reclaim via doing synchronous
+ * reclaim of inodes. That means if we come across dirty inodes, we wait for
+ * them to be cleaned, which we hope will not be very long due to the
+ * background walker having already kicked the IO off on those dirty inodes.
+ */
+void
+xfs_reclaim_inodes_nr(
+       struct xfs_mount        *mp,
+       int                     nr_to_scan)
+{
+       /* kick background reclaimer and push the AIL */
+       xfs_syncd_queue_reclaim(mp);
+       xfs_ail_push_all(mp->m_ail);
+
+       xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
+}
+
+/*
+ * Return the number of reclaimable inodes in the filesystem for
+ * the shrinker to determine how much to reclaim.
+ */
+int
+xfs_reclaim_inodes_count(
+       struct xfs_mount        *mp)
+{
+       struct xfs_perag        *pag;
+       xfs_agnumber_t          ag = 0;
+       int                     reclaimable = 0;
+
+       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+               ag = pag->pag_agno + 1;
+               reclaimable += pag->pag_ici_reclaimable;
+               xfs_perag_put(pag);
+       }
+       return reclaimable;
+}
+
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h

new file mode 100644 (file)

index 0000000..941202e
--- /dev/null
+++ b/fs/xfs/xfs_sync.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef XFS_SYNC_H
+#define XFS_SYNC_H 1
+
+struct xfs_mount;
+struct xfs_perag;
+
+#define SYNC_WAIT              0x0001  /* wait for i/o to complete */
+#define SYNC_TRYLOCK           0x0002  /* only try to lock inodes */
+
+extern struct workqueue_struct *xfs_syncd_wq;  /* sync workqueue */
+
+int xfs_syncd_init(struct xfs_mount *mp);
+void xfs_syncd_stop(struct xfs_mount *mp);
+
+int xfs_quiesce_data(struct xfs_mount *mp);
+void xfs_quiesce_attr(struct xfs_mount *mp);
+
+void xfs_flush_inodes(struct xfs_inode *ip);
+
+int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
+int xfs_reclaim_inodes_count(struct xfs_mount *mp);
+void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
+
+void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
+void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
+void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
+                               struct xfs_inode *ip);
+
+int xfs_sync_inode_grab(struct xfs_inode *ip);
+int xfs_inode_ag_iterator(struct xfs_mount *mp,
+       int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
+       int flags);
+
+#endif
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c

new file mode 100644 (file)

index 0000000..ee2d2ad
--- /dev/null
+++ b/fs/xfs/xfs_sysctl.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2001-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include "xfs_error.h"
+
+static struct ctl_table_header *xfs_table_header;
+
+#ifdef CONFIG_PROC_FS
+STATIC int
+xfs_stats_clear_proc_handler(
+       ctl_table       *ctl,
+       int             write,
+       void            __user *buffer,
+       size_t          *lenp,
+       loff_t          *ppos)
+{
+       int             c, ret, *valp = ctl->data;
+       __uint32_t      vn_active;
+
+       ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+
+       if (!ret && write && *valp) {
+               xfs_notice(NULL, "Clearing xfsstats");
+               for_each_possible_cpu(c) {
+                       preempt_disable();
+                       /* save vn_active, it's a universal truth! */
+                       vn_active = per_cpu(xfsstats, c).vn_active;
+                       memset(&per_cpu(xfsstats, c), 0,
+                              sizeof(struct xfsstats));
+                       per_cpu(xfsstats, c).vn_active = vn_active;
+                       preempt_enable();
+               }
+               xfs_stats_clear = 0;
+       }
+
+       return ret;
+}
+
+STATIC int
+xfs_panic_mask_proc_handler(
+       ctl_table       *ctl,
+       int             write,
+       void            __user *buffer,
+       size_t          *lenp,
+       loff_t          *ppos)
+{
+       int             ret, *valp = ctl->data;
+
+       ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
+       if (!ret && write) {
+               xfs_panic_mask = *valp;
+#ifdef DEBUG
+               xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES);
+#endif
+       }
+       return ret;
+}
+#endif /* CONFIG_PROC_FS */
+
+static ctl_table xfs_table[] = {
+       {
+               .procname       = "irix_sgid_inherit",
+               .data           = &xfs_params.sgid_inherit.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.sgid_inherit.min,
+               .extra2         = &xfs_params.sgid_inherit.max
+       },
+       {
+               .procname       = "irix_symlink_mode",
+               .data           = &xfs_params.symlink_mode.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.symlink_mode.min,
+               .extra2         = &xfs_params.symlink_mode.max
+       },
+       {
+               .procname       = "panic_mask",
+               .data           = &xfs_params.panic_mask.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = xfs_panic_mask_proc_handler,
+               .extra1         = &xfs_params.panic_mask.min,
+               .extra2         = &xfs_params.panic_mask.max
+       },
+
+       {
+               .procname       = "error_level",
+               .data           = &xfs_params.error_level.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.error_level.min,
+               .extra2         = &xfs_params.error_level.max
+       },
+       {
+               .procname       = "xfssyncd_centisecs",
+               .data           = &xfs_params.syncd_timer.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.syncd_timer.min,
+               .extra2         = &xfs_params.syncd_timer.max
+       },
+       {
+               .procname       = "inherit_sync",
+               .data           = &xfs_params.inherit_sync.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.inherit_sync.min,
+               .extra2         = &xfs_params.inherit_sync.max
+       },
+       {
+               .procname       = "inherit_nodump",
+               .data           = &xfs_params.inherit_nodump.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.inherit_nodump.min,
+               .extra2         = &xfs_params.inherit_nodump.max
+       },
+       {
+               .procname       = "inherit_noatime",
+               .data           = &xfs_params.inherit_noatim.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.inherit_noatim.min,
+               .extra2         = &xfs_params.inherit_noatim.max
+       },
+       {
+               .procname       = "xfsbufd_centisecs",
+               .data           = &xfs_params.xfs_buf_timer.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.xfs_buf_timer.min,
+               .extra2         = &xfs_params.xfs_buf_timer.max
+       },
+       {
+               .procname       = "age_buffer_centisecs",
+               .data           = &xfs_params.xfs_buf_age.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.xfs_buf_age.min,
+               .extra2         = &xfs_params.xfs_buf_age.max
+       },
+       {
+               .procname       = "inherit_nosymlinks",
+               .data           = &xfs_params.inherit_nosym.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.inherit_nosym.min,
+               .extra2         = &xfs_params.inherit_nosym.max
+       },
+       {
+               .procname       = "rotorstep",
+               .data           = &xfs_params.rotorstep.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.rotorstep.min,
+               .extra2         = &xfs_params.rotorstep.max
+       },
+       {
+               .procname       = "inherit_nodefrag",
+               .data           = &xfs_params.inherit_nodfrg.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.inherit_nodfrg.min,
+               .extra2         = &xfs_params.inherit_nodfrg.max
+       },
+       {
+               .procname       = "filestream_centisecs",
+               .data           = &xfs_params.fstrm_timer.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &xfs_params.fstrm_timer.min,
+               .extra2         = &xfs_params.fstrm_timer.max,
+       },
+       /* please keep this the last entry */
+#ifdef CONFIG_PROC_FS
+       {
+               .procname       = "stats_clear",
+               .data           = &xfs_params.stats_clear.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = xfs_stats_clear_proc_handler,
+               .extra1         = &xfs_params.stats_clear.min,
+               .extra2         = &xfs_params.stats_clear.max
+       },
+#endif /* CONFIG_PROC_FS */
+
+       {}
+};
+
+static ctl_table xfs_dir_table[] = {
+       {
+               .procname       = "xfs",
+               .mode           = 0555,
+               .child          = xfs_table
+       },
+       {}
+};
+
+static ctl_table xfs_root_table[] = {
+       {
+               .procname       = "fs",
+               .mode           = 0555,
+               .child          = xfs_dir_table
+       },
+       {}
+};
+
+int
+xfs_sysctl_register(void)
+{
+       xfs_table_header = register_sysctl_table(xfs_root_table);
+       if (!xfs_table_header)
+               return -ENOMEM;
+       return 0;
+}
+
+void
+xfs_sysctl_unregister(void)
+{
+       unregister_sysctl_table(xfs_table_header);
+}
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h

new file mode 100644 (file)

index 0000000..b9937d4
--- /dev/null
+++ b/fs/xfs/xfs_sysctl.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2001-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_SYSCTL_H__
+#define __XFS_SYSCTL_H__
+
+#include <linux/sysctl.h>
+
+/*
+ * Tunable xfs parameters
+ */
+
+typedef struct xfs_sysctl_val {
+       int min;
+       int val;
+       int max;
+} xfs_sysctl_val_t;
+
+typedef struct xfs_param {
+       xfs_sysctl_val_t sgid_inherit;  /* Inherit S_ISGID if process' GID is
+                                        * not a member of parent dir GID. */
+       xfs_sysctl_val_t symlink_mode;  /* Link creat mode affected by umask */
+       xfs_sysctl_val_t panic_mask;    /* bitmask to cause panic on errors. */
+       xfs_sysctl_val_t error_level;   /* Degree of reporting for problems  */
+       xfs_sysctl_val_t syncd_timer;   /* Interval between xfssyncd wakeups */
+       xfs_sysctl_val_t stats_clear;   /* Reset all XFS statistics to zero. */
+       xfs_sysctl_val_t inherit_sync;  /* Inherit the "sync" inode flag. */
+       xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */
+       xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */
+       xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */
+       xfs_sysctl_val_t xfs_buf_age;   /* Metadata buffer age before flush. */
+       xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
+       xfs_sysctl_val_t rotorstep;     /* inode32 AG rotoring control knob */
+       xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
+       xfs_sysctl_val_t fstrm_timer;   /* Filestream dir-AG assoc'n timeout. */
+} xfs_param_t;
+
+/*
+ * xfs_error_level:
+ *
+ * How much error reporting will be done when internal problems are
+ * encountered.  These problems normally return an EFSCORRUPTED to their
+ * caller, with no other information reported.
+ *
+ * 0   No error reports
+ * 1   Report EFSCORRUPTED errors that will cause a filesystem shutdown
+ * 5   Report all EFSCORRUPTED errors (all of the above errors, plus any
+ *     additional errors that are known to not cause shutdowns)
+ *
+ * xfs_panic_mask bit 0x8 turns the error reports into panics
+ */
+
+enum {
+       /* XFS_REFCACHE_SIZE = 1 */
+       /* XFS_REFCACHE_PURGE = 2 */
+       /* XFS_RESTRICT_CHOWN = 3 */
+       XFS_SGID_INHERIT = 4,
+       XFS_SYMLINK_MODE = 5,
+       XFS_PANIC_MASK = 6,
+       XFS_ERRLEVEL = 7,
+       XFS_SYNCD_TIMER = 8,
+       /* XFS_PROBE_DMAPI = 9 */
+       /* XFS_PROBE_IOOPS = 10 */
+       /* XFS_PROBE_QUOTA = 11 */
+       XFS_STATS_CLEAR = 12,
+       XFS_INHERIT_SYNC = 13,
+       XFS_INHERIT_NODUMP = 14,
+       XFS_INHERIT_NOATIME = 15,
+       XFS_BUF_TIMER = 16,
+       XFS_BUF_AGE = 17,
+       /* XFS_IO_BYPASS = 18 */
+       XFS_INHERIT_NOSYM = 19,
+       XFS_ROTORSTEP = 20,
+       XFS_INHERIT_NODFRG = 21,
+       XFS_FILESTREAM_TIMER = 22,
+};
+
+extern xfs_param_t     xfs_params;
+
+#ifdef CONFIG_SYSCTL
+extern int xfs_sysctl_register(void);
+extern void xfs_sysctl_unregister(void);
+#else
+# define xfs_sysctl_register()         (0)
+# define xfs_sysctl_unregister()       do { } while (0)
+#endif /* CONFIG_SYSCTL */
+
+#endif /* __XFS_SYSCTL_H__ */
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c

new file mode 100644 (file)

index 0000000..9010ce8
--- /dev/null
+++ b/fs/xfs/xfs_trace.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2009, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_mount.h"
+#include "xfs_ialloc.h"
+#include "xfs_itable.h"
+#include "xfs_alloc.h"
+#include "xfs_bmap.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_log_priv.h"
+#include "xfs_buf_item.h"
+#include "xfs_quota.h"
+#include "xfs_iomap.h"
+#include "xfs_aops.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
+#include "xfs_log_recover.h"
+#include "xfs_inode_item.h"
+
+/*
+ * We include this last to have the helpers above available for the trace
+ * event implementations.
+ */
+#define CREATE_TRACE_POINTS
+#include "xfs_trace.h"
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h

new file mode 100644 (file)

index 0000000..690fc7a
--- /dev/null
+++ b/fs/xfs/xfs_trace.h
@@ -0,0 +1,1746 @@
+/*
+ * Copyright (c) 2009, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM xfs
+
+#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_XFS_H
+
+#include <linux/tracepoint.h>
+
+struct xfs_agf;
+struct xfs_alloc_arg;
+struct xfs_attr_list_context;
+struct xfs_buf_log_item;
+struct xfs_da_args;
+struct xfs_da_node_entry;
+struct xfs_dquot;
+struct xlog_ticket;
+struct log;
+struct xlog_recover;
+struct xlog_recover_item;
+struct xfs_buf_log_format;
+struct xfs_inode_log_format;
+
+DECLARE_EVENT_CLASS(xfs_attr_list_class,
+       TP_PROTO(struct xfs_attr_list_context *ctx),
+       TP_ARGS(ctx),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(u32, hashval)
+               __field(u32, blkno)
+               __field(u32, offset)
+               __field(void *, alist)
+               __field(int, bufsize)
+               __field(int, count)
+               __field(int, firstu)
+               __field(int, dupcnt)
+               __field(int, flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
+               __entry->ino = ctx->dp->i_ino;
+               __entry->hashval = ctx->cursor->hashval;
+               __entry->blkno = ctx->cursor->blkno;
+               __entry->offset = ctx->cursor->offset;
+               __entry->alist = ctx->alist;
+               __entry->bufsize = ctx->bufsize;
+               __entry->count = ctx->count;
+               __entry->firstu = ctx->firstu;
+               __entry->flags = ctx->flags;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
+                 "alist 0x%p size %u count %u firstu %u flags %d %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                  __entry->ino,
+                  __entry->hashval,
+                  __entry->blkno,
+                  __entry->offset,
+                  __entry->dupcnt,
+                  __entry->alist,
+                  __entry->bufsize,
+                  __entry->count,
+                  __entry->firstu,
+                  __entry->flags,
+                  __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
+       )
+)
+
+#define DEFINE_ATTR_LIST_EVENT(name) \
+DEFINE_EVENT(xfs_attr_list_class, name, \
+       TP_PROTO(struct xfs_attr_list_context *ctx), \
+       TP_ARGS(ctx))
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
+DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
+
+DECLARE_EVENT_CLASS(xfs_perag_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
+                unsigned long caller_ip),
+       TP_ARGS(mp, agno, refcount, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(int, refcount)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->refcount = refcount;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d agno %u refcount %d caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->refcount,
+                 (char *)__entry->caller_ip)
+);
+
+#define DEFINE_PERAG_REF_EVENT(name)   \
+DEFINE_EVENT(xfs_perag_class, name,    \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,       \
+                unsigned long caller_ip),                                      \
+       TP_ARGS(mp, agno, refcount, caller_ip))
+DEFINE_PERAG_REF_EVENT(xfs_perag_get);
+DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
+DEFINE_PERAG_REF_EVENT(xfs_perag_put);
+DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
+
+TRACE_EVENT(xfs_attr_list_node_descend,
+       TP_PROTO(struct xfs_attr_list_context *ctx,
+                struct xfs_da_node_entry *btree),
+       TP_ARGS(ctx, btree),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(u32, hashval)
+               __field(u32, blkno)
+               __field(u32, offset)
+               __field(void *, alist)
+               __field(int, bufsize)
+               __field(int, count)
+               __field(int, firstu)
+               __field(int, dupcnt)
+               __field(int, flags)
+               __field(u32, bt_hashval)
+               __field(u32, bt_before)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
+               __entry->ino = ctx->dp->i_ino;
+               __entry->hashval = ctx->cursor->hashval;
+               __entry->blkno = ctx->cursor->blkno;
+               __entry->offset = ctx->cursor->offset;
+               __entry->alist = ctx->alist;
+               __entry->bufsize = ctx->bufsize;
+               __entry->count = ctx->count;
+               __entry->firstu = ctx->firstu;
+               __entry->flags = ctx->flags;
+               __entry->bt_hashval = be32_to_cpu(btree->hashval);
+               __entry->bt_before = be32_to_cpu(btree->before);
+       ),
+       TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
+                 "alist 0x%p size %u count %u firstu %u flags %d %s "
+                 "node hashval %u, node before %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                  __entry->ino,
+                  __entry->hashval,
+                  __entry->blkno,
+                  __entry->offset,
+                  __entry->dupcnt,
+                  __entry->alist,
+                  __entry->bufsize,
+                  __entry->count,
+                  __entry->firstu,
+                  __entry->flags,
+                  __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS),
+                  __entry->bt_hashval,
+                  __entry->bt_before)
+);
+
+TRACE_EVENT(xfs_iext_insert,
+       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx,
+                struct xfs_bmbt_irec *r, int state, unsigned long caller_ip),
+       TP_ARGS(ip, idx, r, state, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_extnum_t, idx)
+               __field(xfs_fileoff_t, startoff)
+               __field(xfs_fsblock_t, startblock)
+               __field(xfs_filblks_t, blockcount)
+               __field(xfs_exntst_t, state)
+               __field(int, bmap_state)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->idx = idx;
+               __entry->startoff = r->br_startoff;
+               __entry->startblock = r->br_startblock;
+               __entry->blockcount = r->br_blockcount;
+               __entry->state = r->br_state;
+               __entry->bmap_state = state;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
+                 "offset %lld block %lld count %lld flag %d caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
+                 (long)__entry->idx,
+                 __entry->startoff,
+                 (__int64_t)__entry->startblock,
+                 __entry->blockcount,
+                 __entry->state,
+                 (char *)__entry->caller_ip)
+);
+
+DECLARE_EVENT_CLASS(xfs_bmap_class,
+       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
+                unsigned long caller_ip),
+       TP_ARGS(ip, idx, state, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_extnum_t, idx)
+               __field(xfs_fileoff_t, startoff)
+               __field(xfs_fsblock_t, startblock)
+               __field(xfs_filblks_t, blockcount)
+               __field(xfs_exntst_t, state)
+               __field(int, bmap_state)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               struct xfs_ifork        *ifp = (state & BMAP_ATTRFORK) ?
+                                               ip->i_afp : &ip->i_df;
+               struct xfs_bmbt_irec    r;
+
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->idx = idx;
+               __entry->startoff = r.br_startoff;
+               __entry->startblock = r.br_startblock;
+               __entry->blockcount = r.br_blockcount;
+               __entry->state = r.br_state;
+               __entry->bmap_state = state;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
+                 "offset %lld block %lld count %lld flag %d caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
+                 (long)__entry->idx,
+                 __entry->startoff,
+                 (__int64_t)__entry->startblock,
+                 __entry->blockcount,
+                 __entry->state,
+                 (char *)__entry->caller_ip)
+)
+
+#define DEFINE_BMAP_EVENT(name) \
+DEFINE_EVENT(xfs_bmap_class, name, \
+       TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
+                unsigned long caller_ip), \
+       TP_ARGS(ip, idx, state, caller_ip))
+DEFINE_BMAP_EVENT(xfs_iext_remove);
+DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
+DEFINE_BMAP_EVENT(xfs_bmap_post_update);
+DEFINE_BMAP_EVENT(xfs_extlist);
+
+DECLARE_EVENT_CLASS(xfs_buf_class,
+       TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
+       TP_ARGS(bp, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_daddr_t, bno)
+               __field(size_t, buffer_length)
+               __field(int, hold)
+               __field(int, pincount)
+               __field(unsigned, lockval)
+               __field(unsigned, flags)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = bp->b_target->bt_dev;
+               __entry->bno = bp->b_bn;
+               __entry->buffer_length = bp->b_buffer_length;
+               __entry->hold = atomic_read(&bp->b_hold);
+               __entry->pincount = atomic_read(&bp->b_pin_count);
+               __entry->lockval = bp->b_sema.count;
+               __entry->flags = bp->b_flags;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+                 "lock %d flags %s caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long long)__entry->bno,
+                 __entry->buffer_length,
+                 __entry->hold,
+                 __entry->pincount,
+                 __entry->lockval,
+                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
+                 (void *)__entry->caller_ip)
+)
+
+#define DEFINE_BUF_EVENT(name) \
+DEFINE_EVENT(xfs_buf_class, name, \
+       TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
+       TP_ARGS(bp, caller_ip))
+DEFINE_BUF_EVENT(xfs_buf_init);
+DEFINE_BUF_EVENT(xfs_buf_free);
+DEFINE_BUF_EVENT(xfs_buf_hold);
+DEFINE_BUF_EVENT(xfs_buf_rele);
+DEFINE_BUF_EVENT(xfs_buf_iodone);
+DEFINE_BUF_EVENT(xfs_buf_iorequest);
+DEFINE_BUF_EVENT(xfs_buf_bawrite);
+DEFINE_BUF_EVENT(xfs_buf_bdwrite);
+DEFINE_BUF_EVENT(xfs_buf_lock);
+DEFINE_BUF_EVENT(xfs_buf_lock_done);
+DEFINE_BUF_EVENT(xfs_buf_trylock);
+DEFINE_BUF_EVENT(xfs_buf_unlock);
+DEFINE_BUF_EVENT(xfs_buf_iowait);
+DEFINE_BUF_EVENT(xfs_buf_iowait_done);
+DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
+DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
+DEFINE_BUF_EVENT(xfs_buf_delwri_split);
+DEFINE_BUF_EVENT(xfs_buf_get_uncached);
+DEFINE_BUF_EVENT(xfs_bdstrat_shut);
+DEFINE_BUF_EVENT(xfs_buf_item_relse);
+DEFINE_BUF_EVENT(xfs_buf_item_iodone);
+DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
+DEFINE_BUF_EVENT(xfs_buf_error_relse);
+DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
+DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
+
+/* not really buffer traces, but the buf provides useful information */
+DEFINE_BUF_EVENT(xfs_btree_corrupt);
+DEFINE_BUF_EVENT(xfs_da_btree_corrupt);
+DEFINE_BUF_EVENT(xfs_reset_dqcounts);
+DEFINE_BUF_EVENT(xfs_inode_item_push);
+
+/* pass flags explicitly */
+DECLARE_EVENT_CLASS(xfs_buf_flags_class,
+       TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
+       TP_ARGS(bp, flags, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_daddr_t, bno)
+               __field(size_t, buffer_length)
+               __field(int, hold)
+               __field(int, pincount)
+               __field(unsigned, lockval)
+               __field(unsigned, flags)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = bp->b_target->bt_dev;
+               __entry->bno = bp->b_bn;
+               __entry->buffer_length = bp->b_buffer_length;
+               __entry->flags = flags;
+               __entry->hold = atomic_read(&bp->b_hold);
+               __entry->pincount = atomic_read(&bp->b_pin_count);
+               __entry->lockval = bp->b_sema.count;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+                 "lock %d flags %s caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long long)__entry->bno,
+                 __entry->buffer_length,
+                 __entry->hold,
+                 __entry->pincount,
+                 __entry->lockval,
+                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
+                 (void *)__entry->caller_ip)
+)
+
+#define DEFINE_BUF_FLAGS_EVENT(name) \
+DEFINE_EVENT(xfs_buf_flags_class, name, \
+       TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
+       TP_ARGS(bp, flags, caller_ip))
+DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
+DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
+DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
+
+TRACE_EVENT(xfs_buf_ioerror,
+       TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip),
+       TP_ARGS(bp, error, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_daddr_t, bno)
+               __field(size_t, buffer_length)
+               __field(unsigned, flags)
+               __field(int, hold)
+               __field(int, pincount)
+               __field(unsigned, lockval)
+               __field(int, error)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = bp->b_target->bt_dev;
+               __entry->bno = bp->b_bn;
+               __entry->buffer_length = bp->b_buffer_length;
+               __entry->hold = atomic_read(&bp->b_hold);
+               __entry->pincount = atomic_read(&bp->b_pin_count);
+               __entry->lockval = bp->b_sema.count;
+               __entry->error = error;
+               __entry->flags = bp->b_flags;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+                 "lock %d error %d flags %s caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long long)__entry->bno,
+                 __entry->buffer_length,
+                 __entry->hold,
+                 __entry->pincount,
+                 __entry->lockval,
+                 __entry->error,
+                 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
+                 (void *)__entry->caller_ip)
+);
+
+DECLARE_EVENT_CLASS(xfs_buf_item_class,
+       TP_PROTO(struct xfs_buf_log_item *bip),
+       TP_ARGS(bip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_daddr_t, buf_bno)
+               __field(size_t, buf_len)
+               __field(int, buf_hold)
+               __field(int, buf_pincount)
+               __field(int, buf_lockval)
+               __field(unsigned, buf_flags)
+               __field(unsigned, bli_recur)
+               __field(int, bli_refcount)
+               __field(unsigned, bli_flags)
+               __field(void *, li_desc)
+               __field(unsigned, li_flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = bip->bli_buf->b_target->bt_dev;
+               __entry->bli_flags = bip->bli_flags;
+               __entry->bli_recur = bip->bli_recur;
+               __entry->bli_refcount = atomic_read(&bip->bli_refcount);
+               __entry->buf_bno = bip->bli_buf->b_bn;
+               __entry->buf_len = bip->bli_buf->b_buffer_length;
+               __entry->buf_flags = bip->bli_buf->b_flags;
+               __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
+               __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
+               __entry->buf_lockval = bip->bli_buf->b_sema.count;
+               __entry->li_desc = bip->bli_item.li_desc;
+               __entry->li_flags = bip->bli_item.li_flags;
+       ),
+       TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+                 "lock %d flags %s recur %d refcount %d bliflags %s "
+                 "lidesc 0x%p liflags %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long long)__entry->buf_bno,
+                 __entry->buf_len,
+                 __entry->buf_hold,
+                 __entry->buf_pincount,
+                 __entry->buf_lockval,
+                 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
+                 __entry->bli_recur,
+                 __entry->bli_refcount,
+                 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
+                 __entry->li_desc,
+                 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
+)
+
+#define DEFINE_BUF_ITEM_EVENT(name) \
+DEFINE_EVENT(xfs_buf_item_class, name, \
+       TP_PROTO(struct xfs_buf_log_item *bip), \
+       TP_ARGS(bip))
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
+
+DECLARE_EVENT_CLASS(xfs_lock_class,
+       TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
+                unsigned long caller_ip),
+       TP_ARGS(ip,  lock_flags, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(int, lock_flags)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->lock_flags = lock_flags;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
+                 (void *)__entry->caller_ip)
+)
+
+#define DEFINE_LOCK_EVENT(name) \
+DEFINE_EVENT(xfs_lock_class, name, \
+       TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
+                unsigned long caller_ip), \
+       TP_ARGS(ip,  lock_flags, caller_ip))
+DEFINE_LOCK_EVENT(xfs_ilock);
+DEFINE_LOCK_EVENT(xfs_ilock_nowait);
+DEFINE_LOCK_EVENT(xfs_ilock_demote);
+DEFINE_LOCK_EVENT(xfs_iunlock);
+
+DECLARE_EVENT_CLASS(xfs_inode_class,
+       TP_PROTO(struct xfs_inode *ip),
+       TP_ARGS(ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino)
+)
+
+#define DEFINE_INODE_EVENT(name) \
+DEFINE_EVENT(xfs_inode_class, name, \
+       TP_PROTO(struct xfs_inode *ip), \
+       TP_ARGS(ip))
+DEFINE_INODE_EVENT(xfs_iget_skip);
+DEFINE_INODE_EVENT(xfs_iget_reclaim);
+DEFINE_INODE_EVENT(xfs_iget_reclaim_fail);
+DEFINE_INODE_EVENT(xfs_iget_hit);
+DEFINE_INODE_EVENT(xfs_iget_miss);
+
+DEFINE_INODE_EVENT(xfs_getattr);
+DEFINE_INODE_EVENT(xfs_setattr);
+DEFINE_INODE_EVENT(xfs_readlink);
+DEFINE_INODE_EVENT(xfs_alloc_file_space);
+DEFINE_INODE_EVENT(xfs_free_file_space);
+DEFINE_INODE_EVENT(xfs_readdir);
+#ifdef CONFIG_XFS_POSIX_ACL
+DEFINE_INODE_EVENT(xfs_get_acl);
+#endif
+DEFINE_INODE_EVENT(xfs_vm_bmap);
+DEFINE_INODE_EVENT(xfs_file_ioctl);
+DEFINE_INODE_EVENT(xfs_file_compat_ioctl);
+DEFINE_INODE_EVENT(xfs_ioctl_setattr);
+DEFINE_INODE_EVENT(xfs_file_fsync);
+DEFINE_INODE_EVENT(xfs_destroy_inode);
+DEFINE_INODE_EVENT(xfs_write_inode);
+DEFINE_INODE_EVENT(xfs_evict_inode);
+
+DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
+DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
+
+DECLARE_EVENT_CLASS(xfs_iref_class,
+       TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
+       TP_ARGS(ip, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(int, count)
+               __field(int, pincount)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->count = atomic_read(&VFS_I(ip)->i_count);
+               __entry->pincount = atomic_read(&ip->i_pincount);
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->count,
+                 __entry->pincount,
+                 (char *)__entry->caller_ip)
+)
+
+#define DEFINE_IREF_EVENT(name) \
+DEFINE_EVENT(xfs_iref_class, name, \
+       TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
+       TP_ARGS(ip, caller_ip))
+DEFINE_IREF_EVENT(xfs_ihold);
+DEFINE_IREF_EVENT(xfs_irele);
+DEFINE_IREF_EVENT(xfs_inode_pin);
+DEFINE_IREF_EVENT(xfs_inode_unpin);
+DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
+
+DECLARE_EVENT_CLASS(xfs_namespace_class,
+       TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),
+       TP_ARGS(dp, name),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, dp_ino)
+               __dynamic_array(char, name, name->len)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(dp)->i_sb->s_dev;
+               __entry->dp_ino = dp->i_ino;
+               memcpy(__get_str(name), name->name, name->len);
+       ),
+       TP_printk("dev %d:%d dp ino 0x%llx name %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->dp_ino,
+                 __get_str(name))
+)
+
+#define DEFINE_NAMESPACE_EVENT(name) \
+DEFINE_EVENT(xfs_namespace_class, name, \
+       TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \
+       TP_ARGS(dp, name))
+DEFINE_NAMESPACE_EVENT(xfs_remove);
+DEFINE_NAMESPACE_EVENT(xfs_link);
+DEFINE_NAMESPACE_EVENT(xfs_lookup);
+DEFINE_NAMESPACE_EVENT(xfs_create);
+DEFINE_NAMESPACE_EVENT(xfs_symlink);
+
+TRACE_EVENT(xfs_rename,
+       TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp,
+                struct xfs_name *src_name, struct xfs_name *target_name),
+       TP_ARGS(src_dp, target_dp, src_name, target_name),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, src_dp_ino)
+               __field(xfs_ino_t, target_dp_ino)
+               __dynamic_array(char, src_name, src_name->len)
+               __dynamic_array(char, target_name, target_name->len)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(src_dp)->i_sb->s_dev;
+               __entry->src_dp_ino = src_dp->i_ino;
+               __entry->target_dp_ino = target_dp->i_ino;
+               memcpy(__get_str(src_name), src_name->name, src_name->len);
+               memcpy(__get_str(target_name), target_name->name, target_name->len);
+       ),
+       TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
+                 " src name %s target name %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->src_dp_ino,
+                 __entry->target_dp_ino,
+                 __get_str(src_name),
+                 __get_str(target_name))
+)
+
+DECLARE_EVENT_CLASS(xfs_dquot_class,
+       TP_PROTO(struct xfs_dquot *dqp),
+       TP_ARGS(dqp),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(u32, id)
+               __field(unsigned, flags)
+               __field(unsigned, nrefs)
+               __field(unsigned long long, res_bcount)
+               __field(unsigned long long, bcount)
+               __field(unsigned long long, icount)
+               __field(unsigned long long, blk_hardlimit)
+               __field(unsigned long long, blk_softlimit)
+               __field(unsigned long long, ino_hardlimit)
+               __field(unsigned long long, ino_softlimit)
+       ), \
+       TP_fast_assign(
+               __entry->dev = dqp->q_mount->m_super->s_dev;
+               __entry->id = be32_to_cpu(dqp->q_core.d_id);
+               __entry->flags = dqp->dq_flags;
+               __entry->nrefs = dqp->q_nrefs;
+               __entry->res_bcount = dqp->q_res_bcount;
+               __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
+               __entry->icount = be64_to_cpu(dqp->q_core.d_icount);
+               __entry->blk_hardlimit =
+                       be64_to_cpu(dqp->q_core.d_blk_hardlimit);
+               __entry->blk_softlimit =
+                       be64_to_cpu(dqp->q_core.d_blk_softlimit);
+               __entry->ino_hardlimit =
+                       be64_to_cpu(dqp->q_core.d_ino_hardlimit);
+               __entry->ino_softlimit =
+                       be64_to_cpu(dqp->q_core.d_ino_softlimit);
+       ),
+       TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
+                 "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
+                 "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->id,
+                 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
+                 __entry->nrefs,
+                 __entry->res_bcount,
+                 __entry->bcount,
+                 __entry->blk_hardlimit,
+                 __entry->blk_softlimit,
+                 __entry->icount,
+                 __entry->ino_hardlimit,
+                 __entry->ino_softlimit)
+)
+
+#define DEFINE_DQUOT_EVENT(name) \
+DEFINE_EVENT(xfs_dquot_class, name, \
+       TP_PROTO(struct xfs_dquot *dqp), \
+       TP_ARGS(dqp))
+DEFINE_DQUOT_EVENT(xfs_dqadjust);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
+DEFINE_DQUOT_EVENT(xfs_dqattach_found);
+DEFINE_DQUOT_EVENT(xfs_dqattach_get);
+DEFINE_DQUOT_EVENT(xfs_dqinit);
+DEFINE_DQUOT_EVENT(xfs_dqreuse);
+DEFINE_DQUOT_EVENT(xfs_dqalloc);
+DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
+DEFINE_DQUOT_EVENT(xfs_dqread);
+DEFINE_DQUOT_EVENT(xfs_dqread_fail);
+DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
+DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
+DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
+DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
+DEFINE_DQUOT_EVENT(xfs_dqget_hit);
+DEFINE_DQUOT_EVENT(xfs_dqget_miss);
+DEFINE_DQUOT_EVENT(xfs_dqput);
+DEFINE_DQUOT_EVENT(xfs_dqput_wait);
+DEFINE_DQUOT_EVENT(xfs_dqput_free);
+DEFINE_DQUOT_EVENT(xfs_dqrele);
+DEFINE_DQUOT_EVENT(xfs_dqflush);
+DEFINE_DQUOT_EVENT(xfs_dqflush_force);
+DEFINE_DQUOT_EVENT(xfs_dqflush_done);
+
+DECLARE_EVENT_CLASS(xfs_loggrant_class,
+       TP_PROTO(struct log *log, struct xlog_ticket *tic),
+       TP_ARGS(log, tic),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(unsigned, trans_type)
+               __field(char, ocnt)
+               __field(char, cnt)
+               __field(int, curr_res)
+               __field(int, unit_res)
+               __field(unsigned int, flags)
+               __field(int, reserveq)
+               __field(int, writeq)
+               __field(int, grant_reserve_cycle)
+               __field(int, grant_reserve_bytes)
+               __field(int, grant_write_cycle)
+               __field(int, grant_write_bytes)
+               __field(int, curr_cycle)
+               __field(int, curr_block)
+               __field(xfs_lsn_t, tail_lsn)
+       ),
+       TP_fast_assign(
+               __entry->dev = log->l_mp->m_super->s_dev;
+               __entry->trans_type = tic->t_trans_type;
+               __entry->ocnt = tic->t_ocnt;
+               __entry->cnt = tic->t_cnt;
+               __entry->curr_res = tic->t_curr_res;
+               __entry->unit_res = tic->t_unit_res;
+               __entry->flags = tic->t_flags;
+               __entry->reserveq = list_empty(&log->l_reserveq);
+               __entry->writeq = list_empty(&log->l_writeq);
+               xlog_crack_grant_head(&log->l_grant_reserve_head,
+                               &__entry->grant_reserve_cycle,
+                               &__entry->grant_reserve_bytes);
+               xlog_crack_grant_head(&log->l_grant_write_head,
+                               &__entry->grant_write_cycle,
+                               &__entry->grant_write_bytes);
+               __entry->curr_cycle = log->l_curr_cycle;
+               __entry->curr_block = log->l_curr_block;
+               __entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
+       ),
+       TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
+                 "t_unit_res %u t_flags %s reserveq %s "
+                 "writeq %s grant_reserve_cycle %d "
+                 "grant_reserve_bytes %d grant_write_cycle %d "
+                 "grant_write_bytes %d curr_cycle %d curr_block %d "
+                 "tail_cycle %d tail_block %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
+                 __entry->ocnt,
+                 __entry->cnt,
+                 __entry->curr_res,
+                 __entry->unit_res,
+                 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
+                 __entry->reserveq ? "empty" : "active",
+                 __entry->writeq ? "empty" : "active",
+                 __entry->grant_reserve_cycle,
+                 __entry->grant_reserve_bytes,
+                 __entry->grant_write_cycle,
+                 __entry->grant_write_bytes,
+                 __entry->curr_cycle,
+                 __entry->curr_block,
+                 CYCLE_LSN(__entry->tail_lsn),
+                 BLOCK_LSN(__entry->tail_lsn)
+       )
+)
+
+#define DEFINE_LOGGRANT_EVENT(name) \
+DEFINE_EVENT(xfs_loggrant_class, name, \
+       TP_PROTO(struct log *log, struct xlog_ticket *tic), \
+       TP_ARGS(log, tic))
+DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
+DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
+DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
+DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
+DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
+DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
+
+DECLARE_EVENT_CLASS(xfs_file_class,
+       TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
+       TP_ARGS(ip, count, offset, flags),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_fsize_t, size)
+               __field(xfs_fsize_t, new_size)
+               __field(loff_t, offset)
+               __field(size_t, count)
+               __field(int, flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->new_size = ip->i_new_size;
+               __entry->offset = offset;
+               __entry->count = count;
+               __entry->flags = flags;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+                 "offset 0x%llx count 0x%zx ioflags %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->new_size,
+                 __entry->offset,
+                 __entry->count,
+                 __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
+)
+
+#define DEFINE_RW_EVENT(name)          \
+DEFINE_EVENT(xfs_file_class, name,     \
+       TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
+       TP_ARGS(ip, count, offset, flags))
+DEFINE_RW_EVENT(xfs_file_read);
+DEFINE_RW_EVENT(xfs_file_buffered_write);
+DEFINE_RW_EVENT(xfs_file_direct_write);
+DEFINE_RW_EVENT(xfs_file_splice_read);
+DEFINE_RW_EVENT(xfs_file_splice_write);
+
+DECLARE_EVENT_CLASS(xfs_page_class,
+       TP_PROTO(struct inode *inode, struct page *page, unsigned long off),
+       TP_ARGS(inode, page, off),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(pgoff_t, pgoff)
+               __field(loff_t, size)
+               __field(unsigned long, offset)
+               __field(int, delalloc)
+               __field(int, unwritten)
+       ),
+       TP_fast_assign(
+               int delalloc = -1, unwritten = -1;
+
+               if (page_has_buffers(page))
+                       xfs_count_page_state(page, &delalloc, &unwritten);
+               __entry->dev = inode->i_sb->s_dev;
+               __entry->ino = XFS_I(inode)->i_ino;
+               __entry->pgoff = page_offset(page);
+               __entry->size = i_size_read(inode);
+               __entry->offset = off;
+               __entry->delalloc = delalloc;
+               __entry->unwritten = unwritten;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
+                 "delalloc %d unwritten %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->pgoff,
+                 __entry->size,
+                 __entry->offset,
+                 __entry->delalloc,
+                 __entry->unwritten)
+)
+
+#define DEFINE_PAGE_EVENT(name)                \
+DEFINE_EVENT(xfs_page_class, name,     \
+       TP_PROTO(struct inode *inode, struct page *page, unsigned long off),    \
+       TP_ARGS(inode, page, off))
+DEFINE_PAGE_EVENT(xfs_writepage);
+DEFINE_PAGE_EVENT(xfs_releasepage);
+DEFINE_PAGE_EVENT(xfs_invalidatepage);
+
+DECLARE_EVENT_CLASS(xfs_imap_class,
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
+                int type, struct xfs_bmbt_irec *irec),
+       TP_ARGS(ip, offset, count, type, irec),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(loff_t, size)
+               __field(loff_t, new_size)
+               __field(loff_t, offset)
+               __field(size_t, count)
+               __field(int, type)
+               __field(xfs_fileoff_t, startoff)
+               __field(xfs_fsblock_t, startblock)
+               __field(xfs_filblks_t, blockcount)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->new_size = ip->i_new_size;
+               __entry->offset = offset;
+               __entry->count = count;
+               __entry->type = type;
+               __entry->startoff = irec ? irec->br_startoff : 0;
+               __entry->startblock = irec ? irec->br_startblock : 0;
+               __entry->blockcount = irec ? irec->br_blockcount : 0;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+                 "offset 0x%llx count %zd type %s "
+                 "startoff 0x%llx startblock %lld blockcount 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->new_size,
+                 __entry->offset,
+                 __entry->count,
+                 __print_symbolic(__entry->type, XFS_IO_TYPES),
+                 __entry->startoff,
+                 (__int64_t)__entry->startblock,
+                 __entry->blockcount)
+)
+
+#define DEFINE_IOMAP_EVENT(name)       \
+DEFINE_EVENT(xfs_imap_class, name,     \
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
+                int type, struct xfs_bmbt_irec *irec),         \
+       TP_ARGS(ip, offset, count, type, irec))
+DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
+DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
+
+DECLARE_EVENT_CLASS(xfs_simple_io_class,
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
+       TP_ARGS(ip, offset, count),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(loff_t, isize)
+               __field(loff_t, disize)
+               __field(loff_t, new_size)
+               __field(loff_t, offset)
+               __field(size_t, count)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->isize = ip->i_size;
+               __entry->disize = ip->i_d.di_size;
+               __entry->new_size = ip->i_new_size;
+               __entry->offset = offset;
+               __entry->count = count;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
+                 "offset 0x%llx count %zd",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->isize,
+                 __entry->disize,
+                 __entry->new_size,
+                 __entry->offset,
+                 __entry->count)
+);
+
+#define DEFINE_SIMPLE_IO_EVENT(name)   \
+DEFINE_EVENT(xfs_simple_io_class, name,        \
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),        \
+       TP_ARGS(ip, offset, count))
+DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
+DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
+DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
+DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
+
+DECLARE_EVENT_CLASS(xfs_itrunc_class,
+       TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
+       TP_ARGS(ip, new_size),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_fsize_t, size)
+               __field(xfs_fsize_t, new_size)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->new_size = new_size;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->new_size)
+)
+
+#define DEFINE_ITRUNC_EVENT(name) \
+DEFINE_EVENT(xfs_itrunc_class, name, \
+       TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
+       TP_ARGS(ip, new_size))
+DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start);
+DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end);
+
+TRACE_EVENT(xfs_pagecache_inval,
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
+       TP_ARGS(ip, start, finish),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_fsize_t, size)
+               __field(xfs_off_t, start)
+               __field(xfs_off_t, finish)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->start = start;
+               __entry->finish = finish;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->start,
+                 __entry->finish)
+);
+
+TRACE_EVENT(xfs_bunmap,
+       TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len,
+                int flags, unsigned long caller_ip),
+       TP_ARGS(ip, bno, len, flags, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_fsize_t, size)
+               __field(xfs_fileoff_t, bno)
+               __field(xfs_filblks_t, len)
+               __field(unsigned long, caller_ip)
+               __field(int, flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->bno = bno;
+               __entry->len = len;
+               __entry->caller_ip = caller_ip;
+               __entry->flags = flags;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
+                 "flags %s caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->bno,
+                 __entry->len,
+                 __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS),
+                 (void *)__entry->caller_ip)
+
+);
+
+DECLARE_EVENT_CLASS(xfs_busy_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                xfs_agblock_t agbno, xfs_extlen_t len),
+       TP_ARGS(mp, agno, agbno, len),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, len)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agbno = agbno;
+               __entry->len = len;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->len)
+);
+#define DEFINE_BUSY_EVENT(name) \
+DEFINE_EVENT(xfs_busy_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                xfs_agblock_t agbno, xfs_extlen_t len), \
+       TP_ARGS(mp, agno, agbno, len))
+DEFINE_BUSY_EVENT(xfs_alloc_busy);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
+
+TRACE_EVENT(xfs_alloc_busy_trim,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                xfs_agblock_t agbno, xfs_extlen_t len,
+                xfs_agblock_t tbno, xfs_extlen_t tlen),
+       TP_ARGS(mp, agno, agbno, len, tbno, tlen),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, len)
+               __field(xfs_agblock_t, tbno)
+               __field(xfs_extlen_t, tlen)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agbno = agbno;
+               __entry->len = len;
+               __entry->tbno = tbno;
+               __entry->tlen = tlen;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->len,
+                 __entry->tbno,
+                 __entry->tlen)
+);
+
+TRACE_EVENT(xfs_trans_commit_lsn,
+       TP_PROTO(struct xfs_trans *trans),
+       TP_ARGS(trans),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(struct xfs_trans *, tp)
+               __field(xfs_lsn_t, lsn)
+       ),
+       TP_fast_assign(
+               __entry->dev = trans->t_mountp->m_super->s_dev;
+               __entry->tp = trans;
+               __entry->lsn = trans->t_commit_lsn;
+       ),
+       TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->tp,
+                 __entry->lsn)
+);
+
+TRACE_EVENT(xfs_agf,
+       TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
+                unsigned long caller_ip),
+       TP_ARGS(mp, agf, flags, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(int, flags)
+               __field(__u32, length)
+               __field(__u32, bno_root)
+               __field(__u32, cnt_root)
+               __field(__u32, bno_level)
+               __field(__u32, cnt_level)
+               __field(__u32, flfirst)
+               __field(__u32, fllast)
+               __field(__u32, flcount)
+               __field(__u32, freeblks)
+               __field(__u32, longest)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = be32_to_cpu(agf->agf_seqno),
+               __entry->flags = flags;
+               __entry->length = be32_to_cpu(agf->agf_length),
+               __entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]),
+               __entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]),
+               __entry->bno_level =
+                               be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]),
+               __entry->cnt_level =
+                               be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]),
+               __entry->flfirst = be32_to_cpu(agf->agf_flfirst),
+               __entry->fllast = be32_to_cpu(agf->agf_fllast),
+               __entry->flcount = be32_to_cpu(agf->agf_flcount),
+               __entry->freeblks = be32_to_cpu(agf->agf_freeblks),
+               __entry->longest = be32_to_cpu(agf->agf_longest);
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
+                 "levels b %u c %u flfirst %u fllast %u flcount %u "
+                 "freeblks %u longest %u caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
+                 __entry->length,
+                 __entry->bno_root,
+                 __entry->cnt_root,
+                 __entry->bno_level,
+                 __entry->cnt_level,
+                 __entry->flfirst,
+                 __entry->fllast,
+                 __entry->flcount,
+                 __entry->freeblks,
+                 __entry->longest,
+                 (void *)__entry->caller_ip)
+);
+
+TRACE_EVENT(xfs_free_extent,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
+                xfs_extlen_t len, bool isfl, int haveleft, int haveright),
+       TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, len)
+               __field(int, isfl)
+               __field(int, haveleft)
+               __field(int, haveright)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agbno = agbno;
+               __entry->len = len;
+               __entry->isfl = isfl;
+               __entry->haveleft = haveleft;
+               __entry->haveright = haveright;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->len,
+                 __entry->isfl,
+                 __entry->haveleft ?
+                       (__entry->haveright ? "both" : "left") :
+                       (__entry->haveright ? "right" : "none"))
+
+);
+
+DECLARE_EVENT_CLASS(xfs_alloc_class,
+       TP_PROTO(struct xfs_alloc_arg *args),
+       TP_ARGS(args),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, minlen)
+               __field(xfs_extlen_t, maxlen)
+               __field(xfs_extlen_t, mod)
+               __field(xfs_extlen_t, prod)
+               __field(xfs_extlen_t, minleft)
+               __field(xfs_extlen_t, total)
+               __field(xfs_extlen_t, alignment)
+               __field(xfs_extlen_t, minalignslop)
+               __field(xfs_extlen_t, len)
+               __field(short, type)
+               __field(short, otype)
+               __field(char, wasdel)
+               __field(char, wasfromfl)
+               __field(char, isfl)
+               __field(char, userdata)
+               __field(xfs_fsblock_t, firstblock)
+       ),
+       TP_fast_assign(
+               __entry->dev = args->mp->m_super->s_dev;
+               __entry->agno = args->agno;
+               __entry->agbno = args->agbno;
+               __entry->minlen = args->minlen;
+               __entry->maxlen = args->maxlen;
+               __entry->mod = args->mod;
+               __entry->prod = args->prod;
+               __entry->minleft = args->minleft;
+               __entry->total = args->total;
+               __entry->alignment = args->alignment;
+               __entry->minalignslop = args->minalignslop;
+               __entry->len = args->len;
+               __entry->type = args->type;
+               __entry->otype = args->otype;
+               __entry->wasdel = args->wasdel;
+               __entry->wasfromfl = args->wasfromfl;
+               __entry->isfl = args->isfl;
+               __entry->userdata = args->userdata;
+               __entry->firstblock = args->firstblock;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
+                 "prod %u minleft %u total %u alignment %u minalignslop %u "
+                 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
+                 "userdata %d firstblock 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->minlen,
+                 __entry->maxlen,
+                 __entry->mod,
+                 __entry->prod,
+                 __entry->minleft,
+                 __entry->total,
+                 __entry->alignment,
+                 __entry->minalignslop,
+                 __entry->len,
+                 __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
+                 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
+                 __entry->wasdel,
+                 __entry->wasfromfl,
+                 __entry->isfl,
+                 __entry->userdata,
+                 (unsigned long long)__entry->firstblock)
+)
+
+#define DEFINE_ALLOC_EVENT(name) \
+DEFINE_EVENT(xfs_alloc_class, name, \
+       TP_PROTO(struct xfs_alloc_arg *args), \
+       TP_ARGS(args))
+DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
+DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
+DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
+DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
+DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
+DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
+DEFINE_ALLOC_EVENT(xfs_alloc_small_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
+DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
+
+DECLARE_EVENT_CLASS(xfs_dir2_class,
+       TP_PROTO(struct xfs_da_args *args),
+       TP_ARGS(args),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __dynamic_array(char, name, args->namelen)
+               __field(int, namelen)
+               __field(xfs_dahash_t, hashval)
+               __field(xfs_ino_t, inumber)
+               __field(int, op_flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
+               __entry->ino = args->dp->i_ino;
+               if (args->namelen)
+                       memcpy(__get_str(name), args->name, args->namelen);
+               __entry->namelen = args->namelen;
+               __entry->hashval = args->hashval;
+               __entry->inumber = args->inumber;
+               __entry->op_flags = args->op_flags;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
+                 "inumber 0x%llx op_flags %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->namelen,
+                 __entry->namelen ? __get_str(name) : NULL,
+                 __entry->namelen,
+                 __entry->hashval,
+                 __entry->inumber,
+                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
+)
+
+#define DEFINE_DIR2_EVENT(name) \
+DEFINE_EVENT(xfs_dir2_class, name, \
+       TP_PROTO(struct xfs_da_args *args), \
+       TP_ARGS(args))
+DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
+DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
+DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
+DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
+
+DECLARE_EVENT_CLASS(xfs_dir2_space_class,
+       TP_PROTO(struct xfs_da_args *args, int idx),
+       TP_ARGS(args, idx),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(int, op_flags)
+               __field(int, idx)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
+               __entry->ino = args->dp->i_ino;
+               __entry->op_flags = args->op_flags;
+               __entry->idx = idx;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
+                 __entry->idx)
+)
+
+#define DEFINE_DIR2_SPACE_EVENT(name) \
+DEFINE_EVENT(xfs_dir2_space_class, name, \
+       TP_PROTO(struct xfs_da_args *args, int idx), \
+       TP_ARGS(args, idx))
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
+
+TRACE_EVENT(xfs_dir2_leafn_moveents,
+       TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
+       TP_ARGS(args, src_idx, dst_idx, count),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(int, op_flags)
+               __field(int, src_idx)
+               __field(int, dst_idx)
+               __field(int, count)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
+               __entry->ino = args->dp->i_ino;
+               __entry->op_flags = args->op_flags;
+               __entry->src_idx = src_idx;
+               __entry->dst_idx = dst_idx;
+               __entry->count = count;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx op_flags %s "
+                 "src_idx %d dst_idx %d count %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
+                 __entry->src_idx,
+                 __entry->dst_idx,
+                 __entry->count)
+);
+
+#define XFS_SWAPEXT_INODES \
+       { 0,    "target" }, \
+       { 1,    "temp" }
+
+#define XFS_INODE_FORMAT_STR \
+       { 0,    "invalid" }, \
+       { 1,    "local" }, \
+       { 2,    "extent" }, \
+       { 3,    "btree" }
+
+DECLARE_EVENT_CLASS(xfs_swap_extent_class,
+       TP_PROTO(struct xfs_inode *ip, int which),
+       TP_ARGS(ip, which),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(int, which)
+               __field(xfs_ino_t, ino)
+               __field(int, format)
+               __field(int, nex)
+               __field(int, max_nex)
+               __field(int, broot_size)
+               __field(int, fork_off)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->which = which;
+               __entry->ino = ip->i_ino;
+               __entry->format = ip->i_d.di_format;
+               __entry->nex = ip->i_d.di_nextents;
+               __entry->max_nex = ip->i_df.if_ext_max;
+               __entry->broot_size = ip->i_df.if_broot_bytes;
+               __entry->fork_off = XFS_IFORK_BOFF(ip);
+       ),
+       TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
+                 "Max in-fork extents %d, broot size %d, fork offset %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
+                 __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
+                 __entry->nex,
+                 __entry->max_nex,
+                 __entry->broot_size,
+                 __entry->fork_off)
+)
+
+#define DEFINE_SWAPEXT_EVENT(name) \
+DEFINE_EVENT(xfs_swap_extent_class, name, \
+       TP_PROTO(struct xfs_inode *ip, int which), \
+       TP_ARGS(ip, which))
+
+DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
+DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
+       TP_PROTO(struct log *log, struct xlog_recover *trans,
+               struct xlog_recover_item *item, int pass),
+       TP_ARGS(log, trans, item, pass),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(unsigned long, item)
+               __field(xlog_tid_t, tid)
+               __field(int, type)
+               __field(int, pass)
+               __field(int, count)
+               __field(int, total)
+       ),
+       TP_fast_assign(
+               __entry->dev = log->l_mp->m_super->s_dev;
+               __entry->item = (unsigned long)item;
+               __entry->tid = trans->r_log_tid;
+               __entry->type = ITEM_TYPE(item);
+               __entry->pass = pass;
+               __entry->count = item->ri_cnt;
+               __entry->total = item->ri_total;
+       ),
+       TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s "
+                 "item region count/total %d/%d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->tid,
+                 __entry->pass,
+                 (void *)__entry->item,
+                 __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
+                 __entry->count,
+                 __entry->total)
+)
+
+#define DEFINE_LOG_RECOVER_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_item_class, name, \
+       TP_PROTO(struct log *log, struct xlog_recover *trans, \
+               struct xlog_recover_item *item, int pass), \
+       TP_ARGS(log, trans, item, pass))
+
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
+       TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f),
+       TP_ARGS(log, buf_f),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(__int64_t, blkno)
+               __field(unsigned short, len)
+               __field(unsigned short, flags)
+               __field(unsigned short, size)
+               __field(unsigned int, map_size)
+       ),
+       TP_fast_assign(
+               __entry->dev = log->l_mp->m_super->s_dev;
+               __entry->blkno = buf_f->blf_blkno;
+               __entry->len = buf_f->blf_len;
+               __entry->flags = buf_f->blf_flags;
+               __entry->size = buf_f->blf_size;
+               __entry->map_size = buf_f->blf_map_size;
+       ),
+       TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, "
+                       "map_size %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->blkno,
+                 __entry->len,
+                 __entry->flags,
+                 __entry->size,
+                 __entry->map_size)
+)
+
+#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
+       TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \
+       TP_ARGS(log, buf_f))
+
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
+       TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f),
+       TP_ARGS(log, in_f),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(unsigned short, size)
+               __field(int, fields)
+               __field(unsigned short, asize)
+               __field(unsigned short, dsize)
+               __field(__int64_t, blkno)
+               __field(int, len)
+               __field(int, boffset)
+       ),
+       TP_fast_assign(
+               __entry->dev = log->l_mp->m_super->s_dev;
+               __entry->ino = in_f->ilf_ino;
+               __entry->size = in_f->ilf_size;
+               __entry->fields = in_f->ilf_fields;
+               __entry->asize = in_f->ilf_asize;
+               __entry->dsize = in_f->ilf_dsize;
+               __entry->blkno = in_f->ilf_blkno;
+               __entry->len = in_f->ilf_len;
+               __entry->boffset = in_f->ilf_boffset;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
+                       "dsize %d, blkno 0x%llx, len %d, boffset %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->fields,
+                 __entry->asize,
+                 __entry->dsize,
+                 __entry->blkno,
+                 __entry->len,
+                 __entry->boffset)
+)
+#define DEFINE_LOG_RECOVER_INO_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
+       TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \
+       TP_ARGS(log, in_f))
+
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
+
+DECLARE_EVENT_CLASS(xfs_discard_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+                xfs_agblock_t agbno, xfs_extlen_t len),
+       TP_ARGS(mp, agno, agbno, len),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(xfs_extlen_t, len)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->agbno = agbno;
+               __entry->len = len;
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u len %u\n",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agbno,
+                 __entry->len)
+)
+
+#define DEFINE_DISCARD_EVENT(name) \
+DEFINE_EVENT(xfs_discard_class, name, \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+                xfs_agblock_t agbno, xfs_extlen_t len), \
+       TP_ARGS(mp, agno, agbno, len))
+DEFINE_DISCARD_EVENT(xfs_discard_extent);
+DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
+DEFINE_DISCARD_EVENT(xfs_discard_exclude);
+DEFINE_DISCARD_EVENT(xfs_discard_busy);
+
+#endif /* _TRACE_XFS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE xfs_trace
+#include <trace/define_trace.h>
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c

index 43233e92f0f688c9004db8c023ee09576f266667..c15aa29fa1696e33608601329be58770f9da78b0 100644 (file)
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -299,7 +299,7 @@ xfs_trans_ail_cursor_last(
   * Splice the log item list into the AIL at the given LSN. We splice to the
   * tail of the given LSN to maintain insert order for push traversals. The
   * cursor is optional, allowing repeated updates to the same LSN to avoid
- * repeated traversals.
+ * repeated traversals.  This should not be called with an empty list.
   */
  static void
  xfs_ail_splice(
@@ -308,50 +308,39 @@ xfs_ail_splice(
         struct list_head        *list,
         xfs_lsn_t               lsn)
  {
-       struct xfs_log_item     *lip = cur ? cur->item : NULL;
-       struct xfs_log_item     *next_lip;
+       struct xfs_log_item     *lip;
+
+       ASSERT(!list_empty(list));
  
         /*
-        * Get a new cursor if we don't have a placeholder or the existing one
-        * has been invalidated.
+        * Use the cursor to determine the insertion point if one is
+        * provided.  If not, or if the one we got is not valid,
+        * find the place in the AIL where the items belong.
          */
-       if (!lip || (__psint_t)lip & 1) {
+       lip = cur ? cur->item : NULL;
+       if (!lip || (__psint_t) lip & 1)
                 lip = __xfs_trans_ail_cursor_last(ailp, lsn);
  
-               if (!lip) {
-                       /* The list is empty, so just splice and return.  */
-                       if (cur)
-                               cur->item = NULL;
-                       list_splice(list, &ailp->xa_ail);
-                       return;
-               }
-       }
+       /*
+        * If a cursor is provided, we know we're processing the AIL
+        * in lsn order, and future items to be spliced in will
+        * follow the last one being inserted now.  Update the
+        * cursor to point to that last item, now while we have a
+        * reliable pointer to it.
+        */
+       if (cur)
+               cur->item = list_entry(list->prev, struct xfs_log_item, li_ail);
  
         /*
-        * Our cursor points to the item we want to insert _after_, so we have
-        * to update the cursor to point to the end of the list we are splicing
-        * in so that it points to the correct location for the next splice.
-        * i.e. before the splice
-        *
-        *  lsn -> lsn -> lsn + x -> lsn + x ...
-        *          ^
-        *          | cursor points here
-        *
-        * After the splice we have:
-        *
-        *  lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ...
-        *          ^                            ^
-        *          | cursor points here         | needs to move here
-        *
-        * So we set the cursor to the last item in the list to be spliced
-        * before we execute the splice, resulting in the cursor pointing to
-        * the correct item after the splice occurs.
+        * Finally perform the splice.  Unless the AIL was empty,
+        * lip points to the item in the AIL _after_ which the new
+        * items should go.  If lip is null the AIL was empty, so
+        * the new items go at the head of the AIL.
          */
-       if (cur) {
-               next_lip = list_entry(list->prev, struct xfs_log_item, li_ail);
-               cur->item = next_lip;
-       }
-       list_splice(list, &lip->li_ail);
+       if (lip)
+               list_splice(list, &lip->li_ail);
+       else
+               list_splice(list, &ailp->xa_ail);
  }
  
  /*
@@ -682,6 +671,7 @@ xfs_trans_ail_update_bulk(
         int                     i;
         LIST_HEAD(tmp);
  
+       ASSERT(nr_items > 0);           /* Not required, but true. */
         mlip = xfs_ail_min(ailp);
  
         for (i = 0; i < nr_items; i++) {
@@ -701,7 +691,8 @@ xfs_trans_ail_update_bulk(
                 list_add(&lip->li_ail, &tmp);
         }
  
-       xfs_ail_splice(ailp, cur, &tmp, lsn);
+       if (!list_empty(&tmp))
+               xfs_ail_splice(ailp, cur, &tmp, lsn);
  
         if (!mlip_changed) {
                 spin_unlock(&ailp->xa_lock);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c

index 15584fc3ed7d5d87a1627e9156f7694c02f522c9..137e2b9e2948ae29c41eb07867c5ea813f19bdea 100644 (file)
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -54,7 +54,7 @@ xfs_trans_buf_item_match(
         list_for_each_entry(lidp, &tp->t_items, lid_trans) {
                 blip = (struct xfs_buf_log_item *)lidp->lid_item;
                 if (blip->bli_item.li_type == XFS_LI_BUF &&
-                   XFS_BUF_TARGET(blip->bli_buf) == target &&
+                   blip->bli_buf->b_target == target &&
                     XFS_BUF_ADDR(blip->bli_buf) == blkno &&
                     XFS_BUF_COUNT(blip->bli_buf) == len)
                         return blip->bli_buf;
@@ -80,7 +80,6 @@ _xfs_trans_bjoin(
  {
         struct xfs_buf_log_item *bip;
  
-       ASSERT(XFS_BUF_ISBUSY(bp));
         ASSERT(bp->b_transp == NULL);
  
         /*
@@ -194,7 +193,7 @@ xfs_trans_get_buf(xfs_trans_t       *tp,
                 return NULL;
         }
  
-       ASSERT(!XFS_BUF_GETERROR(bp));
+       ASSERT(!bp->b_error);
  
         _xfs_trans_bjoin(tp, bp, 1);
         trace_xfs_trans_get_buf(bp->b_fspriv);
@@ -293,10 +292,10 @@ xfs_trans_read_buf(
                         return (flags & XBF_TRYLOCK) ?
                                         EAGAIN : XFS_ERROR(ENOMEM);
  
-               if (XFS_BUF_GETERROR(bp) != 0) {
+               if (bp->b_error) {
+                       error = bp->b_error;
                         xfs_ioerror_alert("xfs_trans_read_buf", mp,
                                           bp, blkno);
-                       error = XFS_BUF_GETERROR(bp);
                         xfs_buf_relse(bp);
                         return error;
                 }
@@ -330,7 +329,7 @@ xfs_trans_read_buf(
                 ASSERT(xfs_buf_islocked(bp));
                 ASSERT(bp->b_transp == tp);
                 ASSERT(bp->b_fspriv != NULL);
-               ASSERT((XFS_BUF_ISERROR(bp)) == 0);
+               ASSERT(!bp->b_error);
                 if (!(XFS_BUF_ISDONE(bp))) {
                         trace_xfs_trans_read_buf_io(bp, _RET_IP_);
                         ASSERT(!XFS_BUF_ISASYNC(bp));
@@ -386,10 +385,9 @@ xfs_trans_read_buf(
                 return (flags & XBF_TRYLOCK) ?
                                         0 : XFS_ERROR(ENOMEM);
         }
-       if (XFS_BUF_GETERROR(bp) != 0) {
-           XFS_BUF_SUPER_STALE(bp);
-               error = XFS_BUF_GETERROR(bp);
-
+       if (bp->b_error) {
+               error = bp->b_error;
+               XFS_BUF_SUPER_STALE(bp);
                 xfs_ioerror_alert("xfs_trans_read_buf", mp,
                                   bp, blkno);
                 if (tp->t_flags & XFS_TRANS_DIRTY)
@@ -430,7 +428,7 @@ shutdown_abort:
         if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
                 xfs_notice(mp, "about to pop assert, bp == 0x%p", bp);
  #endif
-       ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) !=
+       ASSERT((bp->b_flags & (XBF_STALE|XBF_DELWRI)) !=
                                      (XBF_STALE|XBF_DELWRI));
  
         trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
@@ -581,7 +579,6 @@ xfs_trans_bhold(xfs_trans_t *tp,
  {
         xfs_buf_log_item_t      *bip = bp->b_fspriv;
  
-       ASSERT(XFS_BUF_ISBUSY(bp));
         ASSERT(bp->b_transp == tp);
         ASSERT(bip != NULL);
         ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
@@ -602,7 +599,6 @@ xfs_trans_bhold_release(xfs_trans_t *tp,
  {
         xfs_buf_log_item_t      *bip = bp->b_fspriv;
  
-       ASSERT(XFS_BUF_ISBUSY(bp));
         ASSERT(bp->b_transp == tp);
         ASSERT(bip != NULL);
         ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
@@ -631,7 +627,6 @@ xfs_trans_log_buf(xfs_trans_t       *tp,
  {
         xfs_buf_log_item_t      *bip = bp->b_fspriv;
  
-       ASSERT(XFS_BUF_ISBUSY(bp));
         ASSERT(bp->b_transp == tp);
         ASSERT(bip != NULL);
         ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
@@ -702,7 +697,6 @@ xfs_trans_binval(
  {
         xfs_buf_log_item_t      *bip = bp->b_fspriv;
  
-       ASSERT(XFS_BUF_ISBUSY(bp));
         ASSERT(bp->b_transp == tp);
         ASSERT(bip != NULL);
         ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -774,7 +768,6 @@ xfs_trans_inode_buf(
  {
         xfs_buf_log_item_t      *bip = bp->b_fspriv;
  
-       ASSERT(XFS_BUF_ISBUSY(bp));
         ASSERT(bp->b_transp == tp);
         ASSERT(bip != NULL);
         ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -798,7 +791,6 @@ xfs_trans_stale_inode_buf(
  {
         xfs_buf_log_item_t      *bip = bp->b_fspriv;
  
-       ASSERT(XFS_BUF_ISBUSY(bp));
         ASSERT(bp->b_transp == tp);
         ASSERT(bip != NULL);
         ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -823,7 +815,6 @@ xfs_trans_inode_alloc_buf(
  {
         xfs_buf_log_item_t      *bip = bp->b_fspriv;
  
-       ASSERT(XFS_BUF_ISBUSY(bp));
         ASSERT(bp->b_transp == tp);
         ASSERT(bip != NULL);
         ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -851,7 +842,6 @@ xfs_trans_dquot_buf(
  {
         xfs_buf_log_item_t      *bip = bp->b_fspriv;
  
-       ASSERT(XFS_BUF_ISBUSY(bp));
         ASSERT(bp->b_transp == tp);
         ASSERT(bip != NULL);
         ASSERT(type == XFS_BLF_UDQUOT_BUF ||
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c

new file mode 100644 (file)

index 0000000..4d00ee6
--- /dev/null
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -0,0 +1,890 @@
+/*
+ * Copyright (c) 2000-2002 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_alloc.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_attr.h"
+#include "xfs_buf_item.h"
+#include "xfs_trans_priv.h"
+#include "xfs_qm.h"
+
+STATIC void    xfs_trans_alloc_dqinfo(xfs_trans_t *);
+
+/*
+ * Add the locked dquot to the transaction.
+ * The dquot must be locked, and it cannot be associated with any
+ * transaction.
+ */
+void
+xfs_trans_dqjoin(
+       xfs_trans_t     *tp,
+       xfs_dquot_t     *dqp)
+{
+       ASSERT(dqp->q_transp != tp);
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       ASSERT(dqp->q_logitem.qli_dquot == dqp);
+
+       /*
+        * Get a log_item_desc to point at the new item.
+        */
+       xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
+
+       /*
+        * Initialize d_transp so we can later determine if this dquot is
+        * associated with this transaction.
+        */
+       dqp->q_transp = tp;
+}
+
+
+/*
+ * This is called to mark the dquot as needing
+ * to be logged when the transaction is committed.  The dquot must
+ * already be associated with the given transaction.
+ * Note that it marks the entire transaction as dirty. In the ordinary
+ * case, this gets called via xfs_trans_commit, after the transaction
+ * is already dirty. However, there's nothing stop this from getting
+ * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY
+ * flag.
+ */
+void
+xfs_trans_log_dquot(
+       xfs_trans_t     *tp,
+       xfs_dquot_t     *dqp)
+{
+       ASSERT(dqp->q_transp == tp);
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+}
+
+/*
+ * Carry forward whatever is left of the quota blk reservation to
+ * the spanky new transaction
+ */
+void
+xfs_trans_dup_dqinfo(
+       xfs_trans_t     *otp,
+       xfs_trans_t     *ntp)
+{
+       xfs_dqtrx_t     *oq, *nq;
+       int             i,j;
+       xfs_dqtrx_t     *oqa, *nqa;
+
+       if (!otp->t_dqinfo)
+               return;
+
+       xfs_trans_alloc_dqinfo(ntp);
+       oqa = otp->t_dqinfo->dqa_usrdquots;
+       nqa = ntp->t_dqinfo->dqa_usrdquots;
+
+       /*
+        * Because the quota blk reservation is carried forward,
+        * it is also necessary to carry forward the DQ_DIRTY flag.
+        */
+       if(otp->t_flags & XFS_TRANS_DQ_DIRTY)
+               ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
+
+       for (j = 0; j < 2; j++) {
+               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+                       if (oqa[i].qt_dquot == NULL)
+                               break;
+                       oq = &oqa[i];
+                       nq = &nqa[i];
+
+                       nq->qt_dquot = oq->qt_dquot;
+                       nq->qt_bcount_delta = nq->qt_icount_delta = 0;
+                       nq->qt_rtbcount_delta = 0;
+
+                       /*
+                        * Transfer whatever is left of the reservations.
+                        */
+                       nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used;
+                       oq->qt_blk_res = oq->qt_blk_res_used;
+
+                       nq->qt_rtblk_res = oq->qt_rtblk_res -
+                               oq->qt_rtblk_res_used;
+                       oq->qt_rtblk_res = oq->qt_rtblk_res_used;
+
+                       nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used;
+                       oq->qt_ino_res = oq->qt_ino_res_used;
+
+               }
+               oqa = otp->t_dqinfo->dqa_grpdquots;
+               nqa = ntp->t_dqinfo->dqa_grpdquots;
+       }
+}
+
+/*
+ * Wrap around mod_dquot to account for both user and group quotas.
+ */
+void
+xfs_trans_mod_dquot_byino(
+       xfs_trans_t     *tp,
+       xfs_inode_t     *ip,
+       uint            field,
+       long            delta)
+{
+       xfs_mount_t     *mp = tp->t_mountp;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) ||
+           !XFS_IS_QUOTA_ON(mp) ||
+           ip->i_ino == mp->m_sb.sb_uquotino ||
+           ip->i_ino == mp->m_sb.sb_gquotino)
+               return;
+
+       if (tp->t_dqinfo == NULL)
+               xfs_trans_alloc_dqinfo(tp);
+
+       if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
+               (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
+       if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot)
+               (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta);
+}
+
+STATIC xfs_dqtrx_t *
+xfs_trans_get_dqtrx(
+       xfs_trans_t     *tp,
+       xfs_dquot_t     *dqp)
+{
+       int             i;
+       xfs_dqtrx_t     *qa;
+
+       qa = XFS_QM_ISUDQ(dqp) ?
+               tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots;
+
+       for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+               if (qa[i].qt_dquot == NULL ||
+                   qa[i].qt_dquot == dqp)
+                       return &qa[i];
+       }
+
+       return NULL;
+}
+
+/*
+ * Make the changes in the transaction structure.
+ * The moral equivalent to xfs_trans_mod_sb().
+ * We don't touch any fields in the dquot, so we don't care
+ * if it's locked or not (most of the time it won't be).
+ */
+void
+xfs_trans_mod_dquot(
+       xfs_trans_t     *tp,
+       xfs_dquot_t     *dqp,
+       uint            field,
+       long            delta)
+{
+       xfs_dqtrx_t     *qtrx;
+
+       ASSERT(tp);
+       ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
+       qtrx = NULL;
+
+       if (tp->t_dqinfo == NULL)
+               xfs_trans_alloc_dqinfo(tp);
+       /*
+        * Find either the first free slot or the slot that belongs
+        * to this dquot.
+        */
+       qtrx = xfs_trans_get_dqtrx(tp, dqp);
+       ASSERT(qtrx);
+       if (qtrx->qt_dquot == NULL)
+               qtrx->qt_dquot = dqp;
+
+       switch (field) {
+
+               /*
+                * regular disk blk reservation
+                */
+             case XFS_TRANS_DQ_RES_BLKS:
+               qtrx->qt_blk_res += (ulong)delta;
+               break;
+
+               /*
+                * inode reservation
+                */
+             case XFS_TRANS_DQ_RES_INOS:
+               qtrx->qt_ino_res += (ulong)delta;
+               break;
+
+               /*
+                * disk blocks used.
+                */
+             case XFS_TRANS_DQ_BCOUNT:
+               if (qtrx->qt_blk_res && delta > 0) {
+                       qtrx->qt_blk_res_used += (ulong)delta;
+                       ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
+               }
+               qtrx->qt_bcount_delta += delta;
+               break;
+
+             case XFS_TRANS_DQ_DELBCOUNT:
+               qtrx->qt_delbcnt_delta += delta;
+               break;
+
+               /*
+                * Inode Count
+                */
+             case XFS_TRANS_DQ_ICOUNT:
+               if (qtrx->qt_ino_res && delta > 0) {
+                       qtrx->qt_ino_res_used += (ulong)delta;
+                       ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used);
+               }
+               qtrx->qt_icount_delta += delta;
+               break;
+
+               /*
+                * rtblk reservation
+                */
+             case XFS_TRANS_DQ_RES_RTBLKS:
+               qtrx->qt_rtblk_res += (ulong)delta;
+               break;
+
+               /*
+                * rtblk count
+                */
+             case XFS_TRANS_DQ_RTBCOUNT:
+               if (qtrx->qt_rtblk_res && delta > 0) {
+                       qtrx->qt_rtblk_res_used += (ulong)delta;
+                       ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used);
+               }
+               qtrx->qt_rtbcount_delta += delta;
+               break;
+
+             case XFS_TRANS_DQ_DELRTBCOUNT:
+               qtrx->qt_delrtb_delta += delta;
+               break;
+
+             default:
+               ASSERT(0);
+       }
+       tp->t_flags |= XFS_TRANS_DQ_DIRTY;
+}
+
+
+/*
+ * Given an array of dqtrx structures, lock all the dquots associated
+ * and join them to the transaction, provided they have been modified.
+ * We know that the highest number of dquots (of one type - usr OR grp),
+ * involved in a transaction is 2 and that both usr and grp combined - 3.
+ * So, we don't attempt to make this very generic.
+ */
+STATIC void
+xfs_trans_dqlockedjoin(
+       xfs_trans_t     *tp,
+       xfs_dqtrx_t     *q)
+{
+       ASSERT(q[0].qt_dquot != NULL);
+       if (q[1].qt_dquot == NULL) {
+               xfs_dqlock(q[0].qt_dquot);
+               xfs_trans_dqjoin(tp, q[0].qt_dquot);
+       } else {
+               ASSERT(XFS_QM_TRANS_MAXDQS == 2);
+               xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot);
+               xfs_trans_dqjoin(tp, q[0].qt_dquot);
+               xfs_trans_dqjoin(tp, q[1].qt_dquot);
+       }
+}
+
+
+/*
+ * Called by xfs_trans_commit() and similar in spirit to
+ * xfs_trans_apply_sb_deltas().
+ * Go thru all the dquots belonging to this transaction and modify the
+ * INCORE dquot to reflect the actual usages.
+ * Unreserve just the reservations done by this transaction.
+ * dquot is still left locked at exit.
+ */
+void
+xfs_trans_apply_dquot_deltas(
+       xfs_trans_t             *tp)
+{
+       int                     i, j;
+       xfs_dquot_t             *dqp;
+       xfs_dqtrx_t             *qtrx, *qa;
+       xfs_disk_dquot_t        *d;
+       long                    totalbdelta;
+       long                    totalrtbdelta;
+
+       if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
+               return;
+
+       ASSERT(tp->t_dqinfo);
+       qa = tp->t_dqinfo->dqa_usrdquots;
+       for (j = 0; j < 2; j++) {
+               if (qa[0].qt_dquot == NULL) {
+                       qa = tp->t_dqinfo->dqa_grpdquots;
+                       continue;
+               }
+
+               /*
+                * Lock all of the dquots and join them to the transaction.
+                */
+               xfs_trans_dqlockedjoin(tp, qa);
+
+               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+                       qtrx = &qa[i];
+                       /*
+                        * The array of dquots is filled
+                        * sequentially, not sparsely.
+                        */
+                       if ((dqp = qtrx->qt_dquot) == NULL)
+                               break;
+
+                       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+                       ASSERT(dqp->q_transp == tp);
+
+                       /*
+                        * adjust the actual number of blocks used
+                        */
+                       d = &dqp->q_core;
+
+                       /*
+                        * The issue here is - sometimes we don't make a blkquota
+                        * reservation intentionally to be fair to users
+                        * (when the amount is small). On the other hand,
+                        * delayed allocs do make reservations, but that's
+                        * outside of a transaction, so we have no
+                        * idea how much was really reserved.
+                        * So, here we've accumulated delayed allocation blks and
+                        * non-delay blks. The assumption is that the
+                        * delayed ones are always reserved (outside of a
+                        * transaction), and the others may or may not have
+                        * quota reservations.
+                        */
+                       totalbdelta = qtrx->qt_bcount_delta +
+                               qtrx->qt_delbcnt_delta;
+                       totalrtbdelta = qtrx->qt_rtbcount_delta +
+                               qtrx->qt_delrtb_delta;
+#ifdef DEBUG
+                       if (totalbdelta < 0)
+                               ASSERT(be64_to_cpu(d->d_bcount) >=
+                                      -totalbdelta);
+
+                       if (totalrtbdelta < 0)
+                               ASSERT(be64_to_cpu(d->d_rtbcount) >=
+                                      -totalrtbdelta);
+
+                       if (qtrx->qt_icount_delta < 0)
+                               ASSERT(be64_to_cpu(d->d_icount) >=
+                                      -qtrx->qt_icount_delta);
+#endif
+                       if (totalbdelta)
+                               be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
+
+                       if (qtrx->qt_icount_delta)
+                               be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta);
+
+                       if (totalrtbdelta)
+                               be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta);
+
+                       /*
+                        * Get any default limits in use.
+                        * Start/reset the timer(s) if needed.
+                        */
+                       if (d->d_id) {
+                               xfs_qm_adjust_dqlimits(tp->t_mountp, d);
+                               xfs_qm_adjust_dqtimers(tp->t_mountp, d);
+                       }
+
+                       dqp->dq_flags |= XFS_DQ_DIRTY;
+                       /*
+                        * add this to the list of items to get logged
+                        */
+                       xfs_trans_log_dquot(tp, dqp);
+                       /*
+                        * Take off what's left of the original reservation.
+                        * In case of delayed allocations, there's no
+                        * reservation that a transaction structure knows of.
+                        */
+                       if (qtrx->qt_blk_res != 0) {
+                               if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
+                                       if (qtrx->qt_blk_res >
+                                           qtrx->qt_blk_res_used)
+                                               dqp->q_res_bcount -= (xfs_qcnt_t)
+                                                       (qtrx->qt_blk_res -
+                                                        qtrx->qt_blk_res_used);
+                                       else
+                                               dqp->q_res_bcount -= (xfs_qcnt_t)
+                                                       (qtrx->qt_blk_res_used -
+                                                        qtrx->qt_blk_res);
+                               }
+                       } else {
+                               /*
+                                * These blks were never reserved, either inside
+                                * a transaction or outside one (in a delayed
+                                * allocation). Also, this isn't always a
+                                * negative number since we sometimes
+                                * deliberately skip quota reservations.
+                                */
+                               if (qtrx->qt_bcount_delta) {
+                                       dqp->q_res_bcount +=
+                                             (xfs_qcnt_t)qtrx->qt_bcount_delta;
+                               }
+                       }
+                       /*
+                        * Adjust the RT reservation.
+                        */
+                       if (qtrx->qt_rtblk_res != 0) {
+                               if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) {
+                                       if (qtrx->qt_rtblk_res >
+                                           qtrx->qt_rtblk_res_used)
+                                              dqp->q_res_rtbcount -= (xfs_qcnt_t)
+                                                      (qtrx->qt_rtblk_res -
+                                                       qtrx->qt_rtblk_res_used);
+                                       else
+                                              dqp->q_res_rtbcount -= (xfs_qcnt_t)
+                                                      (qtrx->qt_rtblk_res_used -
+                                                       qtrx->qt_rtblk_res);
+                               }
+                       } else {
+                               if (qtrx->qt_rtbcount_delta)
+                                       dqp->q_res_rtbcount +=
+                                           (xfs_qcnt_t)qtrx->qt_rtbcount_delta;
+                       }
+
+                       /*
+                        * Adjust the inode reservation.
+                        */
+                       if (qtrx->qt_ino_res != 0) {
+                               ASSERT(qtrx->qt_ino_res >=
+                                      qtrx->qt_ino_res_used);
+                               if (qtrx->qt_ino_res > qtrx->qt_ino_res_used)
+                                       dqp->q_res_icount -= (xfs_qcnt_t)
+                                               (qtrx->qt_ino_res -
+                                                qtrx->qt_ino_res_used);
+                       } else {
+                               if (qtrx->qt_icount_delta)
+                                       dqp->q_res_icount +=
+                                           (xfs_qcnt_t)qtrx->qt_icount_delta;
+                       }
+
+                       ASSERT(dqp->q_res_bcount >=
+                               be64_to_cpu(dqp->q_core.d_bcount));
+                       ASSERT(dqp->q_res_icount >=
+                               be64_to_cpu(dqp->q_core.d_icount));
+                       ASSERT(dqp->q_res_rtbcount >=
+                               be64_to_cpu(dqp->q_core.d_rtbcount));
+               }
+               /*
+                * Do the group quotas next
+                */
+               qa = tp->t_dqinfo->dqa_grpdquots;
+       }
+}
+
+/*
+ * Release the reservations, and adjust the dquots accordingly.
+ * This is called only when the transaction is being aborted. If by
+ * any chance we have done dquot modifications incore (ie. deltas) already,
+ * we simply throw those away, since that's the expected behavior
+ * when a transaction is curtailed without a commit.
+ */
+void
+xfs_trans_unreserve_and_mod_dquots(
+       xfs_trans_t             *tp)
+{
+       int                     i, j;
+       xfs_dquot_t             *dqp;
+       xfs_dqtrx_t             *qtrx, *qa;
+       boolean_t               locked;
+
+       if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
+               return;
+
+       qa = tp->t_dqinfo->dqa_usrdquots;
+
+       for (j = 0; j < 2; j++) {
+               for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
+                       qtrx = &qa[i];
+                       /*
+                        * We assume that the array of dquots is filled
+                        * sequentially, not sparsely.
+                        */
+                       if ((dqp = qtrx->qt_dquot) == NULL)
+                               break;
+                       /*
+                        * Unreserve the original reservation. We don't care
+                        * about the number of blocks used field, or deltas.
+                        * Also we don't bother to zero the fields.
+                        */
+                       locked = B_FALSE;
+                       if (qtrx->qt_blk_res) {
+                               xfs_dqlock(dqp);
+                               locked = B_TRUE;
+                               dqp->q_res_bcount -=
+                                       (xfs_qcnt_t)qtrx->qt_blk_res;
+                       }
+                       if (qtrx->qt_ino_res) {
+                               if (!locked) {
+                                       xfs_dqlock(dqp);
+                                       locked = B_TRUE;
+                               }
+                               dqp->q_res_icount -=
+                                       (xfs_qcnt_t)qtrx->qt_ino_res;
+                       }
+
+                       if (qtrx->qt_rtblk_res) {
+                               if (!locked) {
+                                       xfs_dqlock(dqp);
+                                       locked = B_TRUE;
+                               }
+                               dqp->q_res_rtbcount -=
+                                       (xfs_qcnt_t)qtrx->qt_rtblk_res;
+                       }
+                       if (locked)
+                               xfs_dqunlock(dqp);
+
+               }
+               qa = tp->t_dqinfo->dqa_grpdquots;
+       }
+}
+
+STATIC void
+xfs_quota_warn(
+       struct xfs_mount        *mp,
+       struct xfs_dquot        *dqp,
+       int                     type)
+{
+       /* no warnings for project quotas - we just return ENOSPC later */
+       if (dqp->dq_flags & XFS_DQ_PROJ)
+               return;
+       quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA,
+                          be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev,
+                          type);
+}
+
+/*
+ * This reserves disk blocks and inodes against a dquot.
+ * Flags indicate if the dquot is to be locked here and also
+ * if the blk reservation is for RT or regular blocks.
+ * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check.
+ */
+STATIC int
+xfs_trans_dqresv(
+       xfs_trans_t     *tp,
+       xfs_mount_t     *mp,
+       xfs_dquot_t     *dqp,
+       long            nblks,
+       long            ninos,
+       uint            flags)
+{
+       xfs_qcnt_t      hardlimit;
+       xfs_qcnt_t      softlimit;
+       time_t          timer;
+       xfs_qwarncnt_t  warns;
+       xfs_qwarncnt_t  warnlimit;
+       xfs_qcnt_t      count;
+       xfs_qcnt_t      *resbcountp;
+       xfs_quotainfo_t *q = mp->m_quotainfo;
+
+
+       xfs_dqlock(dqp);
+
+       if (flags & XFS_TRANS_DQ_RES_BLKS) {
+               hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
+               if (!hardlimit)
+                       hardlimit = q->qi_bhardlimit;
+               softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
+               if (!softlimit)
+                       softlimit = q->qi_bsoftlimit;
+               timer = be32_to_cpu(dqp->q_core.d_btimer);
+               warns = be16_to_cpu(dqp->q_core.d_bwarns);
+               warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
+               resbcountp = &dqp->q_res_bcount;
+       } else {
+               ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
+               hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
+               if (!hardlimit)
+                       hardlimit = q->qi_rtbhardlimit;
+               softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
+               if (!softlimit)
+                       softlimit = q->qi_rtbsoftlimit;
+               timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
+               warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
+               warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
+               resbcountp = &dqp->q_res_rtbcount;
+       }
+
+       if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
+           dqp->q_core.d_id &&
+           ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
+            (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
+             (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
+               if (nblks > 0) {
+                       /*
+                        * dquot is locked already. See if we'd go over the
+                        * hardlimit or exceed the timelimit if we allocate
+                        * nblks.
+                        */
+                       if (hardlimit > 0ULL &&
+                           hardlimit <= nblks + *resbcountp) {
+                               xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
+                               goto error_return;
+                       }
+                       if (softlimit > 0ULL &&
+                           softlimit <= nblks + *resbcountp) {
+                               if ((timer != 0 && get_seconds() > timer) ||
+                                   (warns != 0 && warns >= warnlimit)) {
+                                       xfs_quota_warn(mp, dqp,
+                                                      QUOTA_NL_BSOFTLONGWARN);
+                                       goto error_return;
+                               }
+
+                               xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN);
+                       }
+               }
+               if (ninos > 0) {
+                       count = be64_to_cpu(dqp->q_core.d_icount);
+                       timer = be32_to_cpu(dqp->q_core.d_itimer);
+                       warns = be16_to_cpu(dqp->q_core.d_iwarns);
+                       warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
+                       hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
+                       if (!hardlimit)
+                               hardlimit = q->qi_ihardlimit;
+                       softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
+                       if (!softlimit)
+                               softlimit = q->qi_isoftlimit;
+
+                       if (hardlimit > 0ULL && count >= hardlimit) {
+                               xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
+                               goto error_return;
+                       }
+                       if (softlimit > 0ULL && count >= softlimit) {
+                               if  ((timer != 0 && get_seconds() > timer) ||
+                                    (warns != 0 && warns >= warnlimit)) {
+                                       xfs_quota_warn(mp, dqp,
+                                                      QUOTA_NL_ISOFTLONGWARN);
+                                       goto error_return;
+                               }
+                               xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN);
+                       }
+               }
+       }
+
+       /*
+        * Change the reservation, but not the actual usage.
+        * Note that q_res_bcount = q_core.d_bcount + resv
+        */
+       (*resbcountp) += (xfs_qcnt_t)nblks;
+       if (ninos != 0)
+               dqp->q_res_icount += (xfs_qcnt_t)ninos;
+
+       /*
+        * note the reservation amt in the trans struct too,
+        * so that the transaction knows how much was reserved by
+        * it against this particular dquot.
+        * We don't do this when we are reserving for a delayed allocation,
+        * because we don't have the luxury of a transaction envelope then.
+        */
+       if (tp) {
+               ASSERT(tp->t_dqinfo);
+               ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
+               if (nblks != 0)
+                       xfs_trans_mod_dquot(tp, dqp,
+                                           flags & XFS_QMOPT_RESBLK_MASK,
+                                           nblks);
+               if (ninos != 0)
+                       xfs_trans_mod_dquot(tp, dqp,
+                                           XFS_TRANS_DQ_RES_INOS,
+                                           ninos);
+       }
+       ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount));
+       ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount));
+       ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
+
+       xfs_dqunlock(dqp);
+       return 0;
+
+error_return:
+       xfs_dqunlock(dqp);
+       if (flags & XFS_QMOPT_ENOSPC)
+               return ENOSPC;
+       return EDQUOT;
+}
+
+
+/*
+ * Given dquot(s), make disk block and/or inode reservations against them.
+ * The fact that this does the reservation against both the usr and
+ * grp/prj quotas is important, because this follows a both-or-nothing
+ * approach.
+ *
+ * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
+ *        XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT.  Used by pquota.
+ *        XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
+ *        XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
+ * dquots are unlocked on return, if they were not locked by caller.
+ */
+int
+xfs_trans_reserve_quota_bydquots(
+       xfs_trans_t     *tp,
+       xfs_mount_t     *mp,
+       xfs_dquot_t     *udqp,
+       xfs_dquot_t     *gdqp,
+       long            nblks,
+       long            ninos,
+       uint            flags)
+{
+       int             resvd = 0, error;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return 0;
+
+       if (tp && tp->t_dqinfo == NULL)
+               xfs_trans_alloc_dqinfo(tp);
+
+       ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
+
+       if (udqp) {
+               error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos,
+                                       (flags & ~XFS_QMOPT_ENOSPC));
+               if (error)
+                       return error;
+               resvd = 1;
+       }
+
+       if (gdqp) {
+               error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags);
+               if (error) {
+                       /*
+                        * can't do it, so backout previous reservation
+                        */
+                       if (resvd) {
+                               flags |= XFS_QMOPT_FORCE_RES;
+                               xfs_trans_dqresv(tp, mp, udqp,
+                                                -nblks, -ninos, flags);
+                       }
+                       return error;
+               }
+       }
+
+       /*
+        * Didn't change anything critical, so, no need to log
+        */
+       return 0;
+}
+
+
+/*
+ * Lock the dquot and change the reservation if we can.
+ * This doesn't change the actual usage, just the reservation.
+ * The inode sent in is locked.
+ */
+int
+xfs_trans_reserve_quota_nblks(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       long                    nblks,
+       long                    ninos,
+       uint                    flags)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+
+       if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
+               return 0;
+       if (XFS_IS_PQUOTA_ON(mp))
+               flags |= XFS_QMOPT_ENOSPC;
+
+       ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
+       ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
+
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
+                               XFS_TRANS_DQ_RES_RTBLKS ||
+              (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
+                               XFS_TRANS_DQ_RES_BLKS);
+
+       /*
+        * Reserve nblks against these dquots, with trans as the mediator.
+        */
+       return xfs_trans_reserve_quota_bydquots(tp, mp,
+                                               ip->i_udquot, ip->i_gdquot,
+                                               nblks, ninos, flags);
+}
+
+/*
+ * This routine is called to allocate a quotaoff log item.
+ */
+xfs_qoff_logitem_t *
+xfs_trans_get_qoff_item(
+       xfs_trans_t             *tp,
+       xfs_qoff_logitem_t      *startqoff,
+       uint                    flags)
+{
+       xfs_qoff_logitem_t      *q;
+
+       ASSERT(tp != NULL);
+
+       q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags);
+       ASSERT(q != NULL);
+
+       /*
+        * Get a log_item_desc to point at the new item.
+        */
+       xfs_trans_add_item(tp, &q->qql_item);
+       return q;
+}
+
+
+/*
+ * This is called to mark the quotaoff logitem as needing
+ * to be logged when the transaction is committed.  The logitem must
+ * already be associated with the given transaction.
+ */
+void
+xfs_trans_log_quotaoff_item(
+       xfs_trans_t             *tp,
+       xfs_qoff_logitem_t      *qlp)
+{
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+}
+
+STATIC void
+xfs_trans_alloc_dqinfo(
+       xfs_trans_t     *tp)
+{
+       tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
+}
+
+void
+xfs_trans_free_dqinfo(
+       xfs_trans_t     *tp)
+{
+       if (!tp->t_dqinfo)
+               return;
+       kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
+       tp->t_dqinfo = NULL;
+}
diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h

new file mode 100644 (file)

index 0000000..7c220b4
--- /dev/null
+++ b/fs/xfs/xfs_vnode.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_VNODE_H__
+#define __XFS_VNODE_H__
+
+#include "xfs_fs.h"
+
+struct file;
+struct xfs_inode;
+struct xfs_iomap;
+struct attrlist_cursor_kern;
+
+/*
+ * Return values for xfs_inactive.  A return value of
+ * VN_INACTIVE_NOCACHE implies that the file system behavior
+ * has disassociated its state and bhv_desc_t from the vnode.
+ */
+#define        VN_INACTIVE_CACHE       0
+#define        VN_INACTIVE_NOCACHE     1
+
+/*
+ * Flags for read/write calls - same values as IRIX
+ */
+#define IO_ISDIRECT    0x00004         /* bypass page cache */
+#define IO_INVIS       0x00020         /* don't update inode timestamps */
+
+#define XFS_IO_FLAGS \
+       { IO_ISDIRECT,  "DIRECT" }, \
+       { IO_INVIS,     "INVIS"}
+
+/*
+ * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
+ */
+#define FI_NONE                        0       /* none */
+#define FI_REMAPF              1       /* Do a remapf prior to the operation */
+#define FI_REMAPF_LOCKED       2       /* Do a remapf prior to the operation.
+                                          Prevent VM access to the pages until
+                                          the operation completes. */
+
+/*
+ * Some useful predicates.
+ */
+#define VN_MAPPED(vp)  mapping_mapped(vp->i_mapping)
+#define VN_CACHED(vp)  (vp->i_mapping->nrpages)
+#define VN_DIRTY(vp)   mapping_tagged(vp->i_mapping, \
+                                       PAGECACHE_TAG_DIRTY)
+
+
+#endif /* __XFS_VNODE_H__ */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c

index 9322e13f0c637724c10a6491e2b2841534817c87..51fc429527bc0cc31839888c8e9597de551abac5 100644 (file)
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -83,7 +83,9 @@ xfs_readlink_bmap(
  
                 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt),
                                   XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK);
-               error = XFS_BUF_GETERROR(bp);
+               if (!bp)
+                       return XFS_ERROR(ENOMEM);
+               error = bp->b_error;
                 if (error) {
                         xfs_ioerror_alert("xfs_readlink",
                                   ip->i_mount, bp, XFS_BUF_ADDR(bp));
@@ -94,7 +96,7 @@ xfs_readlink_bmap(
                         byte_cnt = pathlen;
                 pathlen -= byte_cnt;
  
-               memcpy(link, XFS_BUF_PTR(bp), byte_cnt);
+               memcpy(link, bp->b_addr, byte_cnt);
                 xfs_buf_relse(bp);
         }
  
@@ -1648,13 +1650,13 @@ xfs_symlink(
                         byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
                         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
                                                BTOBB(byte_cnt), 0);
-                       ASSERT(bp && !XFS_BUF_GETERROR(bp));
+                       ASSERT(!xfs_buf_geterror(bp));
                         if (pathlen < byte_cnt) {
                                 byte_cnt = pathlen;
                         }
                         pathlen -= byte_cnt;
  
-                       memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt);
+                       memcpy(bp->b_addr, cur_chunk, byte_cnt);
                         cur_chunk += byte_cnt;
  
                         xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
@@ -1999,7 +2001,7 @@ xfs_zero_remaining_bytes(
                                           mp, bp, XFS_BUF_ADDR(bp));
                         break;
                 }
-               memset(XFS_BUF_PTR(bp) +
+               memset(bp->b_addr +
                         (offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
                       0, lastoffset - offset + 1);
                 XFS_BUF_UNDONE(bp);
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c

new file mode 100644 (file)

index 0000000..87d3e03
--- /dev/null
+++ b/fs/xfs/xfs_xattr.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2008 Christoph Hellwig.
+ * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xfs.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_acl.h"
+#include "xfs_vnodeops.h"
+
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+
+static int
+xfs_xattr_get(struct dentry *dentry, const char *name,
+               void *value, size_t size, int xflags)
+{
+       struct xfs_inode *ip = XFS_I(dentry->d_inode);
+       int error, asize = size;
+
+       if (strcmp(name, "") == 0)
+               return -EINVAL;
+
+       /* Convert Linux syscall to XFS internal ATTR flags */
+       if (!size) {
+               xflags |= ATTR_KERNOVAL;
+               value = NULL;
+       }
+
+       error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
+       if (error)
+               return error;
+       return asize;
+}
+
+static int
+xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
+               size_t size, int flags, int xflags)
+{
+       struct xfs_inode *ip = XFS_I(dentry->d_inode);
+
+       if (strcmp(name, "") == 0)
+               return -EINVAL;
+
+       /* Convert Linux syscall to XFS internal ATTR flags */
+       if (flags & XATTR_CREATE)
+               xflags |= ATTR_CREATE;
+       if (flags & XATTR_REPLACE)
+               xflags |= ATTR_REPLACE;
+
+       if (!value)
+               return -xfs_attr_remove(ip, (unsigned char *)name, xflags);
+       return -xfs_attr_set(ip, (unsigned char *)name,
+                               (void *)value, size, xflags);
+}
+
+static const struct xattr_handler xfs_xattr_user_handler = {
+       .prefix = XATTR_USER_PREFIX,
+       .flags  = 0, /* no flags implies user namespace */
+       .get    = xfs_xattr_get,
+       .set    = xfs_xattr_set,
+};
+
+static const struct xattr_handler xfs_xattr_trusted_handler = {
+       .prefix = XATTR_TRUSTED_PREFIX,
+       .flags  = ATTR_ROOT,
+       .get    = xfs_xattr_get,
+       .set    = xfs_xattr_set,
+};
+
+static const struct xattr_handler xfs_xattr_security_handler = {
+       .prefix = XATTR_SECURITY_PREFIX,
+       .flags  = ATTR_SECURE,
+       .get    = xfs_xattr_get,
+       .set    = xfs_xattr_set,
+};
+
+const struct xattr_handler *xfs_xattr_handlers[] = {
+       &xfs_xattr_user_handler,
+       &xfs_xattr_trusted_handler,
+       &xfs_xattr_security_handler,
+#ifdef CONFIG_XFS_POSIX_ACL
+       &xfs_xattr_acl_access_handler,
+       &xfs_xattr_acl_default_handler,
+#endif
+       NULL
+};
+
+static unsigned int xfs_xattr_prefix_len(int flags)
+{
+       if (flags & XFS_ATTR_SECURE)
+               return sizeof("security");
+       else if (flags & XFS_ATTR_ROOT)
+               return sizeof("trusted");
+       else
+               return sizeof("user");
+}
+
+static const char *xfs_xattr_prefix(int flags)
+{
+       if (flags & XFS_ATTR_SECURE)
+               return xfs_xattr_security_handler.prefix;
+       else if (flags & XFS_ATTR_ROOT)
+               return xfs_xattr_trusted_handler.prefix;
+       else
+               return xfs_xattr_user_handler.prefix;
+}
+
+static int
+xfs_xattr_put_listent(
+       struct xfs_attr_list_context *context,
+       int             flags,
+       unsigned char   *name,
+       int             namelen,
+       int             valuelen,
+       unsigned char   *value)
+{
+       unsigned int prefix_len = xfs_xattr_prefix_len(flags);
+       char *offset;
+       int arraytop;
+
+       ASSERT(context->count >= 0);
+
+       /*
+        * Only show root namespace entries if we are actually allowed to
+        * see them.
+        */
+       if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
+               return 0;
+
+       arraytop = context->count + prefix_len + namelen + 1;
+       if (arraytop > context->firstu) {
+               context->count = -1;    /* insufficient space */
+               return 1;
+       }
+       offset = (char *)context->alist + context->count;
+       strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
+       offset += prefix_len;
+       strncpy(offset, (char *)name, namelen);                 /* real name */
+       offset += namelen;
+       *offset = '\0';
+       context->count += prefix_len + namelen + 1;
+       return 0;
+}
+
+static int
+xfs_xattr_put_listent_sizes(
+       struct xfs_attr_list_context *context,
+       int             flags,
+       unsigned char   *name,
+       int             namelen,
+       int             valuelen,
+       unsigned char   *value)
+{
+       context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
+       return 0;
+}
+
+static int
+list_one_attr(const char *name, const size_t len, void *data,
+               size_t size, ssize_t *result)
+{
+       char *p = data + *result;
+
+       *result += len;
+       if (!size)
+               return 0;
+       if (*result > size)
+               return -ERANGE;
+
+       strcpy(p, name);
+       return 0;
+}
+
+ssize_t
+xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
+{
+       struct xfs_attr_list_context context;
+       struct attrlist_cursor_kern cursor = { 0 };
+       struct inode            *inode = dentry->d_inode;
+       int                     error;
+
+       /*
+        * First read the regular on-disk attributes.
+        */
+       memset(&context, 0, sizeof(context));
+       context.dp = XFS_I(inode);
+       context.cursor = &cursor;
+       context.resynch = 1;
+       context.alist = data;
+       context.bufsize = size;
+       context.firstu = context.bufsize;
+
+       if (size)
+               context.put_listent = xfs_xattr_put_listent;
+       else
+               context.put_listent = xfs_xattr_put_listent_sizes;
+
+       xfs_attr_list_int(&context);
+       if (context.count < 0)
+               return -ERANGE;
+
+       /*
+        * Then add the two synthetic ACL attributes.
+        */
+       if (posix_acl_access_exists(inode)) {
+               error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
+                               strlen(POSIX_ACL_XATTR_ACCESS) + 1,
+                               data, size, &context.count);
+               if (error)
+                       return error;
+       }
+
+       if (posix_acl_default_exists(inode)) {
+               error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
+                               strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
+                               data, size, &context.count);
+               if (error)
+                       return error;
+       }
+
+       return context.count;
+}
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h

index fb2d63f13f4c0aa7d5caf5d2bce8171f22f5b07b..aea9e45efce63572eee4daafc2ecfffcdd104b6e 100644 (file)
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -39,7 +39,7 @@
  })
  
  #define __page_to_pfn(pg)                                              \
-({     struct page *__pg = (pg);                                       \
+({     const struct page *__pg = (pg);                                 \
         struct pglist_data *__pgdat = NODE_DATA(page_to_nid(__pg));     \
         (unsigned long)(__pg - __pgdat->node_mem_map) +                 \
          __pgdat->node_start_pfn;                                       \
@@ -57,7 +57,7 @@
   * section[i].section_mem_map == mem_map's address - start_pfn;
   */
  #define __page_to_pfn(pg)                                      \
-({     struct page *__pg = (pg);                               \
+({     const struct page *__pg = (pg);                         \
         int __sec = page_to_section(__pg);                      \
         (unsigned long)(__pg - __section_mem_map_addr(__nr_to_section(__sec))); \
  })
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h

index 4f76959397fa88a869dff276ba5ab0ccc9494071..f4c38d8c6674a3dd71ea08a47dc68e89e81e0d92 100644 (file)
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -143,7 +143,7 @@ __SYSCALL(__NR_pivot_root, sys_pivot_root)
  
  /* fs/nfsctl.c */
  #define __NR_nfsservctl 42
-__SC_COMP(__NR_nfsservctl, sys_nfsservctl, compat_sys_nfsservctl)
+__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
  
  /* fs/open.c */
  #define __NR3264_statfs 43
diff --git a/include/linux/basic_mmio_gpio.h b/include/linux/basic_mmio_gpio.h

index 98999cf107ce01fec1fb5dc2559a73f308e31de2..feb9121967458e8c99515389d2923cb573547535 100644 (file)
--- a/include/linux/basic_mmio_gpio.h
+++ b/include/linux/basic_mmio_gpio.h
@@ -63,15 +63,10 @@ static inline struct bgpio_chip *to_bgpio_chip(struct gpio_chip *gc)
         return container_of(gc, struct bgpio_chip, gc);
  }
  
-int __devexit bgpio_remove(struct bgpio_chip *bgc);
-int __devinit bgpio_init(struct bgpio_chip *bgc,
-                        struct device *dev,
-                        unsigned long sz,
-                        void __iomem *dat,
-                        void __iomem *set,
-                        void __iomem *clr,
-                        void __iomem *dirout,
-                        void __iomem *dirin,
-                        bool big_endian);
+int bgpio_remove(struct bgpio_chip *bgc);
+int bgpio_init(struct bgpio_chip *bgc, struct device *dev,
+              unsigned long sz, void __iomem *dat, void __iomem *set,
+              void __iomem *clr, void __iomem *dirout, void __iomem *dirin,
+              bool big_endian);
  
  #endif /* __BASIC_MMIO_GPIO_H */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h

index 6395692b2e7a9dec9725dcf42dbf244585f7c83e..32f0076e844b7da4768a709b95a4bba1b135195d 100644 (file)
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -125,7 +125,11 @@ enum rq_flag_bits {
         __REQ_SYNC,             /* request is sync (sync write or read) */
         __REQ_META,             /* metadata io request */
         __REQ_DISCARD,          /* request to discard sectors */
+       __REQ_SECURE,           /* secure discard (used with __REQ_DISCARD) */
+
         __REQ_NOIDLE,           /* don't anticipate more IO after this one */
+       __REQ_FUA,              /* forced unit access */
+       __REQ_FLUSH,            /* request for cache flush */
  
         /* bio only flags */
         __REQ_RAHEAD,           /* read ahead, can fail anytime */
@@ -135,7 +139,6 @@ enum rq_flag_bits {
         /* request only flags */
         __REQ_SORTED,           /* elevator knows about this request */
         __REQ_SOFTBARRIER,      /* may not be passed by ioscheduler */
-       __REQ_FUA,              /* forced unit access */
         __REQ_NOMERGE,          /* don't touch this for merging */
         __REQ_STARTED,          /* drive already may have started this one */
         __REQ_DONTPREP,         /* don't call prep for this one */
@@ -146,11 +149,9 @@ enum rq_flag_bits {
         __REQ_PREEMPT,          /* set for "ide_preempt" requests */
         __REQ_ALLOCED,          /* request came from our alloc pool */
         __REQ_COPY_USER,        /* contains copies of user pages */
-       __REQ_FLUSH,            /* request for cache flush */
         __REQ_FLUSH_SEQ,        /* request for flush sequence */
         __REQ_IO_STAT,          /* account I/O stat */
         __REQ_MIXED_MERGE,      /* merge of different types, fail separately */
-       __REQ_SECURE,           /* secure discard (used with __REQ_DISCARD) */
         __REQ_NR_BITS,          /* stops here */
  };
  
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

index 0e67c45b3bc95ec70c38836cd8801bb4e568f804..84b15d54f8c23887aef4600dd6f87fed0a99104e 100644 (file)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -30,6 +30,7 @@ struct request_pm_state;
  struct blk_trace;
  struct request;
  struct sg_io_hdr;
+struct bsg_job;
  
  #define BLKDEV_MIN_RQ  4
  #define BLKDEV_MAX_RQ  128     /* Default maximum */
@@ -117,6 +118,7 @@ struct request {
                 struct {
                         unsigned int            seq;
                         struct list_head        list;
+                       rq_end_io_fn            *saved_end_io;
                 } flush;
         };
  
@@ -209,6 +211,7 @@ typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *,
  typedef void (softirq_done_fn)(struct request *);
  typedef int (dma_drain_needed_fn)(struct request *);
  typedef int (lld_busy_fn) (struct request_queue *q);
+typedef int (bsg_job_fn) (struct bsg_job *);
  
  enum blk_eh_timer_return {
         BLK_EH_NOT_HANDLED,
@@ -375,6 +378,8 @@ struct request_queue {
         struct mutex            sysfs_lock;
  
  #if defined(CONFIG_BLK_DEV_BSG)
+       bsg_job_fn              *bsg_job_fn;
+       int                     bsg_job_size;
         struct bsg_class_device bsg_dev;
  #endif
  
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h

index 8c7c2de7631a6fb59ab8f4e86334516a7f07e7d1..8e9e4bc6d73b4b5351748afca97a2b4a43b66654 100644 (file)
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -14,7 +14,7 @@
  enum blktrace_cat {
         BLK_TC_READ     = 1 << 0,       /* reads */
         BLK_TC_WRITE    = 1 << 1,       /* writes */
-       BLK_TC_BARRIER  = 1 << 2,       /* barrier */
+       BLK_TC_FLUSH    = 1 << 2,       /* flush */
         BLK_TC_SYNC     = 1 << 3,       /* sync IO */
         BLK_TC_SYNCIO   = BLK_TC_SYNC,
         BLK_TC_QUEUE    = 1 << 4,       /* queueing/merging */
@@ -28,8 +28,9 @@ enum blktrace_cat {
         BLK_TC_META     = 1 << 12,      /* metadata */
         BLK_TC_DISCARD  = 1 << 13,      /* discard requests */
         BLK_TC_DRV_DATA = 1 << 14,      /* binary per-driver data */
+       BLK_TC_FUA      = 1 << 15,      /* fua requests */
  
-       BLK_TC_END      = 1 << 15,      /* only 16-bits, reminder */
+       BLK_TC_END      = 1 << 15,      /* we've run out of bits! */
  };
  
  #define BLK_TC_SHIFT           (16)
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h

new file mode 100644 (file)

index 0000000..f55ab8c
--- /dev/null
+++ b/include/linux/bsg-lib.h
@@ -0,0 +1,73 @@
+/*
+ *  BSG helper library
+ *
+ *  Copyright (C) 2008   James Smart, Emulex Corporation
+ *  Copyright (C) 2011   Red Hat, Inc.  All rights reserved.
+ *  Copyright (C) 2011   Mike Christie
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+#ifndef _BLK_BSG_
+#define _BLK_BSG_
+
+#include <linux/blkdev.h>
+
+struct request;
+struct device;
+struct scatterlist;
+struct request_queue;
+
+struct bsg_buffer {
+       unsigned int payload_len;
+       int sg_cnt;
+       struct scatterlist *sg_list;
+};
+
+struct bsg_job {
+       struct device *dev;
+       struct request *req;
+
+       /* Transport/driver specific request/reply structs */
+       void *request;
+       void *reply;
+
+       unsigned int request_len;
+       unsigned int reply_len;
+       /*
+        * On entry : reply_len indicates the buffer size allocated for
+        * the reply.
+        *
+        * Upon completion : the message handler must set reply_len
+        *  to indicates the size of the reply to be returned to the
+        *  caller.
+        */
+
+       /* DMA payloads for the request/response */
+       struct bsg_buffer request_payload;
+       struct bsg_buffer reply_payload;
+
+       void *dd_data;          /* Used for driver-specific storage */
+};
+
+void bsg_job_done(struct bsg_job *job, int result,
+                 unsigned int reply_payload_rcv_len);
+int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name,
+                   bsg_job_fn *job_fn, int dd_job_size);
+void bsg_request_fn(struct request_queue *q);
+void bsg_remove_queue(struct request_queue *q);
+void bsg_goose_queue(struct request_queue *q);
+
+#endif
diff --git a/include/linux/compat.h b/include/linux/compat.h

index 8779405e15a871eba64681021065ed1ab0e71853..c6e7523bf7652ab050285c2c02dad3715a010cab 100644 (file)
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -438,7 +438,6 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
                                  struct compat_timespec __user *tsp,
                                  const compat_sigset_t __user *sigmask,
                                  compat_size_t sigsetsize);
-asmlinkage long compat_sys_nfsservctl(int cmd, void *notused, void *notused2);
  asmlinkage long compat_sys_signalfd4(int ufd,
                                      const compat_sigset_t __user *sigmask,
                                      compat_size_t sigsetsize, int flags);
diff --git a/include/linux/connector.h b/include/linux/connector.h

index 0c69ad825b39f656d3599d1c79dc16661772057f..3c9c54fd5690a6142cf155091a2e5546600037df 100644 (file)
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -1,7 +1,7 @@
  /*
   *     connector.h
   * 
- * 2004-2005 Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * 2004-2005 Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
   * All rights reserved.
   * 
   * This program is free software; you can redistribute it and/or modify
diff --git a/include/linux/cred.h b/include/linux/cred.h

index 98f46efbe2d21a422802d7fd7bbee06d0b993116..40308969ed005239b1b27889c8eccf3f89d6fcbe 100644 (file)
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -269,7 +269,7 @@ static inline void put_cred(const struct cred *_cred)
   * since nobody else can modify it.
   */
  #define current_cred() \
-       (*(__force struct cred **)&current->cred)
+       rcu_dereference_protected(current->cred, 1)
  
  /**
   * __task_cred - Access a task's objective credentials
@@ -307,7 +307,7 @@ static inline void put_cred(const struct cred *_cred)
  #define get_current_user()                             \
  ({                                                     \
         struct user_struct *__u;                        \
-       struct cred *__cred;                            \
+       const struct cred *__cred;                      \
         __cred = current_cred();                        \
         __u = get_uid(__cred->user);                    \
         __u;                                            \
@@ -322,7 +322,7 @@ static inline void put_cred(const struct cred *_cred)
  #define get_current_groups()                           \
  ({                                                     \
         struct group_info *__groups;                    \
-       struct cred *__cred;                            \
+       const struct cred *__cred;                      \
         __cred = current_cred();                        \
         __groups = get_group_info(__cred->group_info);  \
         __groups;                                       \
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 178cdb4f1d4afe81a66e631872de4b587e95d1c4..c2bd68f2277a4b533f804d5dc3c6890a4485d95d 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2318,6 +2318,11 @@ extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*te
  extern struct inode * iget_locked(struct super_block *, unsigned long);
  extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
  extern int insert_inode_locked(struct inode *);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
+#else
+static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { };
+#endif
  extern void unlock_new_inode(struct inode *);
  extern unsigned int get_next_ino(void);
  
diff --git a/include/linux/fuse.h b/include/linux/fuse.h

index d464de53db4399c598ec3793f75290856b049255..464cff52686092c0f862f551b2db53ed3050b629 100644 (file)
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -47,6 +47,9 @@
   *  - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct
   *    fuse_ioctl_iovec' instead of ambiguous 'struct iovec'
   *  - add FUSE_IOCTL_32BIT flag
+ *
+ * 7.17
+ *  - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
   */
  
  #ifndef _LINUX_FUSE_H
@@ -78,7 +81,7 @@
  #define FUSE_KERNEL_VERSION 7
  
  /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 16
+#define FUSE_KERNEL_MINOR_VERSION 17
  
  /** The node ID of the root inode */
  #define FUSE_ROOT_ID 1
@@ -153,8 +156,10 @@ struct fuse_file_lock {
  /**
   * INIT request/reply flags
   *
+ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks
   * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
   * FUSE_DONT_MASK: don't apply umask to file mode on create operations
+ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks
   */
  #define FUSE_ASYNC_READ                (1 << 0)
  #define FUSE_POSIX_LOCKS       (1 << 1)
@@ -163,6 +168,7 @@ struct fuse_file_lock {
  #define FUSE_EXPORT_SUPPORT    (1 << 4)
  #define FUSE_BIG_WRITES                (1 << 5)
  #define FUSE_DONT_MASK         (1 << 6)
+#define FUSE_FLOCK_LOCKS       (1 << 10)
  
  /**
   * CUSE INIT request/reply flags
@@ -175,6 +181,7 @@ struct fuse_file_lock {
   * Release flags
   */
  #define FUSE_RELEASE_FLUSH     (1 << 0)
+#define FUSE_RELEASE_FLOCK_UNLOCK      (1 << 1)
  
  /**
   * Getattr flags
diff --git a/include/linux/hash.h b/include/linux/hash.h

index 06d25c189cc5679d0933747efa4a00b9758a4dca..b80506bdd733ee181f202ddb6322529d42299d1b 100644 (file)
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -63,7 +63,7 @@ static inline u32 hash_32(u32 val, unsigned int bits)
         return hash >> (32 - bits);
  }
  
-static inline unsigned long hash_ptr(void *ptr, unsigned int bits)
+static inline unsigned long hash_ptr(const void *ptr, unsigned int bits)
  {
         return hash_long((unsigned long)ptr, bits);
  }
diff --git a/include/linux/irq.h b/include/linux/irq.h

index 87a06f345bd2c5820e61ec5a5f9e4d5a77e55d77..59517300a315978e6f41398dc69a6a4f84c0f19e 100644 (file)
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -23,6 +23,7 @@
  #include <linux/errno.h>
  #include <linux/topology.h>
  #include <linux/wait.h>
+#include <linux/module.h>
  
  #include <asm/irq.h>
  #include <asm/ptrace.h>
@@ -547,7 +548,15 @@ static inline struct msi_desc *irq_data_get_msi(struct irq_data *d)
         return d->msi_desc;
  }
  
-int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node);
+int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
+               struct module *owner);
+
+static inline int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt,
+               int node)
+{
+       return __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE);
+}
+
  void irq_free_descs(unsigned int irq, unsigned int cnt);
  int irq_reserve_irqs(unsigned int from, unsigned int cnt);
  
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h

index 2d921b35212c42bbac394430b50fd90914439e24..150134ac709ab28372eb874f9afed2741f150988 100644 (file)
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -66,6 +66,7 @@ struct irq_desc {
  #ifdef CONFIG_PROC_FS
         struct proc_dir_entry   *dir;
  #endif
+       struct module           *owner;
         const char              *name;
  } ____cacheline_internodealigned_in_smp;
  
diff --git a/include/linux/loop.h b/include/linux/loop.h

index 66c194e2d9b9c072fc201b864506ae12c010112d..683d698901198935d12370c82dbdcb8e0b90cb0e 100644 (file)
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -64,7 +64,6 @@ struct loop_device {
  
         struct request_queue    *lo_queue;
         struct gendisk          *lo_disk;
-       struct list_head        lo_list;
  };
  
  #endif /* __KERNEL__ */
@@ -161,4 +160,8 @@ int loop_unregister_transfer(int number);
  #define LOOP_CHANGE_FD         0x4C06
  #define LOOP_SET_CAPACITY      0x4C07
  
+/* /dev/loop-control interface */
+#define LOOP_CTL_ADD           0x4C80
+#define LOOP_CTL_REMOVE                0x4C81
+#define LOOP_CTL_GET_FREE      0x4C82
  #endif
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h

index 3b535db00a94983324d42106fae5bd05670f3157..343bd7661f2ac648360daf5ca96a1c87ee24567d 100644 (file)
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -39,16 +39,6 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
                                         struct mem_cgroup *mem_cont,
                                         int active, int file);
  
-struct memcg_scanrecord {
-       struct mem_cgroup *mem; /* scanend memory cgroup */
-       struct mem_cgroup *root; /* scan target hierarchy root */
-       int context;            /* scanning context (see memcontrol.c) */
-       unsigned long nr_scanned[2]; /* the number of scanned pages */
-       unsigned long nr_rotated[2]; /* the number of rotated pages */
-       unsigned long nr_freed[2]; /* the number of freed pages */
-       unsigned long elapsed; /* nsec of time elapsed while scanning */
-};
-
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR
  /*
   * All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -127,15 +117,6 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page);
  extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
                                         struct task_struct *p);
  
-extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
-                                                 gfp_t gfp_mask, bool noswap,
-                                                 struct memcg_scanrecord *rec);
-extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
-                                               gfp_t gfp_mask, bool noswap,
-                                               struct zone *zone,
-                                               struct memcg_scanrecord *rec,
-                                               unsigned long *nr_scanned);
-
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
  extern int do_swap_account;
  #endif
diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h

index d12f8d635a8159b8ce54a144dbf9eda1d90e29d8..97cf4f27d6470120a389bb8a2e1217871b8e3411 100644 (file)
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -26,7 +26,7 @@ struct wm8994_ldo_pdata {
         struct regulator_init_data *init_data;
  };
  
-#define WM8994_CONFIGURE_GPIO 0x8000
+#define WM8994_CONFIGURE_GPIO 0x10000
  
  #define WM8994_DRC_REGS 5
  #define WM8994_EQ_REGS  20
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h

index 18fd13028ba1aab8207a1baa9c5b1527435d3a01..c309b1ecdc1c8b089725258cce9613e5ff7d9bbc 100644 (file)
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -40,6 +40,7 @@
  #define BTRFS_MINOR            234
  #define AUTOFS_MINOR           235
  #define MAPPER_CTRL_MINOR      236
+#define LOOP_CTRL_MINOR                237
  #define MISC_DYNAMIC_MINOR     255
  
  struct device;
diff --git a/include/linux/mm.h b/include/linux/mm.h

index f2690cf49827abc641d2bf59d8dc359b7bbce15e..7438071b44aa7d3c5f12a60824941de05940eb42 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -685,7 +685,7 @@ static inline void set_page_section(struct page *page, unsigned long section)
         page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
  }
  
-static inline unsigned long page_to_section(struct page *page)
+static inline unsigned long page_to_section(const struct page *page)
  {
         return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
  }
@@ -720,7 +720,7 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
  
  static __always_inline void *lowmem_page_address(const struct page *page)
  {
-       return __va(PFN_PHYS(page_to_pfn((struct page *)page)));
+       return __va(PFN_PHYS(page_to_pfn(page)));
  }
  
  #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
@@ -737,7 +737,7 @@ static __always_inline void *lowmem_page_address(const struct page *page)
  #endif
  
  #if defined(HASHED_PAGE_VIRTUAL)
-void *page_address(struct page *page);
+void *page_address(const struct page *page);
  void set_page_address(struct page *page, void *virtual);
  void page_address_init(void);
  #endif
@@ -962,6 +962,8 @@ int invalidate_inode_page(struct page *page);
  #ifdef CONFIG_MMU
  extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                         unsigned long address, unsigned int flags);
+extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
+                           unsigned long address, unsigned int fault_flags);
  #else
  static inline int handle_mm_fault(struct mm_struct *mm,
                         struct vm_area_struct *vma, unsigned long address,
@@ -971,6 +973,14 @@ static inline int handle_mm_fault(struct mm_struct *mm,
         BUG();
         return VM_FAULT_SIGBUS;
  }
+static inline int fixup_user_fault(struct task_struct *tsk,
+               struct mm_struct *mm, unsigned long address,
+               unsigned int fault_flags)
+{
+       /* should never happen if there's no MMU */
+       BUG();
+       return -EFAULT;
+}
  #endif
  
  extern int make_pages_present(unsigned long addr, unsigned long end);
@@ -988,8 +998,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
  int get_user_pages_fast(unsigned long start, int nr_pages, int write,
                         struct page **pages);
  struct page *get_dump_page(unsigned long addr);
-extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
-                           unsigned long address, unsigned int fault_flags);
  
  extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
  extern void do_invalidatepage(struct page *page, unsigned long offset);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h

index 0f83858147a626c1a0bf463597e4f6ff82adfa3c..1d09562ccf7394b2ae0a2cdf38cf52e17ccd53ec 100644 (file)
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -56,8 +56,6 @@ struct mmc_ios {
  #define MMC_TIMING_UHS_SDR104  4
  #define MMC_TIMING_UHS_DDR50   5
  
-       unsigned char   ddr;                    /* dual data rate used */
-
  #define MMC_SDR_MODE           0
  #define MMC_1_2V_DDR_MODE      1
  #define MMC_1_8V_DDR_MODE      2
diff --git a/include/linux/netlink.h b/include/linux/netlink.h

index 2e17c5dbdcb8a97b0e243868e43179d599637e18..180540a84d3736041b9030e39eaf00889dfbfbb3 100644 (file)
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -29,7 +29,7 @@
  #define MAX_LINKS 32           
  
  struct sockaddr_nl {
-       sa_family_t     nl_family;      /* AF_NETLINK   */
+       __kernel_sa_family_t    nl_family;      /* AF_NETLINK   */
         unsigned short  nl_pad;         /* zero         */
         __u32           nl_pid;         /* port ID      */
                 __u32           nl_groups;      /* multicast groups mask */
diff --git a/include/linux/of.h b/include/linux/of.h

index 0085bb01c041e969276b2ab5107dc4e2446d445c..9180dc5cb00b02d4e586641ec4f1e64b10d99131 100644 (file)
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -256,6 +256,13 @@ static inline int of_property_read_string(struct device_node *np,
         return -ENOSYS;
  }
  
+static inline const void *of_get_property(const struct device_node *node,
+                               const char *name,
+                               int *lenp)
+{
+       return NULL;
+}
+
  #endif /* CONFIG_OF */
  
  static inline int of_property_read_u32(const struct device_node *np,
diff --git a/include/linux/pci.h b/include/linux/pci.h

index f27893b3b724205b3411e9081b99b4ca8e4429c2..8c230cbcbb486dd7cafb28cce85a1b0ee76a4995 100644 (file)
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -251,7 +251,8 @@ struct pci_dev {
         u8              revision;       /* PCI revision, low byte of class word */
         u8              hdr_type;       /* PCI header type (`multi' flag masked out) */
         u8              pcie_cap;       /* PCI-E capability offset */
-       u8              pcie_type;      /* PCI-E device/port type */
+       u8              pcie_type:4;    /* PCI-E device/port type */
+       u8              pcie_mpss:3;    /* PCI-E Max Payload Size Supported */
         u8              rom_base_reg;   /* which config register controls the ROM */
         u8              pin;            /* which interrupt pin this device uses */
  
@@ -617,6 +618,16 @@ struct pci_driver {
  /* these external functions are only available when PCI support is enabled */
  #ifdef CONFIG_PCI
  
+extern void pcie_bus_configure_settings(struct pci_bus *bus, u8 smpss);
+
+enum pcie_bus_config_types {
+       PCIE_BUS_PERFORMANCE,
+       PCIE_BUS_SAFE,
+       PCIE_BUS_PEER2PEER,
+};
+
+extern enum pcie_bus_config_types pcie_bus_config;
+
  extern struct bus_type pci_bus_type;
  
  /* Do NOT directly access these two variables, unless you are arch specific pci
@@ -796,10 +807,13 @@ int pcix_get_mmrbc(struct pci_dev *dev);
  int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc);
  int pcie_get_readrq(struct pci_dev *dev);
  int pcie_set_readrq(struct pci_dev *dev, int rq);
+int pcie_get_mps(struct pci_dev *dev);
+int pcie_set_mps(struct pci_dev *dev, int mps);
  int __pci_reset_function(struct pci_dev *dev);
  int pci_reset_function(struct pci_dev *dev);
  void pci_update_resource(struct pci_dev *dev, int resno);
  int __must_check pci_assign_resource(struct pci_dev *dev, int i);
+int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align);
  int pci_select_bars(struct pci_dev *dev, unsigned long flags);
  
  /* ROM control related routines */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h

index 245bafdafd5ec78e1461b7cbb5817714a28952ed..c816075c01ceb3772241e17729dabd244be62764 100644 (file)
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -944,8 +944,10 @@ extern void perf_pmu_unregister(struct pmu *pmu);
  
  extern int perf_num_counters(void);
  extern const char *perf_pmu_name(void);
-extern void __perf_event_task_sched_in(struct task_struct *task);
-extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
+extern void __perf_event_task_sched_in(struct task_struct *prev,
+                                      struct task_struct *task);
+extern void __perf_event_task_sched_out(struct task_struct *prev,
+                                       struct task_struct *next);
  extern int perf_event_init_task(struct task_struct *child);
  extern void perf_event_exit_task(struct task_struct *child);
  extern void perf_event_free_task(struct task_struct *task);
@@ -1059,17 +1061,20 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
  
  extern struct jump_label_key perf_sched_events;
  
-static inline void perf_event_task_sched_in(struct task_struct *task)
+static inline void perf_event_task_sched_in(struct task_struct *prev,
+                                           struct task_struct *task)
  {
         if (static_branch(&perf_sched_events))
-               __perf_event_task_sched_in(task);
+               __perf_event_task_sched_in(prev, task);
  }
  
-static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
+static inline void perf_event_task_sched_out(struct task_struct *prev,
+                                            struct task_struct *next)
  {
         perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
  
-       __perf_event_task_sched_out(task, next);
+       if (static_branch(&perf_sched_events))
+               __perf_event_task_sched_out(prev, next);
  }
  
  extern void perf_event_mmap(struct vm_area_struct *vma);
@@ -1139,10 +1144,11 @@ extern void perf_event_disable(struct perf_event *event);
  extern void perf_event_task_tick(void);
  #else
  static inline void
-perf_event_task_sched_in(struct task_struct *task)                     { }
+perf_event_task_sched_in(struct task_struct *prev,
+                        struct task_struct *task)                      { }
  static inline void
-perf_event_task_sched_out(struct task_struct *task,
-                           struct task_struct *next)                   { }
+perf_event_task_sched_out(struct task_struct *prev,
+                         struct task_struct *next)                     { }
  static inline int perf_event_init_task(struct task_struct *child)      { return 0; }
  static inline void perf_event_exit_task(struct task_struct *child)     { }
  static inline void perf_event_free_task(struct task_struct *task)      { }
diff --git a/include/linux/personality.h b/include/linux/personality.h

index eec3bae164d451a38b23906a283af54965074ec0..8fc7dd1a57ff29cb96722a01290f07de6dd1d23a 100644 (file)
--- a/include/linux/personality.h
+++ b/include/linux/personality.h
@@ -22,6 +22,7 @@ extern int            __set_personality(unsigned int);
   * These occupy the top three bytes.
   */
  enum {
+       UNAME26 =               0x0020000,
         ADDR_NO_RANDOMIZE =     0x0040000,      /* disable randomization of VA space */
         FDPIC_FUNCPTRS =        0x0080000,      /* userspace function ptrs point to descriptors
                                                  * (signal handling)
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h

index 21097cb086fe68c42d1be61f13c9d9c48d8802e8..f9ec1736a116a3f157d0abb49234be0597572c2c 100644 (file)
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -72,8 +72,6 @@ extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
  extern void pm_genpd_init(struct generic_pm_domain *genpd,
                           struct dev_power_governor *gov, bool is_off);
  extern int pm_genpd_poweron(struct generic_pm_domain *genpd);
-extern void pm_genpd_poweroff_unused(void);
-extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd);
  #else
  static inline int pm_genpd_add_device(struct generic_pm_domain *genpd,
                                       struct device *dev)
@@ -101,8 +99,14 @@ static inline int pm_genpd_poweron(struct generic_pm_domain *genpd)
  {
         return -ENOSYS;
  }
-static inline void pm_genpd_poweroff_unused(void) {}
+#endif
+
+#ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME
+extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd);
+extern void pm_genpd_poweroff_unused(void);
+#else
  static inline void genpd_queue_power_off_work(struct generic_pm_domain *gpd) {}
+static inline void pm_genpd_poweroff_unused(void) {}
  #endif
  
  #endif /* _LINUX_PM_DOMAIN_H */
diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h

index 5e3e25a3c9c38d3c3c5be63d2b40cd740f62cfd7..63d2df43e61a1999206d15b6d06f3567b9ac8f9e 100644 (file)
--- a/include/linux/pwm_backlight.h
+++ b/include/linux/pwm_backlight.h
@@ -14,6 +14,7 @@ struct platform_pwm_backlight_data {
         unsigned int pwm_period_ns;
         int (*init)(struct device *dev);
         int (*notify)(struct device *dev, int brightness);
+       void (*notify_after)(struct device *dev, int brightness);
         void (*exit)(struct device *dev);
         int (*check_fb)(struct device *dev, struct fb_info *info);
  };
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h

index 26f6ea4444e39fedda935b5505439a6de3301f41..b47771aa57180b6d553fbdb9363973f28569fde6 100644 (file)
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -123,7 +123,7 @@ struct regulator_bulk_data {
         const char *supply;
         struct regulator *consumer;
  
-       /* Internal use */
+       /* private: Internal use */
         int ret;
  };
  
diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h

index 9026b30238f32de96612bee16d8579022b8e8e10..218168a2b5e9c4fb94055b844b7326996f245447 100644 (file)
--- a/include/linux/rio_regs.h
+++ b/include/linux/rio_regs.h
@@ -36,12 +36,12 @@
  #define  RIO_PEF_PROCESSOR             0x20000000      /* [I] Processor */
  #define  RIO_PEF_SWITCH                        0x10000000      /* [I] Switch */
  #define  RIO_PEF_MULTIPORT             0x08000000      /* [VI, 2.1] Multiport */
-#define  RIO_PEF_INB_MBOX              0x00f00000      /* [II] Mailboxes */
-#define  RIO_PEF_INB_MBOX0             0x00800000      /* [II] Mailbox 0 */
-#define  RIO_PEF_INB_MBOX1             0x00400000      /* [II] Mailbox 1 */
-#define  RIO_PEF_INB_MBOX2             0x00200000      /* [II] Mailbox 2 */
-#define  RIO_PEF_INB_MBOX3             0x00100000      /* [II] Mailbox 3 */
-#define  RIO_PEF_INB_DOORBELL          0x00080000      /* [II] Doorbells */
+#define  RIO_PEF_INB_MBOX              0x00f00000      /* [II, <= 1.2] Mailboxes */
+#define  RIO_PEF_INB_MBOX0             0x00800000      /* [II, <= 1.2] Mailbox 0 */
+#define  RIO_PEF_INB_MBOX1             0x00400000      /* [II, <= 1.2] Mailbox 1 */
+#define  RIO_PEF_INB_MBOX2             0x00200000      /* [II, <= 1.2] Mailbox 2 */
+#define  RIO_PEF_INB_MBOX3             0x00100000      /* [II, <= 1.2] Mailbox 3 */
+#define  RIO_PEF_INB_DOORBELL          0x00080000      /* [II, <= 1.2] Doorbells */
  #define  RIO_PEF_EXT_RT                        0x00000200      /* [III, 1.3] Extended route table support */
  #define  RIO_PEF_STD_RT                        0x00000100      /* [III, 1.3] Standard route table support */
  #define  RIO_PEF_CTLS                  0x00000010      /* [III] CTLS */
@@ -102,7 +102,7 @@
  #define        RIO_SWITCH_RT_LIMIT     0x34    /* [III, 1.3] Switch Route Table Destination ID Limit CAR */
  #define         RIO_RT_MAX_DESTID              0x0000ffff
  
-#define RIO_MBOX_CSR           0x40    /* [II] Mailbox CSR */
+#define RIO_MBOX_CSR           0x40    /* [II, <= 1.2] Mailbox CSR */
  #define  RIO_MBOX0_AVAIL               0x80000000      /* [II] Mbox 0 avail */
  #define  RIO_MBOX0_FULL                        0x40000000      /* [II] Mbox 0 full */
  #define  RIO_MBOX0_EMPTY               0x20000000      /* [II] Mbox 0 empty */
@@ -128,8 +128,8 @@
  #define  RIO_MBOX3_FAIL                        0x00000008      /* [II] Mbox 3 fail */
  #define  RIO_MBOX3_ERROR               0x00000004      /* [II] Mbox 3 error */
  
-#define RIO_WRITE_PORT_CSR     0x44    /* [I] Write Port CSR */
-#define RIO_DOORBELL_CSR       0x44    /* [II] Doorbell CSR */
+#define RIO_WRITE_PORT_CSR     0x44    /* [I, <= 1.2] Write Port CSR */
+#define RIO_DOORBELL_CSR       0x44    /* [II, <= 1.2] Doorbell CSR */
  #define  RIO_DOORBELL_AVAIL            0x80000000      /* [II] Doorbell avail */
  #define  RIO_DOORBELL_FULL             0x40000000      /* [II] Doorbell full */
  #define  RIO_DOORBELL_EMPTY            0x20000000      /* [II] Doorbell empty */
diff --git a/include/linux/rtc.h b/include/linux/rtc.h

index b27ebea25660bff86dbdf2f86040f26904ab45c7..93f4d035076bc8f295fde5b0a6b5b89e4c5df9ff 100644 (file)
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -97,6 +97,9 @@ struct rtc_pll_info {
  #define RTC_AF 0x20    /* Alarm interrupt */
  #define RTC_UF 0x10    /* Update interrupt for 1Hz RTC */
  
+
+#define RTC_MAX_FREQ   8192
+
  #ifdef __KERNEL__
  
  #include <linux/types.h>
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 20b03bf94748c4520bc7a87d7f2967227409c96b..4ac2c0578e0ff9133c4c761fcf3291879ae646d4 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1767,6 +1767,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
  #define PF_DUMPCORE    0x00000200      /* dumped core */
  #define PF_SIGNALED    0x00000400      /* killed by a signal */
  #define PF_MEMALLOC    0x00000800      /* Allocating memory */
+#define PF_NPROC_EXCEEDED 0x00001000   /* set_user noticed that RLIMIT_NPROC was exceeded */
  #define PF_USED_MATH   0x00002000      /* if unset the fpu must be initialized before use */
  #define PF_FREEZING    0x00004000      /* freeze in progress. do not account to load */
  #define PF_NOFREEZE    0x00008000      /* this thread should not be frozen */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h

index 7b996ed86d5b823d1b92a0df82da033cafa4d6c3..8bd383caa363ad31bd965b2f2316298042da8af5 100644 (file)
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -524,6 +524,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
  extern bool skb_recycle_check(struct sk_buff *skb, int skb_size);
  
  extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
+extern int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
  extern struct sk_buff *skb_clone(struct sk_buff *skb,
                                  gfp_t priority);
  extern struct sk_buff *skb_copy(const struct sk_buff *skb,
diff --git a/include/linux/snmp.h b/include/linux/snmp.h

index 12b2b18e50c1c321f208a83d4bbda61ce985450c..e16557a357e5dc919036dbb0a4ac1473c153e9b4 100644 (file)
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -231,6 +231,8 @@ enum
         LINUX_MIB_TCPDEFERACCEPTDROP,
         LINUX_MIB_IPRPFILTER, /* IP Reverse Path Filter (rp_filter) */
         LINUX_MIB_TCPTIMEWAITOVERFLOW,          /* TCPTimeWaitOverflow */
+       LINUX_MIB_TCPREQQFULLDOCOOKIES,         /* TCPReqQFullDoCookies */
+       LINUX_MIB_TCPREQQFULLDROP,              /* TCPReqQFullDrop */
         __LINUX_MIB_MAX
  };
  
diff --git a/include/linux/socket.h b/include/linux/socket.h

index e17f82266639e8b2a54763ebc089e482fb50a2ca..d0e77f607a7989ccc7336d9abf895b82abe88b61 100644 (file)
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -8,8 +8,10 @@
  #define _K_SS_ALIGNSIZE        (__alignof__ (struct sockaddr *))
                                 /* Implementation specific desired alignment */
  
+typedef unsigned short __kernel_sa_family_t;
+
  struct __kernel_sockaddr_storage {
-       unsigned short  ss_family;              /* address family */
+       __kernel_sa_family_t    ss_family;              /* address family */
         /* Following field(s) are implementation specific */
         char            __data[_K_SS_MAXSIZE - sizeof(unsigned short)];
                                 /* space to achieve desired size, */
@@ -35,7 +37,7 @@ struct seq_file;
  extern void socket_seq_show(struct seq_file *seq);
  #endif
  
-typedef unsigned short sa_family_t;
+typedef __kernel_sa_family_t   sa_family_t;
  
  /*
   *     1003.1g requires sa_family_t and that sa_data is char.
diff --git a/include/linux/swap.h b/include/linux/swap.h

index 14d62490922e12bccc319a0827ec22a1d5c3a1b3..c71f84bb62ecbae2b7ad6efac084eb175ad3a4ea 100644 (file)
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -252,6 +252,12 @@ static inline void lru_cache_add_file(struct page *page)
  extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                                         gfp_t gfp_mask, nodemask_t *mask);
  extern int __isolate_lru_page(struct page *page, int mode, int file);
+extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
+                                                 gfp_t gfp_mask, bool noswap);
+extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+                                               gfp_t gfp_mask, bool noswap,
+                                               struct zone *zone,
+                                               unsigned long *nr_scanned);
  extern unsigned long shrink_all_memory(unsigned long nr_pages);
  extern int vm_swappiness;
  extern int remove_mapping(struct address_space *mapping, struct page *page);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h

index 8c03b98df5f93d196d523c60f31a850ffe9a22e4..1ff0ec2a5e8d29a2a4519bb882900c56f39c0dab 100644 (file)
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -702,9 +702,6 @@ asmlinkage long sys_sysctl(struct __sysctl_args __user *args);
  asmlinkage long sys_sysinfo(struct sysinfo __user *info);
  asmlinkage long sys_sysfs(int option,
                                 unsigned long arg1, unsigned long arg2);
-asmlinkage long sys_nfsservctl(int cmd,
-                               struct nfsctl_arg __user *arg,
-                               void __user *res);
  asmlinkage long sys_syslog(int type, char __user *buf, int len);
  asmlinkage long sys_uselib(const char __user *library);
  asmlinkage long sys_ni_syscall(void);
diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h

index b004e557caa9c064c15b41b26bf773677f529913..2ef4385da6bf7a70ed60a8a01b7fdb54201c1840 100644 (file)
--- a/include/linux/ti_wilink_st.h
+++ b/include/linux/ti_wilink_st.h
@@ -410,7 +410,28 @@ struct gps_event_hdr {
         u16 plen;
  } __attribute__ ((packed));
  
-/* platform data */
+/**
+ * struct ti_st_plat_data - platform data shared between ST driver and
+ *     platform specific board file which adds the ST device.
+ * @nshutdown_gpio: Host's GPIO line to which chip's BT_EN is connected.
+ * @dev_name: The UART/TTY name to which chip is interfaced. (eg: /dev/ttyS1)
+ * @flow_cntrl: Should always be 1, since UART's CTS/RTS is used for PM
+ *     purposes.
+ * @baud_rate: The baud rate supported by the Host UART controller, this will
+ *     be shared across with the chip via a HCI VS command from User-Space Init
+ *     Mgr application.
+ * @suspend:
+ * @resume: legacy PM routines hooked to platform specific board file, so as
+ *     to take chip-host interface specific action.
+ * @chip_enable:
+ * @chip_disable: Platform/Interface specific mux mode setting, GPIO
+ *     configuring, Host side PM disabling etc.. can be done here.
+ * @chip_asleep:
+ * @chip_awake: Chip specific deep sleep states is communicated to Host
+ *     specific board-xx.c to take actions such as cut UART clocks when chip
+ *     asleep or run host faster when chip awake etc..
+ *
+ */
  struct ti_st_plat_data {
         long nshutdown_gpio;
         unsigned char dev_name[UART_DEV_NAME_LEN]; /* uart name */
@@ -418,6 +439,10 @@ struct ti_st_plat_data {
         unsigned long baud_rate;
         int (*suspend)(struct platform_device *, pm_message_t);
         int (*resume)(struct platform_device *);
+       int (*chip_enable) (struct kim_data_s *);
+       int (*chip_disable) (struct kim_data_s *);
+       int (*chip_asleep) (struct kim_data_s *);
+       int (*chip_awake) (struct kim_data_s *);
  };
  
  #endif /* TI_WILINK_ST_H */
diff --git a/include/linux/tty.h b/include/linux/tty.h

index 44bc0c5617e1c227df1b3699dbdac1ac7dbab191..5f2ede82b3d67e5223089f6db9cec489267991ef 100644 (file)
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -421,6 +421,8 @@ extern void tty_driver_flush_buffer(struct tty_struct *tty);
  extern void tty_throttle(struct tty_struct *tty);
  extern void tty_unthrottle(struct tty_struct *tty);
  extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws);
+extern void tty_driver_remove_tty(struct tty_driver *driver,
+                                 struct tty_struct *tty);
  extern void tty_shutdown(struct tty_struct *tty);
  extern void tty_free_termios(struct tty_struct *tty);
  extern int is_current_pgrp_orphaned(void);
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h

index 9deeac85524078f0ef97b140f270c39391509f51..ecdaeb98b293727274b6511ee7ef523c00324564 100644 (file)
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -47,6 +47,9 @@
   *
   *     This routine is called synchronously when a particular tty device
   *     is closed for the last time freeing up the resources.
+ *     Note that tty_shutdown() is not called if ops->shutdown is defined.
+ *     This means one is responsible to take care of calling ops->remove (e.g.
+ *     via tty_driver_remove_tty) and releasing tty->termios.
   *
   *
   * void (*cleanup)(struct tty_struct * tty);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h

index f1bfa12ea246209802624502c50da5380e66049e..2b8963ff0f359b4c6dab919e2ccc3c2c0324b5bb 100644 (file)
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -12,15 +12,6 @@
   *
   *     (thresh - thresh/DIRTY_FULL_SCOPE, thresh)
   *
- * The 1/16 region above the global dirty limit will be put to maximum pauses:
- *
- *     (limit, limit + limit/DIRTY_MAXPAUSE_AREA)
- *
- * The 1/16 region above the max-pause region, dirty exceeded bdi's will be put
- * to loops:
- *
- *     (limit + limit/DIRTY_MAXPAUSE_AREA, limit + limit/DIRTY_PASSGOOD_AREA)
- *
   * Further beyond, all dirtier tasks will enter a loop waiting (possibly long
   * time) for the dirty pages to drop, unless written enough pages.
   *
@@ -31,8 +22,6 @@
   */
  #define DIRTY_SCOPE            8
  #define DIRTY_FULL_SCOPE       (DIRTY_SCOPE / 2)
-#define DIRTY_MAXPAUSE_AREA            16
-#define DIRTY_PASSGOOD_AREA            8
  
  /*
   * 4MB minimal write chunk size
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h

index 342dcf13d039a8470ddcf4628b55c510c7fb13fe..a6326ef8ade6ccfbf19b615167f9863bc941d663 100644 (file)
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -288,6 +288,35 @@ enum p9_perm_t {
         P9_DMSETVTX = 0x00010000,
  };
  
+/* 9p2000.L open flags */
+#define P9_DOTL_RDONLY        00000000
+#define P9_DOTL_WRONLY        00000001
+#define P9_DOTL_RDWR          00000002
+#define P9_DOTL_NOACCESS      00000003
+#define P9_DOTL_CREATE        00000100
+#define P9_DOTL_EXCL          00000200
+#define P9_DOTL_NOCTTY        00000400
+#define P9_DOTL_TRUNC         00001000
+#define P9_DOTL_APPEND        00002000
+#define P9_DOTL_NONBLOCK      00004000
+#define P9_DOTL_DSYNC         00010000
+#define P9_DOTL_FASYNC        00020000
+#define P9_DOTL_DIRECT        00040000
+#define P9_DOTL_LARGEFILE     00100000
+#define P9_DOTL_DIRECTORY     00200000
+#define P9_DOTL_NOFOLLOW      00400000
+#define P9_DOTL_NOATIME       01000000
+#define P9_DOTL_CLOEXEC       02000000
+#define P9_DOTL_SYNC          04000000
+
+/* 9p2000.L at flags */
+#define P9_DOTL_AT_REMOVEDIR           0x200
+
+/* 9p2000.L lock type */
+#define P9_LOCK_TYPE_RDLCK 0
+#define P9_LOCK_TYPE_WRLCK 1
+#define P9_LOCK_TYPE_UNLCK 2
+
  /**
   * enum p9_qid_t - QID types
   * @P9_QTDIR: directory
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h

index d17f47fc9e31b9a3298b1911b5c46c9fb34d3400..401d73bd151f3a471d7be694531b0ae49837a00b 100644 (file)
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1744,6 +1744,8 @@ struct wiphy_wowlan_support {
   *     by default for perm_addr. In this case, the mask should be set to
   *     all-zeroes. In this case it is assumed that the device can handle
   *     the same number of arbitrary MAC addresses.
+ * @registered: protects ->resume and ->suspend sysfs callbacks against
+ *     unregister hardware
   * @debugfsdir: debugfs directory used for this wiphy, will be renamed
   *     automatically on wiphy renames
   * @dev: (virtual) struct device for this wiphy
@@ -1865,6 +1867,9 @@ struct wiphy {
          * you need use set_wiphy_dev() (see below) */
         struct device dev;
  
+       /* protects ->resume, ->suspend sysfs callbacks against unregister hw */
+       bool registered;
+
         /* dir in debugfs: ieee80211/<wiphyname> */
         struct dentry *debugfsdir;
  
diff --git a/include/net/flow.h b/include/net/flow.h

index 78113daadd631ef5d000f0b65eac968af738bf9a..a09447749e2d59a467c51cde514b35cef79c9b1b 100644 (file)
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -7,6 +7,7 @@
  #ifndef _NET_FLOW_H
  #define _NET_FLOW_H
  
+#include <linux/socket.h>
  #include <linux/in6.h>
  #include <linux/atomic.h>
  
@@ -68,7 +69,7 @@ struct flowi4 {
  #define fl4_ipsec_spi          uli.spi
  #define fl4_mh_type            uli.mht.type
  #define fl4_gre_key            uli.gre_key
-};
+} __attribute__((__aligned__(BITS_PER_LONG/8)));
  
  static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
                                       __u32 mark, __u8 tos, __u8 scope,
@@ -112,7 +113,7 @@ struct flowi6 {
  #define fl6_ipsec_spi          uli.spi
  #define fl6_mh_type            uli.mht.type
  #define fl6_gre_key            uli.gre_key
-};
+} __attribute__((__aligned__(BITS_PER_LONG/8)));
  
  struct flowidn {
         struct flowi_common     __fl_common;
@@ -127,7 +128,7 @@ struct flowidn {
         union flowi_uli         uli;
  #define fld_sport              uli.ports.sport
  #define fld_dport              uli.ports.dport
-};
+} __attribute__((__aligned__(BITS_PER_LONG/8)));
  
  struct flowi {
         union {
@@ -161,6 +162,24 @@ static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn)
         return container_of(fldn, struct flowi, u.dn);
  }
  
+typedef unsigned long flow_compare_t;
+
+static inline size_t flow_key_size(u16 family)
+{
+       switch (family) {
+       case AF_INET:
+               BUILD_BUG_ON(sizeof(struct flowi4) % sizeof(flow_compare_t));
+               return sizeof(struct flowi4) / sizeof(flow_compare_t);
+       case AF_INET6:
+               BUILD_BUG_ON(sizeof(struct flowi6) % sizeof(flow_compare_t));
+               return sizeof(struct flowi6) / sizeof(flow_compare_t);
+       case AF_DECnet:
+               BUILD_BUG_ON(sizeof(struct flowidn) % sizeof(flow_compare_t));
+               return sizeof(struct flowidn) / sizeof(flow_compare_t);
+       }
+       return 0;
+}
+
  #define FLOW_DIR_IN    0
  #define FLOW_DIR_OUT   1
  #define FLOW_DIR_FWD   2
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h

index caaff5f5f39f503a750ad16ea942fd12adb761ed..b897d6e6d0a5a35f6a19b5403bbd7b5cefa47a39 100644 (file)
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -238,7 +238,7 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
  {
         __u8 flags = 0;
  
-       if (inet_sk(sk)->transparent)
+       if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl)
                 flags |= FLOWI_FLAG_ANYSRC;
         if (sk->sk_protocol == IPPROTO_TCP)
                 flags |= FLOWI_FLAG_PRECOW_METRICS;
diff --git a/include/net/request_sock.h b/include/net/request_sock.h

index 99e6e19b57c2188ed0a592789e68e00c2d0a55f8..4c0766e201e39b7650773e6ba8a6329d997269b4 100644 (file)
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -96,7 +96,8 @@ extern int sysctl_max_syn_backlog;
   */
  struct listen_sock {
         u8                      max_qlen_log;
-       /* 3 bytes hole, try to use */
+       u8                      synflood_warned;
+       /* 2 bytes hole, try to use */
         int                     qlen;
         int                     qlen_young;
         int                     clock_hand;
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h

index 6506458ccd33bbc3df02f76f8ae661c2fafd18d9..712b3bebeda78dbb2d3dbb2387fd15f3dd6b82ac 100644 (file)
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -109,6 +109,7 @@ typedef enum {
         SCTP_CMD_SEND_MSG,       /* Send the whole use message */
         SCTP_CMD_SEND_NEXT_ASCONF, /* Send the next ASCONF after ACK */
         SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/
+       SCTP_CMD_SET_ASOC,       /* Restore association context */
         SCTP_CMD_LAST
  } sctp_verb_t;
  
diff --git a/include/net/tcp.h b/include/net/tcp.h

index 149a415d1e0a1071dfc555e941489efb76071138..acc620a4a45f318f745c7164363c6fdf787df144 100644 (file)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -431,17 +431,34 @@ extern int tcp_disconnect(struct sock *sk, int flags);
  extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
  extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, 
                                     struct ip_options *opt);
+#ifdef CONFIG_SYN_COOKIES
  extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, 
                                      __u16 *mss);
+#else
+static inline __u32 cookie_v4_init_sequence(struct sock *sk,
+                                           struct sk_buff *skb,
+                                           __u16 *mss)
+{
+       return 0;
+}
+#endif
  
  extern __u32 cookie_init_timestamp(struct request_sock *req);
  extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *);
  
  /* From net/ipv6/syncookies.c */
  extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
+#ifdef CONFIG_SYN_COOKIES
  extern __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb,
                                      __u16 *mss);
-
+#else
+static inline __u32 cookie_v6_init_sequence(struct sock *sk,
+                                           struct sk_buff *skb,
+                                           __u16 *mss)
+{
+       return 0;
+}
+#endif
  /* tcp_output.c */
  
  extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
@@ -460,6 +477,9 @@ extern int tcp_write_wakeup(struct sock *);
  extern void tcp_send_fin(struct sock *sk);
  extern void tcp_send_active_reset(struct sock *sk, gfp_t priority);
  extern int tcp_send_synack(struct sock *);
+extern int tcp_syn_flood_action(struct sock *sk,
+                               const struct sk_buff *skb,
+                               const char *proto);
  extern void tcp_push_one(struct sock *, unsigned int mss_now);
  extern void tcp_send_ack(struct sock *sk);
  extern void tcp_send_delayed_ack(struct sock *sk);
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h

index 5271a741c3a3f6c54b0bc7ec2cf78d53f8578eaa..498433dd067dd64eb6c728e566e61585093e960f 100644 (file)
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -39,6 +39,7 @@ extern int                    datagram_recv_ctl(struct sock *sk,
                                                   struct sk_buff *skb);
  
  extern int                     datagram_send_ctl(struct net *net,
+                                                 struct sock *sk,
                                                   struct msghdr *msg,
                                                   struct flowi6 *fl6,
                                                   struct ipv6_txoptions *opt,
diff --git a/include/target/target_core_fabric_ops.h b/include/target/target_core_fabric_ops.h

index 2de8fe9075963350f8159759c118dedc70db1b57..126c675f4f14e5b41afd30694615b84d5eedbcbc 100644 (file)
--- a/include/target/target_core_fabric_ops.h
+++ b/include/target/target_core_fabric_ops.h
@@ -27,6 +27,12 @@ struct target_core_fabric_ops {
         int (*tpg_check_demo_mode_cache)(struct se_portal_group *);
         int (*tpg_check_demo_mode_write_protect)(struct se_portal_group *);
         int (*tpg_check_prod_mode_write_protect)(struct se_portal_group *);
+       /*
+        * Optionally used by fabrics to allow demo-mode login, but not
+        * expose any TPG LUNs, and return 'not connected' in standard
+        * inquiry response
+        */
+       int (*tpg_check_demo_mode_login_only)(struct se_portal_group *);
         struct se_node_acl *(*tpg_alloc_fabric_acl)(
                                         struct se_portal_group *);
         void (*tpg_release_fabric_acl)(struct se_portal_group *,
diff --git a/include/trace/events/block.h b/include/trace/events/block.h

index bf366547da252077cf168522a1ff22735a10d3a5..05c5e61f0a7ca23b03a6a965a375323ea2b37036 100644 (file)
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -8,6 +8,8 @@
  #include <linux/blkdev.h>
  #include <linux/tracepoint.h>
  
+#define RWBS_LEN       8
+
  DECLARE_EVENT_CLASS(block_rq_with_error,
  
         TP_PROTO(struct request_queue *q, struct request *rq),
@@ -19,7 +21,7 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
                 __field(  sector_t,     sector                  )
                 __field(  unsigned int, nr_sector               )
                 __field(  int,          errors                  )
-               __array(  char,         rwbs,   6               )
+               __array(  char,         rwbs,   RWBS_LEN        )
                 __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
         ),
  
@@ -104,7 +106,7 @@ DECLARE_EVENT_CLASS(block_rq,
                 __field(  sector_t,     sector                  )
                 __field(  unsigned int, nr_sector               )
                 __field(  unsigned int, bytes                   )
-               __array(  char,         rwbs,   6               )
+               __array(  char,         rwbs,   RWBS_LEN        )
                 __array(  char,         comm,   TASK_COMM_LEN   )
                 __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
         ),
@@ -183,7 +185,7 @@ TRACE_EVENT(block_bio_bounce,
                 __field( dev_t,         dev                     )
                 __field( sector_t,      sector                  )
                 __field( unsigned int,  nr_sector               )
-               __array( char,          rwbs,   6               )
+               __array( char,          rwbs,   RWBS_LEN        )
                 __array( char,          comm,   TASK_COMM_LEN   )
         ),
  
@@ -222,7 +224,7 @@ TRACE_EVENT(block_bio_complete,
                 __field( sector_t,      sector          )
                 __field( unsigned,      nr_sector       )
                 __field( int,           error           )
-               __array( char,          rwbs,   6       )
+               __array( char,          rwbs,   RWBS_LEN)
         ),
  
         TP_fast_assign(
@@ -249,7 +251,7 @@ DECLARE_EVENT_CLASS(block_bio,
                 __field( dev_t,         dev                     )
                 __field( sector_t,      sector                  )
                 __field( unsigned int,  nr_sector               )
-               __array( char,          rwbs,   6               )
+               __array( char,          rwbs,   RWBS_LEN        )
                 __array( char,          comm,   TASK_COMM_LEN   )
         ),
  
@@ -321,7 +323,7 @@ DECLARE_EVENT_CLASS(block_get_rq,
                 __field( dev_t,         dev                     )
                 __field( sector_t,      sector                  )
                 __field( unsigned int,  nr_sector               )
-               __array( char,          rwbs,   6               )
+               __array( char,          rwbs,   RWBS_LEN        )
                 __array( char,          comm,   TASK_COMM_LEN   )
          ),
  
@@ -456,7 +458,7 @@ TRACE_EVENT(block_split,
                 __field( dev_t,         dev                             )
                 __field( sector_t,      sector                          )
                 __field( sector_t,      new_sector                      )
-               __array( char,          rwbs,           6               )
+               __array( char,          rwbs,           RWBS_LEN        )
                 __array( char,          comm,           TASK_COMM_LEN   )
         ),
  
@@ -498,7 +500,7 @@ TRACE_EVENT(block_bio_remap,
                 __field( unsigned int,  nr_sector       )
                 __field( dev_t,         old_dev         )
                 __field( sector_t,      old_sector      )
-               __array( char,          rwbs,   6       )
+               __array( char,          rwbs,   RWBS_LEN)
         ),
  
         TP_fast_assign(
@@ -542,7 +544,7 @@ TRACE_EVENT(block_rq_remap,
                 __field( unsigned int,  nr_sector       )
                 __field( dev_t,         old_dev         )
                 __field( sector_t,      old_sector      )
-               __array( char,          rwbs,   6       )
+               __array( char,          rwbs,   RWBS_LEN)
         ),
  
         TP_fast_assign(
diff --git a/kernel/Makefile b/kernel/Makefile

index d06467fc8f7c9669ad11d8930fc413adb4962c4f..eca595e2fd523e9dac8ac582b5fa2f4400887afa 100644 (file)
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
             kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
             hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
             notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
-           async.o range.o jump_label.o
+           async.o range.o
  obj-y += groups.o
  
  ifdef CONFIG_FUNCTION_TRACER
@@ -107,6 +107,7 @@ obj-$(CONFIG_PERF_EVENTS) += events/
  obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
  obj-$(CONFIG_PADATA) += padata.o
  obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_JUMP_LABEL) += jump_label.o
  
  ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
  # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/cred.c b/kernel/cred.c

index 174fa84eca303ced39cc03910114d495f7b777b0..8ef31f53c44c9aa8e74d3b15649fdf7d246a2a36 100644 (file)
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -508,10 +508,8 @@ int commit_creds(struct cred *new)
                 key_fsgid_changed(task);
  
         /* do it
-        * - What if a process setreuid()'s and this brings the
-        *   new uid over his NPROC rlimit?  We can check this now
-        *   cheaply with the new uid cache, so if it matters
-        *   we should be checking for it.  -DaveM
+        * RLIMIT_NPROC limits on user->processes have already been checked
+        * in set_user().
          */
         alter_cred_subscribers(new, 2);
         if (new->user != old->user)
diff --git a/kernel/events/core.c b/kernel/events/core.c

index b8785e26ee1cd28c33a1c0429a49bb515c34c8d2..0f857782d06f45eace0bc2bedb7bb1ccf3bc3512 100644 (file)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -399,14 +399,54 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
         local_irq_restore(flags);
  }
  
-static inline void perf_cgroup_sched_out(struct task_struct *task)
+static inline void perf_cgroup_sched_out(struct task_struct *task,
+                                        struct task_struct *next)
  {
-       perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
+       struct perf_cgroup *cgrp1;
+       struct perf_cgroup *cgrp2 = NULL;
+
+       /*
+        * we come here when we know perf_cgroup_events > 0
+        */
+       cgrp1 = perf_cgroup_from_task(task);
+
+       /*
+        * next is NULL when called from perf_event_enable_on_exec()
+        * that will systematically cause a cgroup_switch()
+        */
+       if (next)
+               cgrp2 = perf_cgroup_from_task(next);
+
+       /*
+        * only schedule out current cgroup events if we know
+        * that we are switching to a different cgroup. Otherwise,
+        * do no touch the cgroup events.
+        */
+       if (cgrp1 != cgrp2)
+               perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
  }
  
-static inline void perf_cgroup_sched_in(struct task_struct *task)
+static inline void perf_cgroup_sched_in(struct task_struct *prev,
+                                       struct task_struct *task)
  {
-       perf_cgroup_switch(task, PERF_CGROUP_SWIN);
+       struct perf_cgroup *cgrp1;
+       struct perf_cgroup *cgrp2 = NULL;
+
+       /*
+        * we come here when we know perf_cgroup_events > 0
+        */
+       cgrp1 = perf_cgroup_from_task(task);
+
+       /* prev can never be NULL */
+       cgrp2 = perf_cgroup_from_task(prev);
+
+       /*
+        * only need to schedule in cgroup events if we are changing
+        * cgroup during ctxsw. Cgroup events were not scheduled
+        * out of ctxsw out if that was not the case.
+        */
+       if (cgrp1 != cgrp2)
+               perf_cgroup_switch(task, PERF_CGROUP_SWIN);
  }
  
  static inline int perf_cgroup_connect(int fd, struct perf_event *event,
@@ -518,11 +558,13 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
  {
  }
  
-static inline void perf_cgroup_sched_out(struct task_struct *task)
+static inline void perf_cgroup_sched_out(struct task_struct *task,
+                                        struct task_struct *next)
  {
  }
  
-static inline void perf_cgroup_sched_in(struct task_struct *task)
+static inline void perf_cgroup_sched_in(struct task_struct *prev,
+                                       struct task_struct *task)
  {
  }
  
@@ -1988,7 +2030,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
          * cgroup event are system-wide mode only
          */
         if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
-               perf_cgroup_sched_out(task);
+               perf_cgroup_sched_out(task, next);
  }
  
  static void task_ctx_sched_out(struct perf_event_context *ctx)
@@ -2153,7 +2195,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
   * accessing the event control register. If a NMI hits, then it will
   * keep the event running.
   */
-void __perf_event_task_sched_in(struct task_struct *task)
+void __perf_event_task_sched_in(struct task_struct *prev,
+                               struct task_struct *task)
  {
         struct perf_event_context *ctx;
         int ctxn;
@@ -2171,7 +2214,7 @@ void __perf_event_task_sched_in(struct task_struct *task)
          * cgroup event are system-wide mode only
          */
         if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
-               perf_cgroup_sched_in(task);
+               perf_cgroup_sched_in(prev, task);
  }
  
  static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -2427,7 +2470,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
          * ctxswin cgroup events which are already scheduled
          * in.
          */
-       perf_cgroup_sched_out(current);
+       perf_cgroup_sched_out(current, NULL);
  
         raw_spin_lock(&ctx->lock);
         task_ctx_sched_out(ctx);
@@ -3353,8 +3396,8 @@ static int perf_event_index(struct perf_event *event)
  }
  
  static void calc_timer_values(struct perf_event *event,
-                               u64 *running,
-                               u64 *enabled)
+                               u64 *enabled,
+                               u64 *running)
  {
         u64 now, ctx_time;
  
diff --git a/kernel/fork.c b/kernel/fork.c

index e7ceaca896094eeff3da742b8442f54e0514ac2d..8e6b6f4fb272ba498a4acdbcb6a29c8f7e6c87dd 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1111,6 +1111,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                     p->real_cred->user != INIT_USER)
                         goto bad_fork_free;
         }
+       current->flags &= ~PF_NPROC_EXCEEDED;
  
         retval = copy_creds(p, clone_flags);
         if (retval < 0)
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c

index d5a3009da71aed6dd5f3a82895f2f21cecd656b6..dc5114b4c16cc6cd656290cdb03596a2ffd3f3b8 100644 (file)
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -178,7 +178,7 @@ void irq_shutdown(struct irq_desc *desc)
         desc->depth = 1;
         if (desc->irq_data.chip->irq_shutdown)
                 desc->irq_data.chip->irq_shutdown(&desc->irq_data);
-       if (desc->irq_data.chip->irq_disable)
+       else if (desc->irq_data.chip->irq_disable)
                 desc->irq_data.chip->irq_disable(&desc->irq_data);
         else
                 desc->irq_data.chip->irq_mask(&desc->irq_data);
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c

index 3a2cab407b93fd77024c2dff4165034b04ca832f..e38544dddb18a143e2a63f2827a56e45f8b1aa53 100644 (file)
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -246,7 +246,7 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk,
                 gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask);
  
         for (i = gc->irq_base; msk; msk >>= 1, i++) {
-               if (!msk & 0x01)
+               if (!(msk & 0x01))
                         continue;
  
                 if (flags & IRQ_GC_INIT_NESTED_LOCK)
@@ -301,7 +301,7 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk,
         raw_spin_unlock(&gc_lock);
  
         for (; msk; msk >>= 1, i++) {
-               if (!msk & 0x01)
+               if (!(msk & 0x01))
                         continue;
  
                 /* Remove handler first. That will mask the irq line */
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c

index 4c60a50e66b237922381c5b0d0d82d55b436b6dc..039b889ea053abf0ac72cd77809490d202ad0121 100644 (file)
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -70,7 +70,8 @@ static inline void desc_smp_init(struct irq_desc *desc, int node) { }
  static inline int desc_node(struct irq_desc *desc) { return 0; }
  #endif
  
-static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
+static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
+               struct module *owner)
  {
         int cpu;
  
@@ -86,6 +87,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
         desc->irq_count = 0;
         desc->irqs_unhandled = 0;
         desc->name = NULL;
+       desc->owner = owner;
         for_each_possible_cpu(cpu)
                 *per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
         desc_smp_init(desc, node);
@@ -128,7 +130,7 @@ static void free_masks(struct irq_desc *desc)
  static inline void free_masks(struct irq_desc *desc) { }
  #endif
  
-static struct irq_desc *alloc_desc(int irq, int node)
+static struct irq_desc *alloc_desc(int irq, int node, struct module *owner)
  {
         struct irq_desc *desc;
         gfp_t gfp = GFP_KERNEL;
@@ -147,7 +149,7 @@ static struct irq_desc *alloc_desc(int irq, int node)
         raw_spin_lock_init(&desc->lock);
         lockdep_set_class(&desc->lock, &irq_desc_lock_class);
  
-       desc_set_defaults(irq, desc, node);
+       desc_set_defaults(irq, desc, node, owner);
  
         return desc;
  
@@ -173,13 +175,14 @@ static void free_desc(unsigned int irq)
         kfree(desc);
  }
  
-static int alloc_descs(unsigned int start, unsigned int cnt, int node)
+static int alloc_descs(unsigned int start, unsigned int cnt, int node,
+                      struct module *owner)
  {
         struct irq_desc *desc;
         int i;
  
         for (i = 0; i < cnt; i++) {
-               desc = alloc_desc(start + i, node);
+               desc = alloc_desc(start + i, node, owner);
                 if (!desc)
                         goto err;
                 mutex_lock(&sparse_irq_lock);
@@ -227,7 +230,7 @@ int __init early_irq_init(void)
                 nr_irqs = initcnt;
  
         for (i = 0; i < initcnt; i++) {
-               desc = alloc_desc(i, node);
+               desc = alloc_desc(i, node, NULL);
                 set_bit(i, allocated_irqs);
                 irq_insert_desc(i, desc);
         }
@@ -261,7 +264,7 @@ int __init early_irq_init(void)
                 alloc_masks(&desc[i], GFP_KERNEL, node);
                 raw_spin_lock_init(&desc[i].lock);
                 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
-               desc_set_defaults(i, &desc[i], node);
+               desc_set_defaults(i, &desc[i], node, NULL);
         }
         return arch_early_irq_init();
  }
@@ -276,8 +279,16 @@ static void free_desc(unsigned int irq)
         dynamic_irq_cleanup(irq);
  }
  
-static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
+static inline int alloc_descs(unsigned int start, unsigned int cnt, int node,
+                             struct module *owner)
  {
+       u32 i;
+
+       for (i = 0; i < cnt; i++) {
+               struct irq_desc *desc = irq_to_desc(start + i);
+
+               desc->owner = owner;
+       }
         return start;
  }
  
@@ -333,11 +344,13 @@ EXPORT_SYMBOL_GPL(irq_free_descs);
   * @from:      Start the search from this irq number
   * @cnt:       Number of consecutive irqs to allocate.
   * @node:      Preferred node on which the irq descriptor should be allocated
+ * @owner:     Owning module (can be NULL)
   *
   * Returns the first irq number or error code
   */
  int __ref
-irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
+__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
+                 struct module *owner)
  {
         int start, ret;
  
@@ -366,13 +379,13 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
  
         bitmap_set(allocated_irqs, start, cnt);
         mutex_unlock(&sparse_irq_lock);
-       return alloc_descs(start, cnt, node);
+       return alloc_descs(start, cnt, node, owner);
  
  err:
         mutex_unlock(&sparse_irq_lock);
         return ret;
  }
-EXPORT_SYMBOL_GPL(irq_alloc_descs);
+EXPORT_SYMBOL_GPL(__irq_alloc_descs);
  
  /**
   * irq_reserve_irqs - mark irqs allocated
@@ -440,7 +453,7 @@ void dynamic_irq_cleanup(unsigned int irq)
         unsigned long flags;
  
         raw_spin_lock_irqsave(&desc->lock, flags);
-       desc_set_defaults(irq, desc, desc_node(desc));
+       desc_set_defaults(irq, desc, desc_node(desc), NULL);
         raw_spin_unlock_irqrestore(&desc->lock, flags);
  }
  
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c

index 0a7840aeb0fb9efbc18a6e6e8e6f01de17ed91cd..9b956fa20308032c33c6b9550faacf9718b9fc6d 100644 (file)
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -883,6 +883,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
  
         if (desc->irq_data.chip == &no_irq_chip)
                 return -ENOSYS;
+       if (!try_module_get(desc->owner))
+               return -ENODEV;
         /*
          * Some drivers like serial.c use request_irq() heavily,
          * so we have to be careful not to interfere with a
@@ -906,8 +908,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
          */
         nested = irq_settings_is_nested_thread(desc);
         if (nested) {
-               if (!new->thread_fn)
-                       return -EINVAL;
+               if (!new->thread_fn) {
+                       ret = -EINVAL;
+                       goto out_mput;
+               }
                 /*
                  * Replace the primary handler which was provided from
                  * the driver for non nested interrupt handling by the
@@ -929,8 +933,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
  
                 t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
                                    new->name);
-               if (IS_ERR(t))
-                       return PTR_ERR(t);
+               if (IS_ERR(t)) {
+                       ret = PTR_ERR(t);
+                       goto out_mput;
+               }
                 /*
                  * We keep the reference to the task struct even if
                  * the thread dies to avoid that the interrupt code
@@ -1095,6 +1101,8 @@ out_thread:
                         kthread_stop(t);
                 put_task_struct(t);
         }
+out_mput:
+       module_put(desc->owner);
         return ret;
  }
  
@@ -1203,6 +1211,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
                 put_task_struct(action->thread);
         }
  
+       module_put(desc->owner);
         return action;
  }
  
diff --git a/kernel/lockdep.c b/kernel/lockdep.c

index 8c24294e477fe6578cb16ae8c3e270b0633cc6c7..91d67ce3a8d520a5cdc43d7abe6534c59710e79d 100644 (file)
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3111,7 +3111,13 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
                 if (!class)
                         class = look_up_lock_class(lock, 0);
  
-               if (DEBUG_LOCKS_WARN_ON(!class))
+               /*
+                * If look_up_lock_class() failed to find a class, we're trying
+                * to test if we hold a lock that has never yet been acquired.
+                * Clearly if the lock hasn't been acquired _ever_, we're not
+                * holding it either, so report failure.
+                */
+               if (!class)
                         return 0;
  
                 if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock))
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig

index b1914cb9095c98bfc9c95705c15e0c3e015820a4..3744c594b19b18b779b8d1f8660788a1e0aeacbf 100644 (file)
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -231,3 +231,7 @@ config PM_CLK
  config PM_GENERIC_DOMAINS
         bool
         depends on PM
+
+config PM_GENERIC_DOMAINS_RUNTIME
+       def_bool y
+       depends on PM_RUNTIME && PM_GENERIC_DOMAINS
diff --git a/kernel/printk.c b/kernel/printk.c

index 37dff3429adbbd4d63681e9978a7bbec1f349ec0..28a40d8171b8e67488efc6f383736d5f7532c5ff 100644 (file)
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -318,8 +318,10 @@ static int check_syslog_permissions(int type, bool from_file)
                         return 0;
                 /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */
                 if (capable(CAP_SYS_ADMIN)) {
-                       WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN "
-                                "but no CAP_SYSLOG (deprecated).\n");
+                       printk_once(KERN_WARNING "%s (%d): "
+                                "Attempt to access syslog with CAP_SYS_ADMIN "
+                                "but no CAP_SYSLOG (deprecated).\n",
+                                current->comm, task_pid_nr(current));
                         return 0;
                 }
                 return -EPERM;
@@ -1602,7 +1604,7 @@ static int __init printk_late_init(void)
         struct console *con;
  
         for_each_console(con) {
-               if (con->flags & CON_BOOT) {
+               if (!keep_bootcon && con->flags & CON_BOOT) {
                         printk(KERN_INFO "turn off boot console %s%d\n",
                                 con->name, con->index);
                         unregister_console(con);
diff --git a/kernel/sched.c b/kernel/sched.c

index ccacdbdecf452bda8769878ca6e558d13ebb4e74..ec5f472bc5b9cec2a5c43ab7f8a054ecca75b39a 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3065,7 +3065,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
  #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
         local_irq_disable();
  #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
-       perf_event_task_sched_in(current);
+       perf_event_task_sched_in(prev, current);
  #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
         local_irq_enable();
  #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
@@ -4279,9 +4279,9 @@ pick_next_task(struct rq *rq)
  }
  
  /*
- * schedule() is the main scheduler function.
+ * __schedule() is the main scheduler function.
   */
-asmlinkage void __sched schedule(void)
+static void __sched __schedule(void)
  {
         struct task_struct *prev, *next;
         unsigned long *switch_count;
@@ -4322,16 +4322,6 @@ need_resched:
                                 if (to_wakeup)
                                         try_to_wake_up_local(to_wakeup);
                         }
-
-                       /*
-                        * If we are going to sleep and we have plugged IO
-                        * queued, make sure to submit it to avoid deadlocks.
-                        */
-                       if (blk_needs_flush_plug(prev)) {
-                               raw_spin_unlock(&rq->lock);
-                               blk_schedule_flush_plug(prev);
-                               raw_spin_lock(&rq->lock);
-                       }
                 }
                 switch_count = &prev->nvcsw;
         }
@@ -4369,6 +4359,26 @@ need_resched:
         if (need_resched())
                 goto need_resched;
  }
+
+static inline void sched_submit_work(struct task_struct *tsk)
+{
+       if (!tsk->state)
+               return;
+       /*
+        * If we are going to sleep and we have plugged IO queued,
+        * make sure to submit it to avoid deadlocks.
+        */
+       if (blk_needs_flush_plug(tsk))
+               blk_schedule_flush_plug(tsk);
+}
+
+asmlinkage void schedule(void)
+{
+       struct task_struct *tsk = current;
+
+       sched_submit_work(tsk);
+       __schedule();
+}
  EXPORT_SYMBOL(schedule);
  
  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
@@ -4435,7 +4445,7 @@ asmlinkage void __sched notrace preempt_schedule(void)
  
         do {
                 add_preempt_count_notrace(PREEMPT_ACTIVE);
-               schedule();
+               __schedule();
                 sub_preempt_count_notrace(PREEMPT_ACTIVE);
  
                 /*
@@ -4463,7 +4473,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
         do {
                 add_preempt_count(PREEMPT_ACTIVE);
                 local_irq_enable();
-               schedule();
+               __schedule();
                 local_irq_disable();
                 sub_preempt_count(PREEMPT_ACTIVE);
  
@@ -5588,7 +5598,7 @@ static inline int should_resched(void)
  static void __cond_resched(void)
  {
         add_preempt_count(PREEMPT_ACTIVE);
-       schedule();
+       __schedule();
         sub_preempt_count(PREEMPT_ACTIVE);
  }
  
@@ -7443,6 +7453,7 @@ static void __sdt_free(const struct cpumask *cpu_map)
                         struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
                         if (sd && (sd->flags & SD_OVERLAP))
                                 free_sched_groups(sd->groups, 0);
+                       kfree(*per_cpu_ptr(sdd->sd, j));
                         kfree(*per_cpu_ptr(sdd->sg, j));
                         kfree(*per_cpu_ptr(sdd->sgp, j));
                 }
diff --git a/kernel/sys.c b/kernel/sys.c

index a101ba36c4441f5eca898b1da0d2ad9b5b2f51a5..18ee1d2f647408a6ffef7494ffdfef2043749dcf 100644 (file)
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -37,6 +37,8 @@
  #include <linux/fs_struct.h>
  #include <linux/gfp.h>
  #include <linux/syscore_ops.h>
+#include <linux/version.h>
+#include <linux/ctype.h>
  
  #include <linux/compat.h>
  #include <linux/syscalls.h>
@@ -44,6 +46,8 @@
  #include <linux/user_namespace.h>
  
  #include <linux/kmsg_dump.h>
+/* Move somewhere else to avoid recompiling? */
+#include <generated/utsrelease.h>
  
  #include <asm/uaccess.h>
  #include <asm/io.h>
@@ -621,11 +625,18 @@ static int set_user(struct cred *new)
         if (!new_user)
                 return -EAGAIN;
  
+       /*
+        * We don't fail in case of NPROC limit excess here because too many
+        * poorly written programs don't check set*uid() return code, assuming
+        * it never fails if called by root.  We may still enforce NPROC limit
+        * for programs doing set*uid()+execve() by harmlessly deferring the
+        * failure to the execve() stage.
+        */
         if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
-                       new_user != INIT_USER) {
-               free_uid(new_user);
-               return -EAGAIN;
-       }
+                       new_user != INIT_USER)
+               current->flags |= PF_NPROC_EXCEEDED;
+       else
+               current->flags &= ~PF_NPROC_EXCEEDED;
  
         free_uid(new->user);
         new->user = new_user;
@@ -1154,6 +1165,34 @@ DECLARE_RWSEM(uts_sem);
  #define override_architecture(name)    0
  #endif
  
+/*
+ * Work around broken programs that cannot handle "Linux 3.0".
+ * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
+ */
+static int override_release(char __user *release, int len)
+{
+       int ret = 0;
+       char buf[len];
+
+       if (current->personality & UNAME26) {
+               char *rest = UTS_RELEASE;
+               int ndots = 0;
+               unsigned v;
+
+               while (*rest) {
+                       if (*rest == '.' && ++ndots >= 3)
+                               break;
+                       if (!isdigit(*rest) && *rest != '.')
+                               break;
+                       rest++;
+               }
+               v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
+               snprintf(buf, len, "2.6.%u%s", v, rest);
+               ret = copy_to_user(release, buf, len);
+       }
+       return ret;
+}
+
  SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
  {
         int errno = 0;
@@ -1163,6 +1202,8 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
                 errno = -EFAULT;
         up_read(&uts_sem);
  
+       if (!errno && override_release(name->release, sizeof(name->release)))
+               errno = -EFAULT;
         if (!errno && override_architecture(name))
                 errno = -EFAULT;
         return errno;
@@ -1184,6 +1225,8 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
                 error = -EFAULT;
         up_read(&uts_sem);
  
+       if (!error && override_release(name->release, sizeof(name->release)))
+               error = -EFAULT;
         if (!error && override_architecture(name))
                 error = -EFAULT;
         return error;
@@ -1218,6 +1261,8 @@ SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
  
         if (!error && override_architecture(name))
                 error = -EFAULT;
+       if (!error && override_release(name->release, sizeof(name->release)))
+               error = -EFAULT;
         return error ? -EFAULT : 0;
  }
  #endif
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c

index 62cbc8877fef2564d2d7284d44959d08e64b08ce..a9a5de07c4f16e1310616cb9dd51903f710199ff 100644 (file)
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -16,7 +16,6 @@ asmlinkage long sys_ni_syscall(void)
         return -ENOSYS;
  }
  
-cond_syscall(sys_nfsservctl);
  cond_syscall(sys_quotactl);
  cond_syscall(sys32_quotactl);
  cond_syscall(sys_acct);
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c

index 3b8e028b96014a088b6227859b9163e8bceddb5a..e8bffbe2ba4b3c078594f34f4594769dfc18f2c3 100644 (file)
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1,6 +1,6 @@
  #include <linux/stat.h>
  #include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
  #include <linux/sunrpc/debug.h>
  #include <linux/string.h>
  #include <net/ip_vs.h>
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c

index 4e4932a7b3608ac6e32175ee87481c1ac47a3572..362da653813da60e339d57c9d28262b0dc7c36cd 100644 (file)
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -1,6 +1,6 @@
  #include <linux/stat.h>
  #include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
  #include <linux/sunrpc/debug.h>
  #include <linux/string.h>
  #include <net/ip_vs.h>
diff --git a/kernel/taskstats.c b/kernel/taskstats.c

index e19ce1454ee1d6d2e347e8c287abf2fe68fcc9e1..e66046456f4ffebab2ec0300e0537a1fb1911e56 100644 (file)
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -655,6 +655,7 @@ static struct genl_ops taskstats_ops = {
         .cmd            = TASKSTATS_CMD_GET,
         .doit           = taskstats_user_cmd,
         .policy         = taskstats_cmd_get_policy,
+       .flags          = GENL_ADMIN_PERM,
  };
  
  static struct genl_ops cgroupstats_ops = {
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c

index 59f369f98a04311f5bfa49d01e706d4b12ca97c4..ea5e1a928d5b08c04321ab1b39a87c486118b1d2 100644 (file)
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -441,6 +441,8 @@ static int alarm_timer_create(struct k_itimer *new_timer)
  static void alarm_timer_get(struct k_itimer *timr,
                                 struct itimerspec *cur_setting)
  {
+       memset(cur_setting, 0, sizeof(struct itimerspec));
+
         cur_setting->it_interval =
                         ktime_to_timespec(timr->it.alarmtimer.period);
         cur_setting->it_value =
@@ -479,11 +481,17 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
         if (!rtcdev)
                 return -ENOTSUPP;
  
-       /* Save old values */
-       old_setting->it_interval =
-                       ktime_to_timespec(timr->it.alarmtimer.period);
-       old_setting->it_value =
-                       ktime_to_timespec(timr->it.alarmtimer.node.expires);
+       /*
+        * XXX HACK! Currently we can DOS a system if the interval
+        * period on alarmtimers is too small. Cap the interval here
+        * to 100us and solve this properly in a future patch! -jstultz
+        */
+       if ((new_setting->it_interval.tv_sec == 0) &&
+                       (new_setting->it_interval.tv_nsec < 100000))
+               new_setting->it_interval.tv_nsec = 100000;
+
+       if (old_setting)
+               alarm_timer_get(timr, old_setting);
  
         /* If the timer was already set, cancel it */
         alarm_cancel(&timr->it.alarmtimer);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig

index 2ad39e556cb4a408ca09ba408e8a1ecf85fd756b..cd3134510f3d0b9d807a532d6d9ab271e98a40f1 100644 (file)
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -82,7 +82,7 @@ config EVENT_POWER_TRACING_DEPRECATED
           power:power_frequency
           This is for userspace compatibility
           and will vanish after 5 kernel iterations,
-         namely 2.6.41.
+         namely 3.1.
  
  config CONTEXT_SWITCH_TRACER
         bool
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c

index 6957aa298dfa45581b45832b7251a2ee4029000e..7c910a5593a6d850a4f43617faa27ea01c951b42 100644 (file)
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -206,6 +206,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
         what |= MASK_TC_BIT(rw, RAHEAD);
         what |= MASK_TC_BIT(rw, META);
         what |= MASK_TC_BIT(rw, DISCARD);
+       what |= MASK_TC_BIT(rw, FLUSH);
+       what |= MASK_TC_BIT(rw, FUA);
  
         pid = tsk->pid;
         if (act_log_check(bt, what, sector, pid))
@@ -1054,6 +1056,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
                 goto out;
         }
  
+       if (tc & BLK_TC_FLUSH)
+               rwbs[i++] = 'F';
+
         if (tc & BLK_TC_DISCARD)
                 rwbs[i++] = 'D';
         else if (tc & BLK_TC_WRITE)
@@ -1063,10 +1068,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
         else
                 rwbs[i++] = 'N';
  
+       if (tc & BLK_TC_FUA)
+               rwbs[i++] = 'F';
         if (tc & BLK_TC_AHEAD)
                 rwbs[i++] = 'A';
-       if (tc & BLK_TC_BARRIER)
-               rwbs[i++] = 'B';
         if (tc & BLK_TC_SYNC)
                 rwbs[i++] = 'S';
         if (tc & BLK_TC_META)
@@ -1132,7 +1137,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
  
  static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
  {
-       char rwbs[6];
+       char rwbs[RWBS_LEN];
         unsigned long long ts  = iter->ts;
         unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
         unsigned secs          = (unsigned long)ts;
@@ -1148,7 +1153,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
  
  static int blk_log_action(struct trace_iterator *iter, const char *act)
  {
-       char rwbs[6];
+       char rwbs[RWBS_LEN];
         const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
  
         fill_rwbs(rwbs, t);
@@ -1561,7 +1566,7 @@ static const struct {
  } mask_maps[] = {
         { BLK_TC_READ,          "read"          },
         { BLK_TC_WRITE,         "write"         },
-       { BLK_TC_BARRIER,       "barrier"       },
+       { BLK_TC_FLUSH,         "flush"         },
         { BLK_TC_SYNC,          "sync"          },
         { BLK_TC_QUEUE,         "queue"         },
         { BLK_TC_REQUEUE,       "requeue"       },
@@ -1573,6 +1578,7 @@ static const struct {
         { BLK_TC_META,          "meta"          },
         { BLK_TC_DISCARD,       "discard"       },
         { BLK_TC_DRV_DATA,      "drv_data"      },
+       { BLK_TC_FUA,           "fua"           },
  };
  
  static int blk_trace_str2mask(const char *str)
@@ -1788,6 +1794,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
  {
         int i = 0;
  
+       if (rw & REQ_FLUSH)
+               rwbs[i++] = 'F';
+
         if (rw & WRITE)
                 rwbs[i++] = 'W';
         else if (rw & REQ_DISCARD)
@@ -1797,6 +1806,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
         else
                 rwbs[i++] = 'N';
  
+       if (rw & REQ_FUA)
+               rwbs[i++] = 'F';
         if (rw & REQ_RAHEAD)
                 rwbs[i++] = 'A';
         if (rw & REQ_SYNC)
diff --git a/kernel/tsacct.c b/kernel/tsacct.c

index 24dc60d9fa1f2c09de3dcb12ac33e51a7fd22562..5bbfac85866e7c333657da55e02637a135192ac8 100644 (file)
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -78,6 +78,7 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
  
  #define KB 1024
  #define MB (1024*KB)
+#define KB_MASK (~(KB-1))
  /*
   * fill in extended accounting fields
   */
@@ -95,14 +96,14 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
                 stats->hiwater_vm    = get_mm_hiwater_vm(mm)  * PAGE_SIZE / KB;
                 mmput(mm);
         }
-       stats->read_char        = p->ioac.rchar;
-       stats->write_char       = p->ioac.wchar;
-       stats->read_syscalls    = p->ioac.syscr;
-       stats->write_syscalls   = p->ioac.syscw;
+       stats->read_char        = p->ioac.rchar & KB_MASK;
+       stats->write_char       = p->ioac.wchar & KB_MASK;
+       stats->read_syscalls    = p->ioac.syscr & KB_MASK;
+       stats->write_syscalls   = p->ioac.syscw & KB_MASK;
  #ifdef CONFIG_TASK_IO_ACCOUNTING
-       stats->read_bytes       = p->ioac.read_bytes;
-       stats->write_bytes      = p->ioac.write_bytes;
-       stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes;
+       stats->read_bytes       = p->ioac.read_bytes & KB_MASK;
+       stats->write_bytes      = p->ioac.write_bytes & KB_MASK;
+       stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes & KB_MASK;
  #else
         stats->read_bytes       = 0;
         stats->write_bytes      = 0;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c

index 25fb1b0e53faa2c0d008d37986ae495495482b96..1783aabc6128f3792b9a68d27481999c1211d783 100644 (file)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2412,8 +2412,13 @@ reflush:
  
         for_each_cwq_cpu(cpu, wq) {
                 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
+               bool drained;
  
-               if (!cwq->nr_active && list_empty(&cwq->delayed_works))
+               spin_lock_irq(&cwq->gcwq->lock);
+               drained = !cwq->nr_active && list_empty(&cwq->delayed_works);
+               spin_unlock_irq(&cwq->gcwq->lock);
+
+               if (drained)
                         continue;
  
                 if (++flush_cnt == 10 ||
diff --git a/lib/Makefile b/lib/Makefile

index d5d175c8a6ca3242b91e9d804e2b3f35209caea8..3f5bc6d903e088e3a8d17dcc458aec006e7ada61 100644 (file)
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
          idr.o int_sqrt.o extable.o prio_tree.o \
          sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
          proportions.o prio_heap.o ratelimit.o show_mem.o \
-        is_single_threaded.o plist.o decompress.o find_next_bit.o
+        is_single_threaded.o plist.o decompress.o
  
  lib-$(CONFIG_MMU) += ioremap.o
  lib-$(CONFIG_SMP) += cpumask.o
@@ -22,7 +22,7 @@ lib-y += kobject.o kref.o klist.o
  obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
          bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
          string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \
-        bsearch.o find_last_bit.o
+        bsearch.o find_last_bit.o find_next_bit.o
  obj-y += kstrtox.o
  obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
  
diff --git a/lib/sha1.c b/lib/sha1.c

index f33271dd00cbc7ee39637a1edff2975358daf567..1de509a159c8db264b9e5c492db2794f4d77bdcf 100644 (file)
--- a/lib/sha1.c
+++ b/lib/sha1.c
@@ -8,6 +8,7 @@
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/bitops.h>
+#include <linux/cryptohash.h>
  #include <asm/unaligned.h>
  
  /*
diff --git a/mm/filemap.c b/mm/filemap.c

index 645a080ba4dfb3be15ba5ab03983f42715f637f9..7771871fa3535d4895ba0d5e92b6a84870980dfb 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -827,13 +827,14 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
  {
         unsigned int i;
         unsigned int ret;
-       unsigned int nr_found;
+       unsigned int nr_found, nr_skip;
  
         rcu_read_lock();
  restart:
         nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
                                 (void ***)pages, NULL, start, nr_pages);
         ret = 0;
+       nr_skip = 0;
         for (i = 0; i < nr_found; i++) {
                 struct page *page;
  repeat:
@@ -856,6 +857,7 @@ repeat:
                          * here as an exceptional entry: so skip over it -
                          * we only reach this from invalidate_mapping_pages().
                          */
+                       nr_skip++;
                         continue;
                 }
  
@@ -876,7 +878,7 @@ repeat:
          * If all entries were removed before we could secure them,
          * try again, because callers stop trying once 0 is returned.
          */
-       if (unlikely(!ret && nr_found))
+       if (unlikely(!ret && nr_found > nr_skip))
                 goto restart;
         rcu_read_unlock();
         return ret;
diff --git a/mm/highmem.c b/mm/highmem.c

index 693394daa2ed79439d4210c7f5c0bd345ff3692b..5ef672c07f752a68938623651b8caa574285222c 100644 (file)
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -326,7 +326,7 @@ static struct page_address_slot {
         spinlock_t lock;                        /* Protect this bucket's list */
  } ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER];
  
-static struct page_address_slot *page_slot(struct page *page)
+static struct page_address_slot *page_slot(const struct page *page)
  {
         return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
  }
@@ -337,7 +337,7 @@ static struct page_address_slot *page_slot(struct page *page)
   *
   * Returns the page's virtual address.
   */
-void *page_address(struct page *page)
+void *page_address(const struct page *page)
  {
         unsigned long flags;
         void *ret;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index f4ec4e7ca4cd2b64f9cfb2ea8845eba90373e549..3508777837c70824a946e931f7cf7fa7b1424399 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -204,50 +204,6 @@ struct mem_cgroup_eventfd_list {
  static void mem_cgroup_threshold(struct mem_cgroup *mem);
  static void mem_cgroup_oom_notify(struct mem_cgroup *mem);
  
-enum {
-       SCAN_BY_LIMIT,
-       SCAN_BY_SYSTEM,
-       NR_SCAN_CONTEXT,
-       SCAN_BY_SHRINK, /* not recorded now */
-};
-
-enum {
-       SCAN,
-       SCAN_ANON,
-       SCAN_FILE,
-       ROTATE,
-       ROTATE_ANON,
-       ROTATE_FILE,
-       FREED,
-       FREED_ANON,
-       FREED_FILE,
-       ELAPSED,
-       NR_SCANSTATS,
-};
-
-struct scanstat {
-       spinlock_t      lock;
-       unsigned long   stats[NR_SCAN_CONTEXT][NR_SCANSTATS];
-       unsigned long   rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS];
-};
-
-const char *scanstat_string[NR_SCANSTATS] = {
-       "scanned_pages",
-       "scanned_anon_pages",
-       "scanned_file_pages",
-       "rotated_pages",
-       "rotated_anon_pages",
-       "rotated_file_pages",
-       "freed_pages",
-       "freed_anon_pages",
-       "freed_file_pages",
-       "elapsed_ns",
-};
-#define SCANSTAT_WORD_LIMIT    "_by_limit"
-#define SCANSTAT_WORD_SYSTEM   "_by_system"
-#define SCANSTAT_WORD_HIERARCHY        "_under_hierarchy"
-
-
  /*
   * The memory controller data structure. The memory controller controls both
   * page cache and RSS per cgroup. We would eventually like to provide
@@ -313,8 +269,7 @@ struct mem_cgroup {
  
         /* For oom notifier event fd */
         struct list_head oom_notify;
-       /* For recording LRU-scan statistics */
-       struct scanstat scanstat;
+
         /*
          * Should we move charges of a task when a task is moved into this
          * mem_cgroup ? And what type of charges should we move ?
@@ -1678,44 +1633,6 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
  }
  #endif
  
-static void __mem_cgroup_record_scanstat(unsigned long *stats,
-                          struct memcg_scanrecord *rec)
-{
-
-       stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1];
-       stats[SCAN_ANON] += rec->nr_scanned[0];
-       stats[SCAN_FILE] += rec->nr_scanned[1];
-
-       stats[ROTATE] += rec->nr_rotated[0] + rec->nr_rotated[1];
-       stats[ROTATE_ANON] += rec->nr_rotated[0];
-       stats[ROTATE_FILE] += rec->nr_rotated[1];
-
-       stats[FREED] += rec->nr_freed[0] + rec->nr_freed[1];
-       stats[FREED_ANON] += rec->nr_freed[0];
-       stats[FREED_FILE] += rec->nr_freed[1];
-
-       stats[ELAPSED] += rec->elapsed;
-}
-
-static void mem_cgroup_record_scanstat(struct memcg_scanrecord *rec)
-{
-       struct mem_cgroup *mem;
-       int context = rec->context;
-
-       if (context >= NR_SCAN_CONTEXT)
-               return;
-
-       mem = rec->mem;
-       spin_lock(&mem->scanstat.lock);
-       __mem_cgroup_record_scanstat(mem->scanstat.stats[context], rec);
-       spin_unlock(&mem->scanstat.lock);
-
-       mem = rec->root;
-       spin_lock(&mem->scanstat.lock);
-       __mem_cgroup_record_scanstat(mem->scanstat.rootstats[context], rec);
-       spin_unlock(&mem->scanstat.lock);
-}
-
  /*
   * Scan the hierarchy if needed to reclaim memory. We remember the last child
   * we reclaimed from, so that we don't end up penalizing one child extensively
@@ -1740,9 +1657,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
         bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
         bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
         bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
-       struct memcg_scanrecord rec;
         unsigned long excess;
-       unsigned long scanned;
+       unsigned long nr_scanned;
  
         excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
  
@@ -1750,15 +1666,6 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
         if (!check_soft && !shrink && root_mem->memsw_is_minimum)
                 noswap = true;
  
-       if (shrink)
-               rec.context = SCAN_BY_SHRINK;
-       else if (check_soft)
-               rec.context = SCAN_BY_SYSTEM;
-       else
-               rec.context = SCAN_BY_LIMIT;
-
-       rec.root = root_mem;
-
         while (1) {
                 victim = mem_cgroup_select_victim(root_mem);
                 if (victim == root_mem) {
@@ -1799,23 +1706,14 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
                         css_put(&victim->css);
                         continue;
                 }
-               rec.mem = victim;
-               rec.nr_scanned[0] = 0;
-               rec.nr_scanned[1] = 0;
-               rec.nr_rotated[0] = 0;
-               rec.nr_rotated[1] = 0;
-               rec.nr_freed[0] = 0;
-               rec.nr_freed[1] = 0;
-               rec.elapsed = 0;
                 /* we use swappiness of local cgroup */
                 if (check_soft) {
                         ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
-                               noswap, zone, &rec, &scanned);
-                       *total_scanned += scanned;
+                               noswap, zone, &nr_scanned);
+                       *total_scanned += nr_scanned;
                 } else
                         ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
-                                               noswap, &rec);
-               mem_cgroup_record_scanstat(&rec);
+                                               noswap);
                 css_put(&victim->css);
                 /*
                  * At shrinking usage, we can't check we should stop here or
@@ -1841,29 +1739,23 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
   */
  static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
  {
-       int lock_count = -1;
         struct mem_cgroup *iter, *failed = NULL;
         bool cond = true;
  
         for_each_mem_cgroup_tree_cond(iter, mem, cond) {
-               bool locked = iter->oom_lock;
-
-               iter->oom_lock = true;
-               if (lock_count == -1)
-                       lock_count = iter->oom_lock;
-               else if (lock_count != locked) {
+               if (iter->oom_lock) {
                         /*
                          * this subtree of our hierarchy is already locked
                          * so we cannot give a lock.
                          */
-                       lock_count = 0;
                         failed = iter;
                         cond = false;
-               }
+               } else
+                       iter->oom_lock = true;
         }
  
         if (!failed)
-               goto done;
+               return true;
  
         /*
          * OK, we failed to lock the whole subtree so we have to clean up
@@ -1877,8 +1769,7 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
                 }
                 iter->oom_lock = false;
         }
-done:
-       return lock_count;
+       return false;
  }
  
  /*
@@ -2091,6 +1982,7 @@ struct memcg_stock_pcp {
  #define FLUSHING_CACHED_CHARGE (0)
  };
  static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
+static DEFINE_MUTEX(percpu_charge_mutex);
  
  /*
   * Try to consume stocked charge on this cpu. If success, one page is consumed
@@ -2168,13 +2060,7 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
  
         /* Notify other cpus that system-wide "drain" is running */
         get_online_cpus();
-       /*
-        * Get a hint for avoiding draining charges on the current cpu,
-        * which must be exhausted by our charging.  It is not required that
-        * this be a precise check, so we use raw_smp_processor_id() instead of
-        * getcpu()/putcpu().
-        */
-       curcpu = raw_smp_processor_id();
+       curcpu = get_cpu();
         for_each_online_cpu(cpu) {
                 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
                 struct mem_cgroup *mem;
@@ -2191,14 +2077,14 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
                                 schedule_work_on(cpu, &stock->work);
                 }
         }
+       put_cpu();
  
         if (!sync)
                 goto out;
  
         for_each_online_cpu(cpu) {
                 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
-               if (mem_cgroup_same_or_subtree(root_mem, stock->cached) &&
-                               test_bit(FLUSHING_CACHED_CHARGE, &stock->flags))
+               if (test_bit(FLUSHING_CACHED_CHARGE, &stock->flags))
                         flush_work(&stock->work);
         }
  out:
@@ -2213,14 +2099,22 @@ out:
   */
  static void drain_all_stock_async(struct mem_cgroup *root_mem)
  {
+       /*
+        * If someone calls draining, avoid adding more kworker runs.
+        */
+       if (!mutex_trylock(&percpu_charge_mutex))
+               return;
         drain_all_stock(root_mem, false);
+       mutex_unlock(&percpu_charge_mutex);
  }
  
  /* This is a synchronous drain interface. */
  static void drain_all_stock_sync(struct mem_cgroup *root_mem)
  {
         /* called when force_empty is called */
+       mutex_lock(&percpu_charge_mutex);
         drain_all_stock(root_mem, true);
+       mutex_unlock(&percpu_charge_mutex);
  }
  
  /*
@@ -3858,18 +3752,14 @@ try_to_free:
         /* try to free all pages in this cgroup */
         shrink = 1;
         while (nr_retries && mem->res.usage > 0) {
-               struct memcg_scanrecord rec;
                 int progress;
  
                 if (signal_pending(current)) {
                         ret = -EINTR;
                         goto out;
                 }
-               rec.context = SCAN_BY_SHRINK;
-               rec.mem = mem;
-               rec.root = mem;
                 progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
-                                               false, &rec);
+                                               false);
                 if (!progress) {
                         nr_retries--;
                         /* maybe some writeback is necessary */
@@ -4713,54 +4603,6 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
  }
  #endif /* CONFIG_NUMA */
  
-static int mem_cgroup_vmscan_stat_read(struct cgroup *cgrp,
-                               struct cftype *cft,
-                               struct cgroup_map_cb *cb)
-{
-       struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
-       char string[64];
-       int i;
-
-       for (i = 0; i < NR_SCANSTATS; i++) {
-               strcpy(string, scanstat_string[i]);
-               strcat(string, SCANSTAT_WORD_LIMIT);
-               cb->fill(cb, string,  mem->scanstat.stats[SCAN_BY_LIMIT][i]);
-       }
-
-       for (i = 0; i < NR_SCANSTATS; i++) {
-               strcpy(string, scanstat_string[i]);
-               strcat(string, SCANSTAT_WORD_SYSTEM);
-               cb->fill(cb, string,  mem->scanstat.stats[SCAN_BY_SYSTEM][i]);
-       }
-
-       for (i = 0; i < NR_SCANSTATS; i++) {
-               strcpy(string, scanstat_string[i]);
-               strcat(string, SCANSTAT_WORD_LIMIT);
-               strcat(string, SCANSTAT_WORD_HIERARCHY);
-               cb->fill(cb, string,  mem->scanstat.rootstats[SCAN_BY_LIMIT][i]);
-       }
-       for (i = 0; i < NR_SCANSTATS; i++) {
-               strcpy(string, scanstat_string[i]);
-               strcat(string, SCANSTAT_WORD_SYSTEM);
-               strcat(string, SCANSTAT_WORD_HIERARCHY);
-               cb->fill(cb, string,  mem->scanstat.rootstats[SCAN_BY_SYSTEM][i]);
-       }
-       return 0;
-}
-
-static int mem_cgroup_reset_vmscan_stat(struct cgroup *cgrp,
-                               unsigned int event)
-{
-       struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
-
-       spin_lock(&mem->scanstat.lock);
-       memset(&mem->scanstat.stats, 0, sizeof(mem->scanstat.stats));
-       memset(&mem->scanstat.rootstats, 0, sizeof(mem->scanstat.rootstats));
-       spin_unlock(&mem->scanstat.lock);
-       return 0;
-}
-
-
  static struct cftype mem_cgroup_files[] = {
         {
                 .name = "usage_in_bytes",
@@ -4831,11 +4673,6 @@ static struct cftype mem_cgroup_files[] = {
                 .mode = S_IRUGO,
         },
  #endif
-       {
-               .name = "vmscan_stat",
-               .read_map = mem_cgroup_vmscan_stat_read,
-               .trigger = mem_cgroup_reset_vmscan_stat,
-       },
  };
  
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -5099,7 +4936,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
         atomic_set(&mem->refcnt, 1);
         mem->move_charge_at_immigrate = 0;
         mutex_init(&mem->thresholds_lock);
-       spin_lock_init(&mem->scanstat.lock);
         return &mem->css;
  free_out:
         __mem_cgroup_free(mem);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c

index 8b57173c1dd5b09a8bb0b3e24fedb8c8584ce447..9c51f9f58cacc8d4a9c9f363fd5f8778ebfac190 100644 (file)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -636,7 +636,6 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
         struct vm_area_struct *prev;
         struct vm_area_struct *vma;
         int err = 0;
-       pgoff_t pgoff;
         unsigned long vmstart;
         unsigned long vmend;
  
@@ -649,9 +648,9 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
                 vmstart = max(start, vma->vm_start);
                 vmend   = min(end, vma->vm_end);
  
-               pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
                 prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
-                                 vma->anon_vma, vma->vm_file, pgoff, new_pol);
+                                 vma->anon_vma, vma->vm_file, vma->vm_pgoff,
+                                 new_pol);
                 if (prev) {
                         vma = prev;
                         next = vma->vm_next;
@@ -1412,7 +1411,9 @@ asmlinkage long compat_sys_get_mempolicy(int __user *policy,
         err = sys_get_mempolicy(policy, nm, nr_bits+1, addr, flags);
  
         if (!err && nmask) {
-               err = copy_from_user(bm, nm, alloc_size);
+               unsigned long copy_size;
+               copy_size = min_t(unsigned long, sizeof(bm), alloc_size);
+               err = copy_from_user(bm, nm, copy_size);
                 /* ensure entire bitmap is zeroed */
                 err |= clear_user(nmask, ALIGN(maxnode-1, 8) / 8);
                 err |= compat_put_bitmap(nmask, bm, nr_bits);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c

index d1960744f881d34fe3f1c3412d717db86b8e5478..0e309cd1b5b9a2e2841a10205d7d91ad87083dc7 100644 (file)
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -754,21 +754,10 @@ static void balance_dirty_pages(struct address_space *mapping,
                  * 200ms is typically more than enough to curb heavy dirtiers;
                  * (b) the pause time limit makes the dirtiers more responsive.
                  */
-               if (nr_dirty < dirty_thresh +
-                              dirty_thresh / DIRTY_MAXPAUSE_AREA &&
+               if (nr_dirty < dirty_thresh &&
+                   bdi_dirty < (task_bdi_thresh + bdi_thresh) / 2 &&
                     time_after(jiffies, start_time + MAX_PAUSE))
                         break;
-               /*
-                * pass-good area. When some bdi gets blocked (eg. NFS server
-                * not responding), or write bandwidth dropped dramatically due
-                * to concurrent reads, or dirty threshold suddenly dropped and
-                * the dirty pages cannot be brought down anytime soon (eg. on
-                * slow USB stick), at least let go of the good bdi's.
-                */
-               if (nr_dirty < dirty_thresh +
-                              dirty_thresh / DIRTY_PASSGOOD_AREA &&
-                   bdi_dirty < bdi_thresh)
-                       break;
  
                 /*
                  * Increase the delay for each loop, up to our previous
diff --git a/mm/slub.c b/mm/slub.c

index eb5a8f93338a1819d026dedf71d9db452c3c62cf..7c54fe83a90c509b543dbee8a1a7c11f844b9899 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -701,7 +701,7 @@ static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes)
                 return check_bytes8(start, value, bytes);
  
         value64 = value | value << 8 | value << 16 | value << 24;
-       value64 = value64 | value64 << 32;
+       value64 = (value64 & 0xffffffff) | value64 << 32;
         prefix = 8 - ((unsigned long)start) % 8;
  
         if (prefix) {
@@ -1854,7 +1854,7 @@ redo:
  
         new.frozen = 0;
  
-       if (!new.inuse && n->nr_partial < s->min_partial)
+       if (!new.inuse && n->nr_partial > s->min_partial)
                 m = M_FREE;
         else if (new.freelist) {
                 m = M_PARTIAL;
@@ -2377,7 +2377,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
                  */
                 if (unlikely(!prior)) {
                         remove_full(s, page);
-                       add_partial(n, page, 0);
+                       add_partial(n, page, 1);
                         stat(s, FREE_ADD_PARTIAL);
                 }
         }
@@ -2387,11 +2387,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
  slab_empty:
         if (prior) {
                 /*
-                * Slab still on the partial list.
+                * Slab on the partial list.
                  */
                 remove_partial(n, page);
                 stat(s, FREE_REMOVE_PARTIAL);
-       }
+       } else
+               /* Slab must be on the full list */
+               remove_full(s, page);
  
         spin_unlock_irqrestore(&n->list_lock, flags);
         stat(s, FREE_SLAB);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c

index 464621d18eb249fde1403741e0f75b193aa0c18a..5016f19e1661637373059dcc05124e883533fa87 100644 (file)
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -725,9 +725,10 @@ static void free_unmap_vmap_area_addr(unsigned long addr)
  #define VMAP_BBMAP_BITS_MIN    (VMAP_MAX_ALLOC*2)
  #define VMAP_MIN(x, y)         ((x) < (y) ? (x) : (y)) /* can't use min() */
  #define VMAP_MAX(x, y)         ((x) > (y) ? (x) : (y)) /* can't use max() */
-#define VMAP_BBMAP_BITS                VMAP_MIN(VMAP_BBMAP_BITS_MAX,           \
-                                       VMAP_MAX(VMAP_BBMAP_BITS_MIN,   \
-                                               VMALLOC_PAGES / NR_CPUS / 16))
+#define VMAP_BBMAP_BITS                \
+               VMAP_MIN(VMAP_BBMAP_BITS_MAX,   \
+               VMAP_MAX(VMAP_BBMAP_BITS_MIN,   \
+                       VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))
  
  #define VMAP_BLOCK_SIZE                (VMAP_BBMAP_BITS * PAGE_SIZE)
  
@@ -2139,6 +2140,14 @@ struct vm_struct *alloc_vm_area(size_t size)
                 return NULL;
         }
  
+       /*
+        * If the allocated address space is passed to a hypercall
+        * before being used then we cannot rely on a page fault to
+        * trigger an update of the page tables.  So sync all the page
+        * tables here.
+        */
+       vmalloc_sync_all();
+
         return area;
  }
  EXPORT_SYMBOL_GPL(alloc_vm_area);
diff --git a/mm/vmscan.c b/mm/vmscan.c

index 7ef69124fa3e5f4ef28baaed58a7e997d40155ab..b55699cd9067c31721866bb3ef4c1660a1177f06 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -105,7 +105,6 @@ struct scan_control {
  
         /* Which cgroup do we reclaim from */
         struct mem_cgroup *mem_cgroup;
-       struct memcg_scanrecord *memcg_record;
  
         /*
          * Nodemask of nodes allowed by the caller. If NULL, all nodes
@@ -1349,8 +1348,6 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc,
                         int file = is_file_lru(lru);
                         int numpages = hpage_nr_pages(page);
                         reclaim_stat->recent_rotated[file] += numpages;
-                       if (!scanning_global_lru(sc))
-                               sc->memcg_record->nr_rotated[file] += numpages;
                 }
                 if (!pagevec_add(&pvec, page)) {
                         spin_unlock_irq(&zone->lru_lock);
@@ -1394,10 +1391,6 @@ static noinline_for_stack void update_isolated_counts(struct zone *zone,
  
         reclaim_stat->recent_scanned[0] += *nr_anon;
         reclaim_stat->recent_scanned[1] += *nr_file;
-       if (!scanning_global_lru(sc)) {
-               sc->memcg_record->nr_scanned[0] += *nr_anon;
-               sc->memcg_record->nr_scanned[1] += *nr_file;
-       }
  }
  
  /*
@@ -1511,9 +1504,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
                 nr_reclaimed += shrink_page_list(&page_list, zone, sc);
         }
  
-       if (!scanning_global_lru(sc))
-               sc->memcg_record->nr_freed[file] += nr_reclaimed;
-
         local_irq_disable();
         if (current_is_kswapd())
                 __count_vm_events(KSWAPD_STEAL, nr_reclaimed);
@@ -1613,8 +1603,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
         }
  
         reclaim_stat->recent_scanned[file] += nr_taken;
-       if (!scanning_global_lru(sc))
-               sc->memcg_record->nr_scanned[file] += nr_taken;
  
         __count_zone_vm_events(PGREFILL, zone, pgscanned);
         if (file)
@@ -1666,8 +1654,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
          * get_scan_ratio.
          */
         reclaim_stat->recent_rotated[file] += nr_rotated;
-       if (!scanning_global_lru(sc))
-               sc->memcg_record->nr_rotated[file] += nr_rotated;
  
         move_active_pages_to_lru(zone, &l_active,
                                                 LRU_ACTIVE + file * LRU_FILE);
@@ -1808,23 +1794,15 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
         u64 fraction[2], denominator;
         enum lru_list l;
         int noswap = 0;
-       int force_scan = 0;
+       bool force_scan = false;
         unsigned long nr_force_scan[2];
  
-
-       anon  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
-               zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
-       file  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
-               zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
-
-       if (((anon + file) >> priority) < SWAP_CLUSTER_MAX) {
-               /* kswapd does zone balancing and need to scan this zone */
-               if (scanning_global_lru(sc) && current_is_kswapd())
-                       force_scan = 1;
-               /* memcg may have small limit and need to avoid priority drop */
-               if (!scanning_global_lru(sc))
-                       force_scan = 1;
-       }
+       /* kswapd does zone balancing and needs to scan this zone */
+       if (scanning_global_lru(sc) && current_is_kswapd())
+               force_scan = true;
+       /* memcg may have small limit and need to avoid priority drop */
+       if (!scanning_global_lru(sc))
+               force_scan = true;
  
         /* If we have no swap space, do not bother scanning anon pages. */
         if (!sc->may_swap || (nr_swap_pages <= 0)) {
@@ -1837,6 +1815,11 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
                 goto out;
         }
  
+       anon  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
+               zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
+       file  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
+               zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+
         if (scanning_global_lru(sc)) {
                 free  = zone_page_state(zone, NR_FREE_PAGES);
                 /* If we have very few page cache pages,
@@ -2268,10 +2251,9 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR
  
  unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
-                                       gfp_t gfp_mask, bool noswap,
-                                       struct zone *zone,
-                                       struct memcg_scanrecord *rec,
-                                       unsigned long *scanned)
+                                               gfp_t gfp_mask, bool noswap,
+                                               struct zone *zone,
+                                               unsigned long *nr_scanned)
  {
         struct scan_control sc = {
                 .nr_scanned = 0,
@@ -2281,9 +2263,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
                 .may_swap = !noswap,
                 .order = 0,
                 .mem_cgroup = mem,
-               .memcg_record = rec,
         };
-       unsigned long start, end;
  
         sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                         (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2292,7 +2272,6 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
                                                       sc.may_writepage,
                                                       sc.gfp_mask);
  
-       start = sched_clock();
         /*
          * NOTE: Although we can get the priority field, using it
          * here is not a good idea, since it limits the pages we can scan.
@@ -2301,25 +2280,19 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
          * the priority and make it zero.
          */
         shrink_zone(0, zone, &sc);
-       end = sched_clock();
-
-       if (rec)
-               rec->elapsed += end - start;
-       *scanned = sc.nr_scanned;
  
         trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
  
+       *nr_scanned = sc.nr_scanned;
         return sc.nr_reclaimed;
  }
  
  unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                                            gfp_t gfp_mask,
-                                          bool noswap,
-                                          struct memcg_scanrecord *rec)
+                                          bool noswap)
  {
         struct zonelist *zonelist;
         unsigned long nr_reclaimed;
-       unsigned long start, end;
         int nid;
         struct scan_control sc = {
                 .may_writepage = !laptop_mode,
@@ -2328,7 +2301,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                 .nr_to_reclaim = SWAP_CLUSTER_MAX,
                 .order = 0,
                 .mem_cgroup = mem_cont,
-               .memcg_record = rec,
                 .nodemask = NULL, /* we don't care the placement */
                 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                                 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
@@ -2337,7 +2309,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                 .gfp_mask = sc.gfp_mask,
         };
  
-       start = sched_clock();
         /*
          * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't
          * take care of from where we get pages. So the node where we start the
@@ -2352,9 +2323,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                                             sc.gfp_mask);
  
         nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
-       end = sched_clock();
-       if (rec)
-               rec->elapsed += end - start;
  
         trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
  
@@ -2529,6 +2497,9 @@ loop_again:
                                         high_wmark_pages(zone), 0, 0)) {
                                 end_zone = i;
                                 break;
+                       } else {
+                               /* If balanced, clear the congested flag */
+                               zone_clear_flag(zone, ZONE_CONGESTED);
                         }
                 }
                 if (i < 0)
diff --git a/mm/vmstat.c b/mm/vmstat.c

index 20c18b7694b284f236149612335d017a2419f063..d52b13d28e8f4b25b827cd386693330655ff9174 100644 (file)
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -659,7 +659,7 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
  }
  #endif
  
-#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS)
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
  #ifdef CONFIG_ZONE_DMA
  #define TEXT_FOR_DMA(xx) xx "_dma",
  #else
@@ -788,7 +788,7 @@ const char * const vmstat_text[] = {
  
  #endif /* CONFIG_VM_EVENTS_COUNTERS */
  };
-#endif /* CONFIG_PROC_FS || CONFIG_SYSFS */
+#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
  
  
  #ifdef CONFIG_PROC_FS
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c

index 5f27f8e302546371dec3b6ff24e62d389e4c41c2..f1f2f7bb6661e18c77276d20598e59caba8c34ed 100644 (file)
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -167,6 +167,8 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
         if (unlikely(!skb))
                 goto err_free;
  
+       skb_reset_network_header(skb);
+       skb_reset_transport_header(skb);
         return skb;
  
  err_free:
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c

index 175b5135bdcf524d324c73298cd17da0819c9a1e..e317583fcc7314d877b9a899df24eb2a475fe9ee 100644 (file)
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -263,7 +263,6 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
  {
         int in, out, inp, outp;
         struct virtio_chan *chan = client->trans;
-       char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
         unsigned long flags;
         size_t pdata_off = 0;
         struct trans_rpage_info *rpinfo = NULL;
@@ -346,7 +345,8 @@ req_retry_pinned:
                  * Arrange in such a way that server places header in the
                  * alloced memory and payload onto the user buffer.
                  */
-               inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11);
+               inp = pack_sg_list(chan->sg, out,
+                                  VIRTQUEUE_NUM, req->rc->sdata, 11);
                 /*
                  * Running executables in the filesystem may result in
                  * a read request with kernel buffer as opposed to user buffer.
@@ -366,8 +366,8 @@ req_retry_pinned:
                 }
                 in += inp;
         } else {
-               in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata,
-                               req->rc->capacity);
+               in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM,
+                                 req->rc->sdata, req->rc->capacity);
         }
  
         err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
@@ -592,7 +592,14 @@ static struct p9_trans_module p9_virtio_trans = {
         .close = p9_virtio_close,
         .request = p9_virtio_request,
         .cancel = p9_virtio_cancel,
-       .maxsize = PAGE_SIZE*VIRTQUEUE_NUM,
+
+       /*
+        * We leave one entry for input and one entry for response
+        * headers. We also skip one more entry to accomodate, address
+        * that are not at page boundary, that can result in an extra
+        * page in zero copy.
+        */
+       .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3),
         .pref = P9_TRANS_PREF_PAYLOAD_SEP,
         .def = 0,
         .owner = THIS_MODULE,
diff --git a/net/atm/br2684.c b/net/atm/br2684.c

index 52cfd0c3ea71f19ef844531d055b4f96a0eb6e27..d07223c834af2a6b2bc45606d64d0093f30d9897 100644 (file)
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -558,12 +558,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
         spin_unlock_irqrestore(&rq->lock, flags);
  
         skb_queue_walk_safe(&queue, skb, tmp) {
-               struct net_device *dev = skb->dev;
+               struct net_device *dev;
+
+               br2684_push(atmvcc, skb);
+               dev = skb->dev;
  
                 dev->stats.rx_bytes -= skb->len;
                 dev->stats.rx_packets--;
-
-               br2684_push(atmvcc, skb);
         }
  
         /* initialize netdev carrier state */
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c

index 8add9b4999129bcbdf62122221bd4bfec0c686ac..117e0d161780b4a94d6350d0618a87d5937ffc6a 100644 (file)
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -494,9 +494,8 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
         BT_DBG("sk %p", sk);
  
         add_wait_queue(sk_sleep(sk), &wait);
+       set_current_state(TASK_INTERRUPTIBLE);
         while (sk->sk_state != state) {
-               set_current_state(TASK_INTERRUPTIBLE);
-
                 if (!timeo) {
                         err = -EINPROGRESS;
                         break;
@@ -510,12 +509,13 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
                 release_sock(sk);
                 timeo = schedule_timeout(timeo);
                 lock_sock(sk);
+               set_current_state(TASK_INTERRUPTIBLE);
  
                 err = sock_error(sk);
                 if (err)
                         break;
         }
-       set_current_state(TASK_RUNNING);
+       __set_current_state(TASK_RUNNING);
         remove_wait_queue(sk_sleep(sk), &wait);
         return err;
  }
diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h

index 8e6c06158f8ebf02268362d4286b4763813b4e0c..e7ee5314f39a1e6a611dd3f0bd59c10c012ae699 100644 (file)
--- a/net/bluetooth/bnep/bnep.h
+++ b/net/bluetooth/bnep/bnep.h
@@ -155,6 +155,7 @@ struct bnep_session {
         unsigned int  role;
         unsigned long state;
         unsigned long flags;
+       atomic_t      terminate;
         struct task_struct *task;
  
         struct ethhdr eh;
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c

index ca39fcf010ce3353e73312f4172f32c9ce12cb1f..d9edfe8bf9d677268cfc1f1e10b85f04d9f00f73 100644 (file)
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -484,9 +484,11 @@ static int bnep_session(void *arg)
  
         init_waitqueue_entry(&wait, current);
         add_wait_queue(sk_sleep(sk), &wait);
-       while (!kthread_should_stop()) {
+       while (1) {
                 set_current_state(TASK_INTERRUPTIBLE);
  
+               if (atomic_read(&s->terminate))
+                       break;
                 /* RX */
                 while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
                         skb_orphan(skb);
@@ -504,7 +506,7 @@ static int bnep_session(void *arg)
  
                 schedule();
         }
-       set_current_state(TASK_RUNNING);
+       __set_current_state(TASK_RUNNING);
         remove_wait_queue(sk_sleep(sk), &wait);
  
         /* Cleanup session */
@@ -640,9 +642,10 @@ int bnep_del_connection(struct bnep_conndel_req *req)
         down_read(&bnep_session_sem);
  
         s = __bnep_get_session(req->dst);
-       if (s)
-               kthread_stop(s->task);
-       else
+       if (s) {
+               atomic_inc(&s->terminate);
+               wake_up_process(s->task);
+       } else
                 err = -ENOENT;
  
         up_read(&bnep_session_sem);
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c

index 040f67b12978c17713e4040937127b0cc814de8e..50f0d135eb8f201daf8156c433f08a973bfa338f 100644 (file)
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -386,7 +386,8 @@ static void cmtp_reset_ctr(struct capi_ctr *ctrl)
  
         capi_ctr_down(ctrl);
  
-       kthread_stop(session->task);
+       atomic_inc(&session->terminate);
+       wake_up_process(session->task);
  }
  
  static void cmtp_register_appl(struct capi_ctr *ctrl, __u16 appl, capi_register_params *rp)
diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h

index db43b54ac9afb91cfc78f911f8a15c05104fe7ed..c32638dddbf9409d685c3436eb1b1541d04d5a9e 100644 (file)
--- a/net/bluetooth/cmtp/cmtp.h
+++ b/net/bluetooth/cmtp/cmtp.h
@@ -81,6 +81,7 @@ struct cmtp_session {
  
         char name[BTNAMSIZ];
  
+       atomic_t terminate;
         struct task_struct *task;
  
         wait_queue_head_t wait;
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c

index c5b11af908be4fc3ab6c7a0cebb9639900feb162..521baa4fe835eb775f24e59dda205ae76d1f87ce 100644 (file)
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -292,9 +292,11 @@ static int cmtp_session(void *arg)
  
         init_waitqueue_entry(&wait, current);
         add_wait_queue(sk_sleep(sk), &wait);
-       while (!kthread_should_stop()) {
+       while (1) {
                 set_current_state(TASK_INTERRUPTIBLE);
  
+               if (atomic_read(&session->terminate))
+                       break;
                 if (sk->sk_state != BT_CONNECTED)
                         break;
  
@@ -307,7 +309,7 @@ static int cmtp_session(void *arg)
  
                 schedule();
         }
-       set_current_state(TASK_RUNNING);
+       __set_current_state(TASK_RUNNING);
         remove_wait_queue(sk_sleep(sk), &wait);
  
         down_write(&cmtp_session_sem);
@@ -380,16 +382,17 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
  
         if (!(session->flags & (1 << CMTP_LOOPBACK))) {
                 err = cmtp_attach_device(session);
-               if (err < 0)
-                       goto detach;
+               if (err < 0) {
+                       atomic_inc(&session->terminate);
+                       wake_up_process(session->task);
+                       up_write(&cmtp_session_sem);
+                       return err;
+               }
         }
  
         up_write(&cmtp_session_sem);
         return 0;
  
-detach:
-       cmtp_detach_device(session);
-
  unlink:
         __cmtp_unlink_session(session);
  
@@ -414,7 +417,8 @@ int cmtp_del_connection(struct cmtp_conndel_req *req)
                 skb_queue_purge(&session->transmit);
  
                 /* Stop session thread */
-               kthread_stop(session->task);
+               atomic_inc(&session->terminate);
+               wake_up_process(session->task);
         } else
                 err = -ENOENT;
  
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c

index ec0bc3f60f2eea4216471746fdbea09c51b3fbcb..56943add45cc44707167f4877e27fe5630b00c95 100644 (file)
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1209,7 +1209,6 @@ static void hci_cmd_timer(unsigned long arg)
  
         BT_ERR("%s command tx timeout", hdev->name);
         atomic_set(&hdev->cmd_cnt, 1);
-       clear_bit(HCI_RESET, &hdev->flags);
         tasklet_schedule(&hdev->cmd_task);
  }
  
@@ -1327,7 +1326,7 @@ int hci_blacklist_add(struct hci_dev *hdev, bdaddr_t *bdaddr)
  
         entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL);
         if (!entry) {
-               return -ENOMEM;
+               err = -ENOMEM;
                 goto err;
         }
  
@@ -2408,7 +2407,10 @@ static void hci_cmd_task(unsigned long arg)
                 if (hdev->sent_cmd) {
                         atomic_dec(&hdev->cmd_cnt);
                         hci_send_frame(skb);
-                       mod_timer(&hdev->cmd_timer,
+                       if (test_bit(HCI_RESET, &hdev->flags))
+                               del_timer(&hdev->cmd_timer);
+                       else
+                               mod_timer(&hdev->cmd_timer,
                                   jiffies + msecs_to_jiffies(HCI_CMD_TIMEOUT));
                 } else {
                         skb_queue_head(&hdev->cmd_q, skb);
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c

index 43b4c2deb7cc05bdc875e3f7b23a999468f5c1a4..fb68f344c34a68e5a4334b751196680ac33ffdfe 100644 (file)
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -764,6 +764,7 @@ static int hidp_session(void *arg)
  
         up_write(&hidp_session_sem);
  
+       kfree(session->rd_data);
         kfree(session);
         return 0;
  }
@@ -841,7 +842,8 @@ static int hidp_setup_input(struct hidp_session *session,
  
         err = input_register_device(input);
         if (err < 0) {
-               hci_conn_put_device(session->conn);
+               input_free_device(input);
+               session->input = NULL;
                 return err;
         }
  
@@ -1044,8 +1046,12 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
         }
  
         err = hid_add_device(session->hid);
-       if (err < 0)
-               goto err_add_device;
+       if (err < 0) {
+               atomic_inc(&session->terminate);
+               wake_up_process(session->task);
+               up_write(&hidp_session_sem);
+               return err;
+       }
  
         if (session->input) {
                 hidp_send_ctrl_message(session,
@@ -1059,12 +1065,6 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
         up_write(&hidp_session_sem);
         return 0;
  
-err_add_device:
-       hid_destroy_device(session->hid);
-       session->hid = NULL;
-       atomic_inc(&session->terminate);
-       wake_up_process(session->task);
-
  unlink:
         hidp_del_timer(session);
  
@@ -1090,7 +1090,6 @@ purge:
  failed:
         up_write(&hidp_session_sem);
  
-       input_free_device(session->input);
         kfree(session);
         return err;
  }
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c

index 3204ba8a701ca0d7c99eebfc629d22e51a007021..b3bdb482bbe6f5fdf2d197fc1035082f495c448b 100644 (file)
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1159,9 +1159,8 @@ int __l2cap_wait_ack(struct sock *sk)
         int timeo = HZ/5;
  
         add_wait_queue(sk_sleep(sk), &wait);
-       while ((chan->unacked_frames > 0 && chan->conn)) {
-               set_current_state(TASK_INTERRUPTIBLE);
-
+       set_current_state(TASK_INTERRUPTIBLE);
+       while (chan->unacked_frames > 0 && chan->conn) {
                 if (!timeo)
                         timeo = HZ/5;
  
@@ -1173,6 +1172,7 @@ int __l2cap_wait_ack(struct sock *sk)
                 release_sock(sk);
                 timeo = schedule_timeout(timeo);
                 lock_sock(sk);
+               set_current_state(TASK_INTERRUPTIBLE);
  
                 err = sock_error(sk);
                 if (err)
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c

index 5c36b3e8739cb706f9aa47e74a0ed9c5f72b8e24..61f1f623091dbcd89992a64a635fc8de62963795 100644 (file)
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -235,30 +235,26 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl
  
         lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
  
-       if (sk->sk_state != BT_LISTEN) {
-               err = -EBADFD;
-               goto done;
-       }
-
         timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
  
         BT_DBG("sk %p timeo %ld", sk, timeo);
  
         /* Wait for an incoming connection. (wake-one). */
         add_wait_queue_exclusive(sk_sleep(sk), &wait);
-       while (!(nsk = bt_accept_dequeue(sk, newsock))) {
+       while (1) {
                 set_current_state(TASK_INTERRUPTIBLE);
-               if (!timeo) {
-                       err = -EAGAIN;
+
+               if (sk->sk_state != BT_LISTEN) {
+                       err = -EBADFD;
                         break;
                 }
  
-               release_sock(sk);
-               timeo = schedule_timeout(timeo);
-               lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+               nsk = bt_accept_dequeue(sk, newsock);
+               if (nsk)
+                       break;
  
-               if (sk->sk_state != BT_LISTEN) {
-                       err = -EBADFD;
+               if (!timeo) {
+                       err = -EAGAIN;
                         break;
                 }
  
@@ -266,8 +262,12 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl
                         err = sock_intr_errno(timeo);
                         break;
                 }
+
+               release_sock(sk);
+               timeo = schedule_timeout(timeo);
+               lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
         }
-       set_current_state(TASK_RUNNING);
+       __set_current_state(TASK_RUNNING);
         remove_wait_queue(sk_sleep(sk), &wait);
  
         if (err)
@@ -993,7 +993,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p
         INIT_LIST_HEAD(&bt_sk(sk)->accept_q);
  
         sk->sk_destruct = l2cap_sock_destruct;
-       sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT);
+       sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT;
  
         sock_reset_flag(sk, SOCK_ZAPPED);
  
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c

index 5759bb7054f7f4aca8822271b7de4da8997af915..5ba3f6df665cda5748044aaf21133f620979ca55 100644 (file)
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -62,7 +62,6 @@ static DEFINE_MUTEX(rfcomm_mutex);
  #define rfcomm_lock()  mutex_lock(&rfcomm_mutex)
  #define rfcomm_unlock()        mutex_unlock(&rfcomm_mutex)
  
-static unsigned long rfcomm_event;
  
  static LIST_HEAD(session_list);
  
@@ -120,7 +119,6 @@ static inline void rfcomm_schedule(void)
  {
         if (!rfcomm_thread)
                 return;
-       set_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event);
         wake_up_process(rfcomm_thread);
  }
  
@@ -2038,19 +2036,18 @@ static int rfcomm_run(void *unused)
  
         rfcomm_add_listener(BDADDR_ANY);
  
-       while (!kthread_should_stop()) {
+       while (1) {
                 set_current_state(TASK_INTERRUPTIBLE);
-               if (!test_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event)) {
-                       /* No pending events. Let's sleep.
-                        * Incoming connections and data will wake us up. */
-                       schedule();
-               }
-               set_current_state(TASK_RUNNING);
+
+               if (kthread_should_stop())
+                       break;
  
                 /* Process stuff */
-               clear_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event);
                 rfcomm_process_sessions();
+
+               schedule();
         }
+       __set_current_state(TASK_RUNNING);
  
         rfcomm_kill_listener();
  
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c

index 8f01e6b11a7037d487c5a32072d460d56760a64a..482722bbc7a052c67053a95ad357463a5bc641bf 100644 (file)
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -485,11 +485,6 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f
  
         lock_sock(sk);
  
-       if (sk->sk_state != BT_LISTEN) {
-               err = -EBADFD;
-               goto done;
-       }
-
         if (sk->sk_type != SOCK_STREAM) {
                 err = -EINVAL;
                 goto done;
@@ -501,19 +496,20 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f
  
         /* Wait for an incoming connection. (wake-one). */
         add_wait_queue_exclusive(sk_sleep(sk), &wait);
-       while (!(nsk = bt_accept_dequeue(sk, newsock))) {
+       while (1) {
                 set_current_state(TASK_INTERRUPTIBLE);
-               if (!timeo) {
-                       err = -EAGAIN;
+
+               if (sk->sk_state != BT_LISTEN) {
+                       err = -EBADFD;
                         break;
                 }
  
-               release_sock(sk);
-               timeo = schedule_timeout(timeo);
-               lock_sock(sk);
+               nsk = bt_accept_dequeue(sk, newsock);
+               if (nsk)
+                       break;
  
-               if (sk->sk_state != BT_LISTEN) {
-                       err = -EBADFD;
+               if (!timeo) {
+                       err = -EAGAIN;
                         break;
                 }
  
@@ -521,8 +517,12 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f
                         err = sock_intr_errno(timeo);
                         break;
                 }
+
+               release_sock(sk);
+               timeo = schedule_timeout(timeo);
+               lock_sock(sk);
         }
-       set_current_state(TASK_RUNNING);
+       __set_current_state(TASK_RUNNING);
         remove_wait_queue(sk_sleep(sk), &wait);
  
         if (err)
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c

index 4c3621b5e0aa2344196767bdeb6be407918732b4..8270f05e3f1f27b0883020eede325df8d17154d0 100644 (file)
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -564,30 +564,26 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag
  
         lock_sock(sk);
  
-       if (sk->sk_state != BT_LISTEN) {
-               err = -EBADFD;
-               goto done;
-       }
-
         timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
  
         BT_DBG("sk %p timeo %ld", sk, timeo);
  
         /* Wait for an incoming connection. (wake-one). */
         add_wait_queue_exclusive(sk_sleep(sk), &wait);
-       while (!(ch = bt_accept_dequeue(sk, newsock))) {
+       while (1) {
                 set_current_state(TASK_INTERRUPTIBLE);
-               if (!timeo) {
-                       err = -EAGAIN;
+
+               if (sk->sk_state != BT_LISTEN) {
+                       err = -EBADFD;
                         break;
                 }
  
-               release_sock(sk);
-               timeo = schedule_timeout(timeo);
-               lock_sock(sk);
+               ch = bt_accept_dequeue(sk, newsock);
+               if (ch)
+                       break;
  
-               if (sk->sk_state != BT_LISTEN) {
-                       err = -EBADFD;
+               if (!timeo) {
+                       err = -EAGAIN;
                         break;
                 }
  
@@ -595,8 +591,12 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag
                         err = sock_intr_errno(timeo);
                         break;
                 }
+
+               release_sock(sk);
+               timeo = schedule_timeout(timeo);
+               lock_sock(sk);
         }
-       set_current_state(TASK_RUNNING);
+       __set_current_state(TASK_RUNNING);
         remove_wait_queue(sk_sleep(sk), &wait);
  
         if (err)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c

index 3176e2e13d9bbaecf3117abf1c665dbac14538e4..e73815456adf2763f72d15ee649e2a3b1d6052a5 100644 (file)
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -231,6 +231,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br,
  int br_add_bridge(struct net *net, const char *name)
  {
         struct net_device *dev;
+       int res;
  
         dev = alloc_netdev(sizeof(struct net_bridge), name,
                            br_dev_setup);
@@ -240,7 +241,10 @@ int br_add_bridge(struct net *net, const char *name)
  
         dev_net_set(dev, net);
  
-       return register_netdev(dev);
+       res = register_netdev(dev);
+       if (res)
+               free_netdev(dev);
+       return res;
  }
  
  int br_del_bridge(struct net *net, const char *name)
@@ -417,6 +421,7 @@ put_back:
  int br_del_if(struct net_bridge *br, struct net_device *dev)
  {
         struct net_bridge_port *p;
+       bool changed_addr;
  
         p = br_port_get_rtnl(dev);
         if (!p || p->br != br)
@@ -425,9 +430,12 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
         del_nbp(p);
  
         spin_lock_bh(&br->lock);
-       br_stp_recalculate_bridge_id(br);
+       changed_addr = br_stp_recalculate_bridge_id(br);
         spin_unlock_bh(&br->lock);
  
+       if (changed_addr)
+               call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
+
         netdev_update_features(br->dev);
  
         return 0;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c

index 2d85ca7111d3994f1c60941a4992da749d195b57..995cbe0ac0b2b1e74f2b4762d61a13102d9f66bb 100644 (file)
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1456,7 +1456,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
  {
         struct sk_buff *skb2;
         const struct ipv6hdr *ip6h;
-       struct icmp6hdr *icmp6h;
+       u8 icmp6_type;
         u8 nexthdr;
         unsigned len;
         int offset;
@@ -1502,9 +1502,9 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
         __skb_pull(skb2, offset);
         skb_reset_transport_header(skb2);
  
-       icmp6h = icmp6_hdr(skb2);
+       icmp6_type = icmp6_hdr(skb2)->icmp6_type;
  
-       switch (icmp6h->icmp6_type) {
+       switch (icmp6_type) {
         case ICMPV6_MGM_QUERY:
         case ICMPV6_MGM_REPORT:
         case ICMPV6_MGM_REDUCTION:
@@ -1520,16 +1520,23 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
                 err = pskb_trim_rcsum(skb2, len);
                 if (err)
                         goto out;
+               err = -EINVAL;
         }
  
+       ip6h = ipv6_hdr(skb2);
+
         switch (skb2->ip_summed) {
         case CHECKSUM_COMPLETE:
-               if (!csum_fold(skb2->csum))
+               if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len,
+                                       IPPROTO_ICMPV6, skb2->csum))
                         break;
                 /*FALLTHROUGH*/
         case CHECKSUM_NONE:
-               skb2->csum = 0;
-               if (skb_checksum_complete(skb2))
+               skb2->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
+                                                       &ip6h->daddr,
+                                                       skb2->len,
+                                                       IPPROTO_ICMPV6, 0));
+               if (__skb_checksum_complete(skb2))
                         goto out;
         }
  
@@ -1537,7 +1544,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
  
         BR_INPUT_SKB_CB(skb)->igmp = 1;
  
-       switch (icmp6h->icmp6_type) {
+       switch (icmp6_type) {
         case ICMPV6_MGM_REPORT:
             {
                 struct mld_msg *mld;
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c

index 6545ee9591d1b5d3d2211642fba6f53bf80df003..a76b62135558a06308cc76adff65de3d704d76bf 100644 (file)
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -34,6 +34,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
         struct net_device *dev = ptr;
         struct net_bridge_port *p;
         struct net_bridge *br;
+       bool changed_addr;
         int err;
  
         /* register of bridge completed, add sysfs entries */
@@ -57,8 +58,12 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
         case NETDEV_CHANGEADDR:
                 spin_lock_bh(&br->lock);
                 br_fdb_changeaddr(p, dev->dev_addr);
-               br_stp_recalculate_bridge_id(br);
+               changed_addr = br_stp_recalculate_bridge_id(br);
                 spin_unlock_bh(&br->lock);
+
+               if (changed_addr)
+                       call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
+
                 break;
  
         case NETDEV_CHANGE:
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig

index ba6f73eb06c60c1a7f44e90eacfe5c130819ddd8..a9aff9c7d0273b2a41ef9d122ac38e769956a640 100644 (file)
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -4,7 +4,7 @@
  
  menuconfig BRIDGE_NF_EBTABLES
         tristate "Ethernet Bridge tables (ebtables) support"
-       depends on BRIDGE && BRIDGE_NETFILTER
+       depends on BRIDGE && NETFILTER
         select NETFILTER_XTABLES
         help
           ebtables is a general, extensible frame/packet identification
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c

index 2b5ca1a0054da4aec5f6a17064e1040c4a107dda..5864cc49136993d9de12f4c0f9110ab2dbcf1025 100644 (file)
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1198,7 +1198,8 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table)
  
         if (table->check && table->check(newinfo, table->valid_hooks)) {
                 BUGPRINT("The table doesn't like its own initial data, lol\n");
-               return ERR_PTR(-EINVAL);
+               ret = -EINVAL;
+               goto free_chainstack;
         }
  
         table->private = newinfo;
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c

index 7c2fa0a0814848c399c64aa9f44fc4fa119fa28b..7f9ac0742d19cc71fa367ea83bd8a9a117adbd1d 100644 (file)
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -93,10 +93,14 @@ static struct caif_device_entry *caif_device_alloc(struct net_device *dev)
         caifdevs = caif_device_list(dev_net(dev));
         BUG_ON(!caifdevs);
  
-       caifd = kzalloc(sizeof(*caifd), GFP_ATOMIC);
+       caifd = kzalloc(sizeof(*caifd), GFP_KERNEL);
         if (!caifd)
                 return NULL;
         caifd->pcpu_refcnt = alloc_percpu(int);
+       if (!caifd->pcpu_refcnt) {
+               kfree(caifd);
+               return NULL;
+       }
         caifd->netdev = dev;
         dev_hold(dev);
         return caifd;
diff --git a/net/can/af_can.c b/net/can/af_can.c

index 8ce926d3b2cb9e51f053a8a06c271eb50ee481ea..9b0c32a2690c3f168eb78b5193b80fa12c72f53f 100644 (file)
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -857,7 +857,7 @@ static __exit void can_exit(void)
         struct net_device *dev;
  
         if (stats_timer)
-               del_timer(&can_stattimer);
+               del_timer_sync(&can_stattimer);
  
         can_remove_proc();
  
diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c

index d5f2d97ac05caf40124fe4f628efe77a514d8e55..1f4cb30a42c558e89cfa696af9efcacc4209c0d6 100644 (file)
--- a/net/ceph/msgpool.c
+++ b/net/ceph/msgpool.c
@@ -7,27 +7,37 @@
  
  #include <linux/ceph/msgpool.h>
  
-static void *alloc_fn(gfp_t gfp_mask, void *arg)
+static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
  {
         struct ceph_msgpool *pool = arg;
-       void *p;
+       struct ceph_msg *msg;
  
-       p = ceph_msg_new(0, pool->front_len, gfp_mask);
-       if (!p)
-               pr_err("msgpool %s alloc failed\n", pool->name);
-       return p;
+       msg = ceph_msg_new(0, pool->front_len, gfp_mask);
+       if (!msg) {
+               dout("msgpool_alloc %s failed\n", pool->name);
+       } else {
+               dout("msgpool_alloc %s %p\n", pool->name, msg);
+               msg->pool = pool;
+       }
+       return msg;
  }
  
-static void free_fn(void *element, void *arg)
+static void msgpool_free(void *element, void *arg)
  {
-       ceph_msg_put(element);
+       struct ceph_msgpool *pool = arg;
+       struct ceph_msg *msg = element;
+
+       dout("msgpool_release %s %p\n", pool->name, msg);
+       msg->pool = NULL;
+       ceph_msg_put(msg);
  }
  
  int ceph_msgpool_init(struct ceph_msgpool *pool,
                       int front_len, int size, bool blocking, const char *name)
  {
+       dout("msgpool %s init\n", name);
         pool->front_len = front_len;
-       pool->pool = mempool_create(size, alloc_fn, free_fn, pool);
+       pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
         if (!pool->pool)
                 return -ENOMEM;
         pool->name = name;
@@ -36,14 +46,17 @@ int ceph_msgpool_init(struct ceph_msgpool *pool,
  
  void ceph_msgpool_destroy(struct ceph_msgpool *pool)
  {
+       dout("msgpool %s destroy\n", pool->name);
         mempool_destroy(pool->pool);
  }
  
  struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
                                   int front_len)
  {
+       struct ceph_msg *msg;
+
         if (front_len > pool->front_len) {
-               pr_err("msgpool_get pool %s need front %d, pool size is %d\n",
+               dout("msgpool_get %s need front %d, pool size is %d\n",
                        pool->name, front_len, pool->front_len);
                 WARN_ON(1);
  
@@ -51,14 +64,19 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
                 return ceph_msg_new(0, front_len, GFP_NOFS);
         }
  
-       return mempool_alloc(pool->pool, GFP_NOFS);
+       msg = mempool_alloc(pool->pool, GFP_NOFS);
+       dout("msgpool_get %s %p\n", pool->name, msg);
+       return msg;
  }
  
  void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg)
  {
+       dout("msgpool_put %s %p\n", pool->name, msg);
+
         /* reset msg front_len; user may have changed it */
         msg->front.iov_len = pool->front_len;
         msg->hdr.front_len = cpu_to_le32(pool->front_len);
  
         kref_init(&msg->kref);  /* retake single ref */
+       mempool_free(msg, pool->pool);
  }
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c

index ce310eee708d9f76c0a631b32edb6eb046cff153..16836a7df7a6364c5f6823975d559ff3b30106a2 100644 (file)
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -685,6 +685,18 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
         put_osd(osd);
  }
  
+static void remove_all_osds(struct ceph_osd_client *osdc)
+{
+       dout("__remove_old_osds %p\n", osdc);
+       mutex_lock(&osdc->request_mutex);
+       while (!RB_EMPTY_ROOT(&osdc->osds)) {
+               struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds),
+                                               struct ceph_osd, o_node);
+               __remove_osd(osdc, osd);
+       }
+       mutex_unlock(&osdc->request_mutex);
+}
+
  static void __move_osd_to_lru(struct ceph_osd_client *osdc,
                               struct ceph_osd *osd)
  {
@@ -701,14 +713,14 @@ static void __remove_osd_from_lru(struct ceph_osd *osd)
                 list_del_init(&osd->o_osd_lru);
  }
  
-static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all)
+static void remove_old_osds(struct ceph_osd_client *osdc)
  {
         struct ceph_osd *osd, *nosd;
  
         dout("__remove_old_osds %p\n", osdc);
         mutex_lock(&osdc->request_mutex);
         list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) {
-               if (!remove_all && time_before(jiffies, osd->lru_ttl))
+               if (time_before(jiffies, osd->lru_ttl))
                         break;
                 __remove_osd(osdc, osd);
         }
@@ -751,6 +763,7 @@ static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new)
         struct rb_node *parent = NULL;
         struct ceph_osd *osd = NULL;
  
+       dout("__insert_osd %p osd%d\n", new, new->o_osd);
         while (*p) {
                 parent = *p;
                 osd = rb_entry(parent, struct ceph_osd, o_node);
@@ -1144,7 +1157,7 @@ static void handle_osds_timeout(struct work_struct *work)
  
         dout("osds timeout\n");
         down_read(&osdc->map_sem);
-       remove_old_osds(osdc, 0);
+       remove_old_osds(osdc);
         up_read(&osdc->map_sem);
  
         schedule_delayed_work(&osdc->osds_timeout_work,
@@ -1862,8 +1875,7 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
                 ceph_osdmap_destroy(osdc->osdmap);
                 osdc->osdmap = NULL;
         }
-       remove_old_osds(osdc, 1);
-       WARN_ON(!RB_EMPTY_ROOT(&osdc->osds));
+       remove_all_osds(osdc);
         mempool_destroy(osdc->req_mempool);
         ceph_msgpool_destroy(&osdc->msgpool_op);
         ceph_msgpool_destroy(&osdc->msgpool_op_reply);
diff --git a/net/core/dev.c b/net/core/dev.c

index 17d67b579beb0c3f6f97bfd8add5d7027741854a..b10ff0a71855207ecf56fc815d2b08b4239693cd 100644 (file)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1515,6 +1515,14 @@ static inline bool is_skb_forwardable(struct net_device *dev,
   */
  int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
  {
+       if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+               if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
+                       atomic_long_inc(&dev->rx_dropped);
+                       kfree_skb(skb);
+                       return NET_RX_DROP;
+               }
+       }
+
         skb_orphan(skb);
         nf_reset(skb);
  
diff --git a/net/core/flow.c b/net/core/flow.c

index bf32c33cad3b03268016fa2fee1a6d2756d641d9..555a456efb076d016abe58827b93d05ab39a7465 100644 (file)
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -30,6 +30,7 @@ struct flow_cache_entry {
                 struct hlist_node       hlist;
                 struct list_head        gc_list;
         } u;
+       struct net                      *net;
         u16                             family;
         u8                              dir;
         u32                             genid;
@@ -172,29 +173,26 @@ static void flow_new_hash_rnd(struct flow_cache *fc,
  
  static u32 flow_hash_code(struct flow_cache *fc,
                           struct flow_cache_percpu *fcp,
-                         const struct flowi *key)
+                         const struct flowi *key,
+                         size_t keysize)
  {
         const u32 *k = (const u32 *) key;
+       const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32);
  
-       return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
+       return jhash2(k, length, fcp->hash_rnd)
                 & (flow_cache_hash_size(fc) - 1);
  }
  
-typedef unsigned long flow_compare_t;
-
  /* I hear what you're saying, use memcmp.  But memcmp cannot make
- * important assumptions that we can here, such as alignment and
- * constant size.
+ * important assumptions that we can here, such as alignment.
   */
-static int flow_key_compare(const struct flowi *key1, const struct flowi *key2)
+static int flow_key_compare(const struct flowi *key1, const struct flowi *key2,
+                           size_t keysize)
  {
         const flow_compare_t *k1, *k1_lim, *k2;
-       const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
-
-       BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t));
  
         k1 = (const flow_compare_t *) key1;
-       k1_lim = k1 + n_elem;
+       k1_lim = k1 + keysize;
  
         k2 = (const flow_compare_t *) key2;
  
@@ -215,6 +213,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
         struct flow_cache_entry *fle, *tfle;
         struct hlist_node *entry;
         struct flow_cache_object *flo;
+       size_t keysize;
         unsigned int hash;
  
         local_bh_disable();
@@ -222,6 +221,11 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
  
         fle = NULL;
         flo = NULL;
+
+       keysize = flow_key_size(family);
+       if (!keysize)
+               goto nocache;
+
         /* Packet really early in init?  Making flow_cache_init a
          * pre-smp initcall would solve this.  --RR */
         if (!fcp->hash_table)
@@ -230,11 +234,12 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
         if (fcp->hash_rnd_recalc)
                 flow_new_hash_rnd(fc, fcp);
  
-       hash = flow_hash_code(fc, fcp, key);
+       hash = flow_hash_code(fc, fcp, key, keysize);
         hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) {
-               if (tfle->family == family &&
+               if (tfle->net == net &&
+                   tfle->family == family &&
                     tfle->dir == dir &&
-                   flow_key_compare(key, &tfle->key) == 0) {
+                   flow_key_compare(key, &tfle->key, keysize) == 0) {
                         fle = tfle;
                         break;
                 }
@@ -246,9 +251,10 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
  
                 fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
                 if (fle) {
+                       fle->net = net;
                         fle->family = family;
                         fle->dir = dir;
-                       memcpy(&fle->key, key, sizeof(*key));
+                       memcpy(&fle->key, key, keysize * sizeof(flow_compare_t));
                         fle->object = NULL;
                         hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
                         fcp->hash_count++;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c

index 8fab9b0bb2036784e002a4b3bb0464db059bd5ad..1334d7e56f020d5fac869f528590842483b68178 100644 (file)
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1319,11 +1319,15 @@ static void neigh_proxy_process(unsigned long arg)
  
                 if (tdif <= 0) {
                         struct net_device *dev = skb->dev;
+
                         __skb_unlink(skb, &tbl->proxy_queue);
-                       if (tbl->proxy_redo && netif_running(dev))
+                       if (tbl->proxy_redo && netif_running(dev)) {
+                               rcu_read_lock();
                                 tbl->proxy_redo(skb);
-                       else
+                               rcu_read_unlock();
+                       } else {
                                 kfree_skb(skb);
+                       }
  
                         dev_put(dev);
                 } else if (!sched_next || tdif < sched_next)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c

index adf84dd8c7b5b44a215b7e79a3b3a838fe09b7d2..52622517e0d883c5315114ee16d9dd0ecf24eabf 100644 (file)
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -558,13 +558,14 @@ int __netpoll_rx(struct sk_buff *skb)
         if (skb_shared(skb))
                 goto out;
  
-       iph = (struct iphdr *)skb->data;
         if (!pskb_may_pull(skb, sizeof(struct iphdr)))
                 goto out;
+       iph = (struct iphdr *)skb->data;
         if (iph->ihl < 5 || iph->version != 4)
                 goto out;
         if (!pskb_may_pull(skb, iph->ihl*4))
                 goto out;
+       iph = (struct iphdr *)skb->data;
         if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
                 goto out;
  
@@ -579,6 +580,7 @@ int __netpoll_rx(struct sk_buff *skb)
         if (pskb_trim_rcsum(skb, len))
                 goto out;
  
+       iph = (struct iphdr *)skb->data;
         if (iph->protocol != IPPROTO_UDP)
                 goto out;
  
diff --git a/net/core/scm.c b/net/core/scm.c

index 4c1ef026d6955b8cadaf77ceee6538dd4b4e1800..811b53fb330e8a40e640db483965acfc8c55cc4a 100644 (file)
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -192,7 +192,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
                                         goto error;
  
                                 cred->uid = cred->euid = p->creds.uid;
-                               cred->gid = cred->egid = p->creds.uid;
+                               cred->gid = cred->egid = p->creds.gid;
                                 put_cred(p->cred);
                                 p->cred = cred;
                         }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c

index 27002dffe7ed3ea4df4f9b8d77b4210615d668c2..387703f56fcef6784d0d449e38d4cd73cdf8ce3e 100644 (file)
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -611,8 +611,21 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
  }
  EXPORT_SYMBOL_GPL(skb_morph);
  
-/* skb frags copy userspace buffers to kernel */
-static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
+/*     skb_copy_ubufs  -       copy userspace skb frags buffers to kernel
+ *     @skb: the skb to modify
+ *     @gfp_mask: allocation priority
+ *
+ *     This must be called on SKBTX_DEV_ZEROCOPY skb.
+ *     It will copy all frags into kernel and drop the reference
+ *     to userspace pages.
+ *
+ *     If this function is called from an interrupt gfp_mask() must be
+ *     %GFP_ATOMIC.
+ *
+ *     Returns 0 on success or a negative error code on failure
+ *     to allocate kernel memory to copy to.
+ */
+int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
  {
         int i;
         int num_frags = skb_shinfo(skb)->nr_frags;
@@ -652,6 +665,8 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
                 skb_shinfo(skb)->frags[i - 1].page = head;
                 head = (struct page *)head->private;
         }
+
+       skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
         return 0;
  }
  
@@ -677,7 +692,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
         if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
                 if (skb_copy_ubufs(skb, gfp_mask))
                         return NULL;
-               skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
         }
  
         n = skb + 1;
@@ -803,7 +817,6 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
                                 n = NULL;
                                 goto out;
                         }
-                       skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
                 }
                 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                         skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
@@ -896,7 +909,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
                 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
                         if (skb_copy_ubufs(skb, gfp_mask))
                                 goto nofrags;
-                       skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY;
                 }
                 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
                         get_page(skb_shinfo(skb)->frags[i].page);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c

index 27997d35ebd3f405bf1df2a2d2d599bfe7fbbdb9..a2468363978ea820383c1da988a81b984ea435a5 100644 (file)
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -340,7 +340,7 @@ void ether_setup(struct net_device *dev)
         dev->addr_len           = ETH_ALEN;
         dev->tx_queue_len       = 1000; /* Ethernet wants good queues */
         dev->flags              = IFF_BROADCAST|IFF_MULTICAST;
-       dev->priv_flags         = IFF_TX_SKB_SHARING;
+       dev->priv_flags         |= IFF_TX_SKB_SHARING;
  
         memset(dev->broadcast, 0xFF, ETH_ALEN);
  
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c

index 1b745d412cf6eb8c0b2d424ab5e5c8cbbc439248..dd2b9478ddd1537e0a3cf812e755cfd5cda68e33 100644 (file)
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -466,8 +466,13 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                 goto out;
  
         if (addr->sin_family != AF_INET) {
+               /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
+                * only if s_addr is INADDR_ANY.
+                */
                 err = -EAFNOSUPPORT;
-               goto out;
+               if (addr->sin_family != AF_UNSPEC ||
+                   addr->sin_addr.s_addr != htonl(INADDR_ANY))
+                       goto out;
         }
  
         chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c

index 33e2c35b74b770f526045776e355e01114477fd9..80106d89d548d841d5986533f749572eb75c3a43 100644 (file)
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -142,6 +142,14 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
  };
  
  /* Release a nexthop info record */
+static void free_fib_info_rcu(struct rcu_head *head)
+{
+       struct fib_info *fi = container_of(head, struct fib_info, rcu);
+
+       if (fi->fib_metrics != (u32 *) dst_default_metrics)
+               kfree(fi->fib_metrics);
+       kfree(fi);
+}
  
  void free_fib_info(struct fib_info *fi)
  {
@@ -156,7 +164,7 @@ void free_fib_info(struct fib_info *fi)
         } endfor_nexthops(fi);
         fib_info_cnt--;
         release_net(fi->fib_net);
-       kfree_rcu(fi, rcu);
+       call_rcu(&fi->rcu, free_fib_info_rcu);
  }
  
  void fib_release_info(struct fib_info *fi)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c

index 283c0a26e03f9ba2e3e0532638b6810fb4f7f349..d577199eabd5c3f179acd8e3d7f6602837fb8b5b 100644 (file)
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -767,7 +767,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
                         break;
                 for (i=0; i<nsrcs; i++) {
                         /* skip inactive filters */
-                       if (pmc->sfcount[MCAST_INCLUDE] ||
+                       if (psf->sf_count[MCAST_INCLUDE] ||
                             pmc->sfcount[MCAST_EXCLUDE] !=
                             psf->sf_count[MCAST_EXCLUDE])
                                 continue;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c

index 77d3eded665ad2e30c03f8e62ea30b29ad1b4bb9..8c6563361ab538cd49cae7594d9d67bdb9b0c7e9 100644 (file)
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -122,6 +122,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb)
         newskb->pkt_type = PACKET_LOOPBACK;
         newskb->ip_summed = CHECKSUM_UNNECESSARY;
         WARN_ON(!skb_dst(newskb));
+       skb_dst_force(newskb);
         netif_rx_ni(newskb);
         return 0;
  }
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c

index ab0c9efd1efabe7c003daef3f6d258ec38604817..8905e92f896a4a58404c297441ef4a9aa1d6f39a 100644 (file)
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1067,7 +1067,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt);
   */
  
  static int do_ip_getsockopt(struct sock *sk, int level, int optname,
-                           char __user *optval, int __user *optlen)
+                           char __user *optval, int __user *optlen, unsigned flags)
  {
         struct inet_sock *inet = inet_sk(sk);
         int val;
@@ -1240,7 +1240,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
  
                 msg.msg_control = optval;
                 msg.msg_controllen = len;
-               msg.msg_flags = 0;
+               msg.msg_flags = flags;
  
                 if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
                         struct in_pktinfo info;
@@ -1294,7 +1294,7 @@ int ip_getsockopt(struct sock *sk, int level,
  {
         int err;
  
-       err = do_ip_getsockopt(sk, level, optname, optval, optlen);
+       err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
  #ifdef CONFIG_NETFILTER
         /* we need to exclude all possible ENOPROTOOPTs except default case */
         if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
@@ -1327,7 +1327,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
                 return compat_mc_getsockopt(sk, level, optname, optval, optlen,
                         ip_getsockopt);
  
-       err = do_ip_getsockopt(sk, level, optname, optval, optlen);
+       err = do_ip_getsockopt(sk, level, optname, optval, optlen,
+               MSG_CMSG_COMPAT);
  
  #ifdef CONFIG_NETFILTER
         /* we need to exclude all possible ENOPROTOOPTs except default case */
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c

index 2e97e3ec1eb72a25d5c6bd084f2c17f53e7e4d4d..929b27bdeb79829801b4be02f9371d6538a161e8 100644 (file)
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -18,17 +18,15 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
         struct rtable *rt;
         struct flowi4 fl4 = {};
         __be32 saddr = iph->saddr;
-       __u8 flags = 0;
+       __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
         unsigned int hh_len;
  
-       if (!skb->sk && addr_type != RTN_LOCAL) {
-               if (addr_type == RTN_UNSPEC)
-                       addr_type = inet_addr_type(net, saddr);
-               if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
-                       flags |= FLOWI_FLAG_ANYSRC;
-               else
-                       saddr = 0;
-       }
+       if (addr_type == RTN_UNSPEC)
+               addr_type = inet_addr_type(net, saddr);
+       if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
+               flags |= FLOWI_FLAG_ANYSRC;
+       else
+               saddr = 0;
  
         /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
          * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
@@ -38,7 +36,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
         fl4.flowi4_tos = RT_TOS(iph->tos);
         fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
         fl4.flowi4_mark = skb->mark;
-       fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags;
+       fl4.flowi4_flags = flags;
         rt = ip_route_output_key(net, &fl4);
         if (IS_ERR(rt))
                 return -1;
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c

index 5c9b9d963918aa538086239d2351afea574324cb..e59aabd0eae4f9a63559672914db90d5a54c97b6 100644 (file)
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
         return skb;
  
  nlmsg_failure:
+       kfree_skb(skb);
         *errp = -EINVAL;
         printk(KERN_ERR "ip_queue: error creating packet message\n");
         return NULL;
@@ -313,7 +314,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
  {
         struct nf_queue_entry *entry;
  
-       if (vmsg->value > NF_MAX_VERDICT)
+       if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
                 return -EINVAL;
  
         entry = ipq_find_dequeue_entry(vmsg->id);
@@ -358,12 +359,9 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg,
                 break;
  
         case IPQM_VERDICT:
-               if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
-                       status = -EINVAL;
-               else
-                       status = ipq_set_verdict(&pmsg->msg.verdict,
-                                                len - sizeof(*pmsg));
-                       break;
+               status = ipq_set_verdict(&pmsg->msg.verdict,
+                                        len - sizeof(*pmsg));
+               break;
         default:
                 status = -EINVAL;
         }
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c

index b14ec7d03b6e70b9a5a1823261ac18cb40462035..4bfad5da94f4e879615cf0a0b0f9ba008fd53f85 100644 (file)
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -254,6 +254,8 @@ static const struct snmp_mib snmp4_net_list[] = {
         SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
         SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
         SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW),
+       SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES),
+       SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP),
         SNMP_MIB_SENTINEL
  };
  
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c

index 1457acb39cec4ca0c7f34707383f97abead4bed8..61714bd5292512142aa373fed50d68398372d561 100644 (file)
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -563,7 +563,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
         flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
                            RT_SCOPE_UNIVERSE,
                            inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
-                          FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0);
+                          inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP,
+                          daddr, saddr, 0, 0);
  
         if (!inet->hdrincl) {
                 err = raw_probe_proto_opt(&fl4, msg);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c

index e3dec1c9f09d2d406197cf8e6216a7a6743782d6..075212e41b839347dcaa04caca43ee49a914f88e 100644 (file)
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -722,7 +722,7 @@ static inline bool compare_hash_inputs(const struct rtable *rt1,
  {
         return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) |
                 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
-               (rt1->rt_iif ^ rt2->rt_iif)) == 0);
+               (rt1->rt_route_iif ^ rt2->rt_route_iif)) == 0);
  }
  
  static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
@@ -731,8 +731,8 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2)
                 ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) |
                 (rt1->rt_mark ^ rt2->rt_mark) |
                 (rt1->rt_key_tos ^ rt2->rt_key_tos) |
-               (rt1->rt_oif ^ rt2->rt_oif) |
-               (rt1->rt_iif ^ rt2->rt_iif)) == 0;
+               (rt1->rt_route_iif ^ rt2->rt_route_iif) |
+               (rt1->rt_oif ^ rt2->rt_oif)) == 0;
  }
  
  static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
@@ -2320,8 +2320,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
              rth = rcu_dereference(rth->dst.rt_next)) {
                 if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) |
                      ((__force u32)rth->rt_key_src ^ (__force u32)saddr) |
-                    (rth->rt_iif ^ iif) |
-                    rth->rt_oif |
+                    (rth->rt_route_iif ^ iif) |
                      (rth->rt_key_tos ^ tos)) == 0 &&
                     rth->rt_mark == skb->mark &&
                     net_eq(dev_net(rth->dst.dev), net) &&
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c

index 92bb9434b338d0064becad817e2aaf96f402870d..3bc5c8f7c71bba52e2a005d95566e52327ef7fa6 100644 (file)
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -276,7 +276,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
         int mss;
         struct rtable *rt;
         __u8 rcv_wscale;
-       bool ecn_ok;
+       bool ecn_ok = false;
  
         if (!sysctl_tcp_syncookies || !th->ack || th->rst)
                 goto out;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index ea0d2183df4bc7f862fdf25eff2c74483880f26a..21fab3edb92cde6bbbbb8aff46b09db948ca8b6f 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1124,7 +1124,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,
                 return 0;
  
         /* ...Then it's D-SACK, and must reside below snd_una completely */
-       if (!after(end_seq, tp->snd_una))
+       if (after(end_seq, tp->snd_una))
                 return 0;
  
         if (!before(start_seq, tp->undo_marker))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c

index 1c12b8ec849dcff6b5363338a42ce00e06d9246a..c34f015139453245238e7bd5086c35d5b5a760f4 100644 (file)
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -808,20 +808,38 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
         kfree(inet_rsk(req)->opt);
  }
  
-static void syn_flood_warning(const struct sk_buff *skb)
+/*
+ * Return 1 if a syncookie should be sent
+ */
+int tcp_syn_flood_action(struct sock *sk,
+                        const struct sk_buff *skb,
+                        const char *proto)
  {
-       const char *msg;
+       const char *msg = "Dropping request";
+       int want_cookie = 0;
+       struct listen_sock *lopt;
+
+
  
  #ifdef CONFIG_SYN_COOKIES
-       if (sysctl_tcp_syncookies)
+       if (sysctl_tcp_syncookies) {
                 msg = "Sending cookies";
-       else
+               want_cookie = 1;
+               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
+       } else
  #endif
-               msg = "Dropping request";
+               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
  
-       pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
-                               ntohs(tcp_hdr(skb)->dest), msg);
+       lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
+       if (!lopt->synflood_warned) {
+               lopt->synflood_warned = 1;
+               pr_info("%s: Possible SYN flooding on port %d. %s. "
+                       " Check SNMP counters.\n",
+                       proto, ntohs(tcp_hdr(skb)->dest), msg);
+       }
+       return want_cookie;
  }
+EXPORT_SYMBOL(tcp_syn_flood_action);
  
  /*
   * Save and compile IPv4 options into the request_sock if needed.
@@ -1235,11 +1253,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
         __be32 saddr = ip_hdr(skb)->saddr;
         __be32 daddr = ip_hdr(skb)->daddr;
         __u32 isn = TCP_SKB_CB(skb)->when;
-#ifdef CONFIG_SYN_COOKIES
         int want_cookie = 0;
-#else
-#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
-#endif
  
         /* Never answer to SYNs send to broadcast or multicast */
         if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1250,14 +1264,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
          * evidently real one.
          */
         if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
-               if (net_ratelimit())
-                       syn_flood_warning(skb);
-#ifdef CONFIG_SYN_COOKIES
-               if (sysctl_tcp_syncookies) {
-                       want_cookie = 1;
-               } else
-#endif
-               goto drop;
+               want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
+               if (!want_cookie)
+                       goto drop;
         }
  
         /* Accept backlog is full. If we have already queued enough
@@ -1303,9 +1312,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
                 while (l-- > 0)
                         *c++ ^= *hash_location++;
  
-#ifdef CONFIG_SYN_COOKIES
                 want_cookie = 0;        /* not our kind of cookie */
-#endif
                 tmp_ext.cookie_out_never = 0; /* false */
                 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
         } else if (!tp->rx_opt.cookie_in_always) {
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c

index 9ef1831746efb70a1776791279e3c263f0be18d0..b46e9f88ce370fecdb84a84d570f43771bf4896c 100644 (file)
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -599,7 +599,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
         return 0;
  }
  
-int datagram_send_ctl(struct net *net,
+int datagram_send_ctl(struct net *net, struct sock *sk,
                       struct msghdr *msg, struct flowi6 *fl6,
                       struct ipv6_txoptions *opt,
                       int *hlimit, int *tclass, int *dontfrag)
@@ -658,7 +658,8 @@ int datagram_send_ctl(struct net *net,
  
                         if (addr_type != IPV6_ADDR_ANY) {
                                 int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
-                               if (!ipv6_chk_addr(net, &src_info->ipi6_addr,
+                               if (!inet_sk(sk)->transparent &&
+                                   !ipv6_chk_addr(net, &src_info->ipi6_addr,
                                                    strict ? dev : NULL, 0))
                                         err = -EINVAL;
                                 else
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c

index f3caf1b8d572770754b148848a30dbef38ff78ca..5430394501938dd4b288113162c060831f2b8db7 100644 (file)
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -322,8 +322,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo
  }
  
  static struct ip6_flowlabel *
-fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
-         int optlen, int *err_p)
+fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
+         char __user *optval, int optlen, int *err_p)
  {
         struct ip6_flowlabel *fl = NULL;
         int olen;
@@ -360,7 +360,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
                 msg.msg_control = (void*)(fl->opt+1);
                 memset(&flowi6, 0, sizeof(flowi6));
  
-               err = datagram_send_ctl(net, &msg, &flowi6, fl->opt, &junk,
+               err = datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, &junk,
                                         &junk, &junk);
                 if (err)
                         goto done;
@@ -528,7 +528,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
                 if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
                         return -EINVAL;
  
-               fl = fl_create(net, &freq, optval, optlen, &err);
+               fl = fl_create(net, sk, &freq, optval, optlen, &err);
                 if (fl == NULL)
                         return err;
                 sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c

index 9cb191ecaba8e7992834242364c745287157c317..2fbda5fc4cc48ffd5a1286b3a26f887148f9af38 100644 (file)
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -475,7 +475,7 @@ sticky_done:
                 msg.msg_controllen = optlen;
                 msg.msg_control = (void*)(opt+1);
  
-               retv = datagram_send_ctl(net, &msg, &fl6, opt, &junk, &junk,
+               retv = datagram_send_ctl(net, sk, &msg, &fl6, opt, &junk, &junk,
                                          &junk);
                 if (retv)
                         goto done;
@@ -913,7 +913,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
  }
  
  static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
-                   char __user *optval, int __user *optlen)
+                   char __user *optval, int __user *optlen, unsigned flags)
  {
         struct ipv6_pinfo *np = inet6_sk(sk);
         int len;
@@ -962,7 +962,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
  
                 msg.msg_control = optval;
                 msg.msg_controllen = len;
-               msg.msg_flags = 0;
+               msg.msg_flags = flags;
  
                 lock_sock(sk);
                 skb = np->pktoptions;
@@ -1222,7 +1222,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
         if(level != SOL_IPV6)
                 return -ENOPROTOOPT;
  
-       err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
+       err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, 0);
  #ifdef CONFIG_NETFILTER
         /* we need to exclude all possible ENOPROTOOPTs except default case */
         if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
@@ -1264,7 +1264,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
                 return compat_mc_getsockopt(sk, level, optname, optval, optlen,
                         ipv6_getsockopt);
  
-       err = do_ipv6_getsockopt(sk, level, optname, optval, optlen);
+       err = do_ipv6_getsockopt(sk, level, optname, optval, optlen,
+                                MSG_CMSG_COMPAT);
  #ifdef CONFIG_NETFILTER
         /* we need to exclude all possible ENOPROTOOPTs except default case */
         if (err == -ENOPROTOOPT && optname != IPV6_2292PKTOPTIONS) {
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c

index 3e6ebcdb4779761a5b35a362b9453d7017551f50..ee7839f4d6e3450c222f2b2677d3b9cf0709d21e 100644 (file)
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1059,7 +1059,7 @@ static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
                         break;
                 for (i=0; i<nsrcs; i++) {
                         /* skip inactive filters */
-                       if (pmc->mca_sfcount[MCAST_INCLUDE] ||
+                       if (psf->sf_count[MCAST_INCLUDE] ||
                             pmc->mca_sfcount[MCAST_EXCLUDE] !=
                             psf->sf_count[MCAST_EXCLUDE])
                                 continue;
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c

index 249394863284bcb2edcb3228183a953393df1a26..e63c3972a739eef92baef9897e6d4618670ec9b5 100644 (file)
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
         return skb;
  
  nlmsg_failure:
+       kfree_skb(skb);
         *errp = -EINVAL;
         printk(KERN_ERR "ip6_queue: error creating packet message\n");
         return NULL;
@@ -313,7 +314,7 @@ ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
  {
         struct nf_queue_entry *entry;
  
-       if (vmsg->value > NF_MAX_VERDICT)
+       if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
                 return -EINVAL;
  
         entry = ipq_find_dequeue_entry(vmsg->id);
@@ -358,12 +359,9 @@ ipq_receive_peer(struct ipq_peer_msg *pmsg,
                 break;
  
         case IPQM_VERDICT:
-               if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
-                       status = -EINVAL;
-               else
-                       status = ipq_set_verdict(&pmsg->msg.verdict,
-                                                len - sizeof(*pmsg));
-                       break;
+               status = ipq_set_verdict(&pmsg->msg.verdict,
+                                        len - sizeof(*pmsg));
+               break;
         default:
                 status = -EINVAL;
         }
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c

index 6a79f3081bdb1b7922b4e36d472afbc46efb5361..343852e5c703d7c3fd028f083d9e42e6c91cc892 100644 (file)
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -817,8 +817,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
                 memset(opt, 0, sizeof(struct ipv6_txoptions));
                 opt->tot_len = sizeof(struct ipv6_txoptions);
  
-               err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit,
-                                       &tclass, &dontfrag);
+               err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+                                       &hlimit, &tclass, &dontfrag);
                 if (err < 0) {
                         fl6_sock_release(flowlabel);
                         return err;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c

index 9e69eb0ec6dd48a96d92bcf59c2cd89ad5e8412a..1250f902067016d694ea59910778555ad68702bc 100644 (file)
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -104,6 +104,9 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
         struct inet_peer *peer;
         u32 *p = NULL;
  
+       if (!(rt->dst.flags & DST_HOST))
+               return NULL;
+
         if (!rt->rt6i_peer)
                 rt6_bind_peer(rt, 1);
  
@@ -252,6 +255,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
         struct inet6_dev *idev = rt->rt6i_idev;
         struct inet_peer *peer = rt->rt6i_peer;
  
+       if (!(rt->dst.flags & DST_HOST))
+               dst_destroy_metrics_generic(dst);
+
         if (idev != NULL) {
                 rt->rt6i_idev = NULL;
                 in6_dev_put(idev);
@@ -723,9 +729,7 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
                 }
  
-               rt->rt6i_dst.plen = 128;
                 rt->rt6i_flags |= RTF_CACHE;
-               rt->dst.flags |= DST_HOST;
  
  #ifdef CONFIG_IPV6_SUBTREES
                 if (rt->rt6i_src.plen && saddr) {
@@ -775,9 +779,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
  
         if (rt) {
-               rt->rt6i_dst.plen = 128;
                 rt->rt6i_flags |= RTF_CACHE;
-               rt->dst.flags |= DST_HOST;
                 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
         }
         return rt;
@@ -1078,12 +1080,15 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
                         neigh = NULL;
         }
  
-       rt->rt6i_idev     = idev;
+       rt->dst.flags |= DST_HOST;
+       rt->dst.output  = ip6_output;
         dst_set_neighbour(&rt->dst, neigh);
         atomic_set(&rt->dst.__refcnt, 1);
-       ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
-       rt->dst.output  = ip6_output;
+
+       ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
+       rt->rt6i_dst.plen = 128;
+       rt->rt6i_idev     = idev;
  
         spin_lock_bh(&icmp6_dst_lock);
         rt->dst.next = icmp6_dst_gc_list;
@@ -1261,6 +1266,14 @@ int ip6_route_add(struct fib6_config *cfg)
         if (rt->rt6i_dst.plen == 128)
                rt->dst.flags |= DST_HOST;
  
+       if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
+               u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
+               if (!metrics) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+               dst_init_metrics(&rt->dst, metrics, 0);
+       }
  #ifdef CONFIG_IPV6_SUBTREES
         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
         rt->rt6i_src.plen = cfg->fc_src_len;
@@ -1607,9 +1620,6 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
         if (on_link)
                 nrt->rt6i_flags &= ~RTF_GATEWAY;
  
-       nrt->rt6i_dst.plen = 128;
-       nrt->dst.flags |= DST_HOST;
-
         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
         dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
  
@@ -1754,9 +1764,10 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
         if (rt) {
                 rt->dst.input = ort->dst.input;
                 rt->dst.output = ort->dst.output;
+               rt->dst.flags |= DST_HOST;
  
                 ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
-               rt->rt6i_dst.plen = ort->rt6i_dst.plen;
+               rt->rt6i_dst.plen = 128;
                 dst_copy_metrics(&rt->dst, &ort->dst);
                 rt->dst.error = ort->dst.error;
                 rt->rt6i_idev = ort->rt6i_idev;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c

index 07bf1085458f24ee1b8f9487c150158dd587675f..00b15ac7a70256f8bb780007cd960f725cbe8159 100644 (file)
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -672,6 +672,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
         if (skb->protocol != htons(ETH_P_IPV6))
                 goto tx_error;
  
+       if (tos == 1)
+               tos = ipv6_get_dsfield(iph6);
+
         /* ISATAP (RFC4214) - must come before 6to4 */
         if (dev->priv_flags & IFF_ISATAP) {
                 struct neighbour *neigh = NULL;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c

index 89d5bf8062223338cc1cbc548d53171b957734dc..ac838965ff34551693a44f758b5a2e8bcef79c74 100644 (file)
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -165,7 +165,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
         int mss;
         struct dst_entry *dst;
         __u8 rcv_wscale;
-       bool ecn_ok;
+       bool ecn_ok = false;
  
         if (!sysctl_tcp_syncookies || !th->ack || th->rst)
                 goto out;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c

index d1fb63f4aeb76351bd55d8c6bfa8cf0e8e20c60f..3c9fa618b69daae984931c171d4cb54b150c8166 100644 (file)
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -531,20 +531,6 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
         return tcp_v6_send_synack(sk, req, rvp);
  }
  
-static inline void syn_flood_warning(struct sk_buff *skb)
-{
-#ifdef CONFIG_SYN_COOKIES
-       if (sysctl_tcp_syncookies)
-               printk(KERN_INFO
-                      "TCPv6: Possible SYN flooding on port %d. "
-                      "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
-       else
-#endif
-               printk(KERN_INFO
-                      "TCPv6: Possible SYN flooding on port %d. "
-                      "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
-}
-
  static void tcp_v6_reqsk_destructor(struct request_sock *req)
  {
         kfree_skb(inet6_rsk(req)->pktopts);
@@ -1179,11 +1165,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
         struct tcp_sock *tp = tcp_sk(sk);
         __u32 isn = TCP_SKB_CB(skb)->when;
         struct dst_entry *dst = NULL;
-#ifdef CONFIG_SYN_COOKIES
         int want_cookie = 0;
-#else
-#define want_cookie 0
-#endif
  
         if (skb->protocol == htons(ETH_P_IP))
                 return tcp_v4_conn_request(sk, skb);
@@ -1192,14 +1174,9 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
                 goto drop;
  
         if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
-               if (net_ratelimit())
-                       syn_flood_warning(skb);
-#ifdef CONFIG_SYN_COOKIES
-               if (sysctl_tcp_syncookies)
-                       want_cookie = 1;
-               else
-#endif
-               goto drop;
+               want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
+               if (!want_cookie)
+                       goto drop;
         }
  
         if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
@@ -1249,9 +1226,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
                 while (l-- > 0)
                         *c++ ^= *hash_location++;
  
-#ifdef CONFIG_SYN_COOKIES
                 want_cookie = 0;        /* not our kind of cookie */
-#endif
                 tmp_ext.cookie_out_never = 0; /* false */
                 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
         } else if (!tp->rx_opt.cookie_in_always) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c

index 29213b51c4998e09a79f60fd534015269da54570..bb95e8e1c6f9b29c7cf21a549cddbb852f03db5c 100644 (file)
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1090,8 +1090,8 @@ do_udp_sendmsg:
                 memset(opt, 0, sizeof(struct ipv6_txoptions));
                 opt->tot_len = sizeof(*opt);
  
-               err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit,
-                                       &tclass, &dontfrag);
+               err = datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt,
+                                       &hlimit, &tclass, &dontfrag);
                 if (err < 0) {
                         fl6_sock_release(flowlabel);
                         return err;
diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c

index d0b70dadf73bee8ad5160329873e37228e7d18de..2615ffc8e785df65ca5bbba9a7a883881b607316 100644 (file)
--- a/net/irda/irsysctl.c
+++ b/net/irda/irsysctl.c
@@ -40,9 +40,9 @@ extern int  sysctl_slot_timeout;
  extern int  sysctl_fast_poll_increase;
  extern char sysctl_devname[];
  extern int  sysctl_max_baud_rate;
-extern int  sysctl_min_tx_turn_time;
-extern int  sysctl_max_tx_data_size;
-extern int  sysctl_max_tx_window;
+extern unsigned int sysctl_min_tx_turn_time;
+extern unsigned int sysctl_max_tx_data_size;
+extern unsigned int sysctl_max_tx_window;
  extern int  sysctl_max_noreply_time;
  extern int  sysctl_warn_noreply_time;
  extern int  sysctl_lap_keepalive_time;
diff --git a/net/irda/qos.c b/net/irda/qos.c

index 1b51bcf423945bc036022465e51ebaffed7a10f1..4369f7f41bcbc7065a7920cf6bb3fccf69588c77 100644 (file)
--- a/net/irda/qos.c
+++ b/net/irda/qos.c
@@ -60,7 +60,7 @@ int sysctl_max_noreply_time = 12;
   * Default is 10us which means using the unmodified value given by the
   * peer except if it's 0 (0 is likely a bug in the other stack).
   */
-unsigned sysctl_min_tx_turn_time = 10;
+unsigned int sysctl_min_tx_turn_time = 10;
  /*
   * Maximum data size to be used in transmission in payload of LAP frame.
   * There is a bit of confusion in the IrDA spec :
@@ -75,13 +75,13 @@ unsigned sysctl_min_tx_turn_time = 10;
   * bytes frames or all negotiated frame sizes, but you can use the sysctl
   * to play with this value anyway.
   * Jean II */
-unsigned sysctl_max_tx_data_size = 2042;
+unsigned int sysctl_max_tx_data_size = 2042;
  /*
   * Maximum transmit window, i.e. number of LAP frames between turn-around.
   * This allow to override what the peer told us. Some peers are buggy and
   * don't always support what they tell us.
   * Jean II */
-unsigned sysctl_max_tx_window = 7;
+unsigned int sysctl_max_tx_window = 7;
  
  static int irlap_param_baud_rate(void *instance, irda_param_t *param, int get);
  static int irlap_param_link_disconnect(void *instance, irda_param_t *parm,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c

index 866f269183cf9a1532f317e3b9f52bc437a98213..acb44230b2512996ff754126f617942dca572683 100644 (file)
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1012,7 +1012,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
         cancel_work_sync(&local->reconfig_filter);
  
         ieee80211_clear_tx_pending(local);
-       sta_info_stop(local);
         rate_control_deinitialize(local);
  
         if (skb_queue_len(&local->skb_queue) ||
@@ -1024,6 +1023,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
  
         destroy_workqueue(local->workqueue);
         wiphy_unregister(local->hw.wiphy);
+       sta_info_stop(local);
         ieee80211_wep_free(local);
         ieee80211_led_exit(local);
         kfree(local->int_scan_req);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c

index 3db78b696c5ce4d6c844a4c0e81e4d3d7e32059a..21070e9bc8d0584ec65214e2ec3b3e40396833a3 100644 (file)
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -665,7 +665,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
                 BUG_ON(!sdata->bss);
  
                 atomic_dec(&sdata->bss->num_sta_ps);
-               __sta_info_clear_tim_bit(sdata->bss, sta);
+               sta_info_clear_tim_bit(sta);
         }
  
         local->num_sta--;
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c

index 2fd4565144defa5221bef7e458253de3d7df787f..31d56b23b9e9e743595cc721781bf215788ddd67 100644 (file)
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -364,6 +364,7 @@ pptp_inbound_pkt(struct sk_buff *skb,
                 break;
  
         case PPTP_WAN_ERROR_NOTIFY:
+       case PPTP_SET_LINK_INFO:
         case PPTP_ECHO_REQUEST:
         case PPTP_ECHO_REPLY:
                 /* I don't have to explain these ;) */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c

index 37bf94394be0d04a3285c0761e804225ef9c2e48..8235b86b4e87efdccf73d0f552f0a1a01a9f3763 100644 (file)
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -409,7 +409,7 @@ static void tcp_options(const struct sk_buff *skb,
                         if (opsize < 2) /* "silly options" */
                                 return;
                         if (opsize > length)
-                               break;  /* don't parse partial options */
+                               return; /* don't parse partial options */
  
                         if (opcode == TCPOPT_SACK_PERM
                             && opsize == TCPOLEN_SACK_PERM)
@@ -447,7 +447,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
         BUG_ON(ptr == NULL);
  
         /* Fast path for timestamp-only option */
-       if (length == TCPOLEN_TSTAMP_ALIGNED*4
+       if (length == TCPOLEN_TSTAMP_ALIGNED
             && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
                                        | (TCPOPT_NOP << 16)
                                        | (TCPOPT_TIMESTAMP << 8)
@@ -469,7 +469,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
                         if (opsize < 2) /* "silly options" */
                                 return;
                         if (opsize > length)
-                               break;  /* don't parse partial options */
+                               return; /* don't parse partial options */
  
                         if (opcode == TCPOPT_SACK
                             && opsize >= (TCPOLEN_SACK_BASE
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c

index 5b466cd1272feaf0d9b66caa42a542c8370bd1bf..84d0fd47636a19cbc5c16b0f3847f131e58ed876 100644 (file)
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -312,6 +312,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
                 }
                 break;
         case NF_STOLEN:
+               break;
         default:
                 kfree_skb(skb);
         }
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c

index 00bd475eab4b615ca48946c05b18566325ced03a..a80b0cb03f17adde8f8a7a5a0ae6279aace2d5d0 100644 (file)
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -646,8 +646,8 @@ verdicthdr_get(const struct nlattr * const nfqa[])
                 return NULL;
  
         vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
-       verdict = ntohl(vhdr->verdict);
-       if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT)
+       verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK;
+       if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN)
                 return NULL;
         return vhdr;
  }
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c

index 76a083184d8e6991b0e49e4fb77db3e78d47488b..ed0db15ab00e3c4f07f5931e27bc1f657988934c 100644 (file)
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -78,7 +78,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
  {
         struct xt_rateest_match_info *info = par->matchinfo;
         struct xt_rateest *est1, *est2;
-       int ret = false;
+       int ret = -EINVAL;
  
         if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS |
                                      XT_RATEEST_MATCH_REL)) != 1)
@@ -101,13 +101,12 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
         if (!est1)
                 goto err1;
  
+       est2 = NULL;
         if (info->flags & XT_RATEEST_MATCH_REL) {
                 est2 = xt_rateest_lookup(info->name2);
                 if (!est2)
                         goto err2;
-       } else
-               est2 = NULL;
-
+       }
  
         info->est1 = est1;
         info->est2 = est2;
@@ -116,7 +115,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
  err2:
         xt_rateest_put(est1);
  err1:
-       return -EINVAL;
+       return ret;
  }
  
  static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c

index 58107d06084614c6281a48f1d4c835038eed323e..9c24de10a6579b78e452e47912c56e5d894e58ea 100644 (file)
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -341,11 +341,11 @@ int netlbl_cfg_cipsov4_map_add(u32 doi,
  
         entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
         if (entry == NULL)
-               return -ENOMEM;
+               goto out_entry;
         if (domain != NULL) {
                 entry->domain = kstrdup(domain, GFP_ATOMIC);
                 if (entry->domain == NULL)
-                       goto cfg_cipsov4_map_add_failure;
+                       goto out_domain;
         }
  
         if (addr == NULL && mask == NULL) {
@@ -354,13 +354,13 @@ int netlbl_cfg_cipsov4_map_add(u32 doi,
         } else if (addr != NULL && mask != NULL) {
                 addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC);
                 if (addrmap == NULL)
-                       goto cfg_cipsov4_map_add_failure;
+                       goto out_addrmap;
                 INIT_LIST_HEAD(&addrmap->list4);
                 INIT_LIST_HEAD(&addrmap->list6);
  
                 addrinfo = kzalloc(sizeof(*addrinfo), GFP_ATOMIC);
                 if (addrinfo == NULL)
-                       goto cfg_cipsov4_map_add_failure;
+                       goto out_addrinfo;
                 addrinfo->type_def.cipsov4 = doi_def;
                 addrinfo->type = NETLBL_NLTYPE_CIPSOV4;
                 addrinfo->list.addr = addr->s_addr & mask->s_addr;
@@ -374,7 +374,7 @@ int netlbl_cfg_cipsov4_map_add(u32 doi,
                 entry->type = NETLBL_NLTYPE_ADDRSELECT;
         } else {
                 ret_val = -EINVAL;
-               goto cfg_cipsov4_map_add_failure;
+               goto out_addrmap;
         }
  
         ret_val = netlbl_domhsh_add(entry, audit_info);
@@ -384,11 +384,15 @@ int netlbl_cfg_cipsov4_map_add(u32 doi,
         return 0;
  
  cfg_cipsov4_map_add_failure:
-       cipso_v4_doi_putdef(doi_def);
+       kfree(addrinfo);
+out_addrinfo:
+       kfree(addrmap);
+out_addrmap:
         kfree(entry->domain);
+out_domain:
         kfree(entry);
-       kfree(addrmap);
-       kfree(addrinfo);
+out_entry:
+       cipso_v4_doi_putdef(doi_def);
         return ret_val;
  }
  
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c

index 102fc212cd64093726dffa311ba41716d2cc0250..e051398fdf6baf4834bbe21bf13deac081594148 100644 (file)
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -196,8 +196,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
  
         skb2->skb_iif = skb->dev->ifindex;
         skb2->dev = dev;
-       dev_queue_xmit(skb2);
-       err = 0;
+       err = dev_queue_xmit(skb2);
  
  out:
         if (err) {
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h

index be4505ee67a9f90dbaa5a8ad951ea9f220b27f13..b01427924f8112dd0b923287976f285fd7674fbf 100644 (file)
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -425,7 +425,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
         struct rsvp_filter *f, **fp;
         struct rsvp_session *s, **sp;
         struct tc_rsvp_pinfo *pinfo = NULL;
-       struct nlattr *opt = tca[TCA_OPTIONS-1];
+       struct nlattr *opt = tca[TCA_OPTIONS];
         struct nlattr *tb[TCA_RSVP_MAX + 1];
         struct tcf_exts e;
         unsigned int h1, h2;
@@ -439,7 +439,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
         if (err < 0)
                 return err;
  
-       err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
+       err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
         if (err < 0)
                 return err;
  
@@ -449,8 +449,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
  
                 if (f->handle != handle && handle)
                         goto errout2;
-               if (tb[TCA_RSVP_CLASSID-1]) {
-                       f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+               if (tb[TCA_RSVP_CLASSID]) {
+                       f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
                         tcf_bind_filter(tp, &f->res, base);
                 }
  
@@ -462,7 +462,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
         err = -EINVAL;
         if (handle)
                 goto errout2;
-       if (tb[TCA_RSVP_DST-1] == NULL)
+       if (tb[TCA_RSVP_DST] == NULL)
                 goto errout2;
  
         err = -ENOBUFS;
@@ -471,19 +471,19 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
                 goto errout2;
  
         h2 = 16;
-       if (tb[TCA_RSVP_SRC-1]) {
-               memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+       if (tb[TCA_RSVP_SRC]) {
+               memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
                 h2 = hash_src(f->src);
         }
-       if (tb[TCA_RSVP_PINFO-1]) {
-               pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
+       if (tb[TCA_RSVP_PINFO]) {
+               pinfo = nla_data(tb[TCA_RSVP_PINFO]);
                 f->spi = pinfo->spi;
                 f->tunnelhdr = pinfo->tunnelhdr;
         }
-       if (tb[TCA_RSVP_CLASSID-1])
-               f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+       if (tb[TCA_RSVP_CLASSID])
+               f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
  
-       dst = nla_data(tb[TCA_RSVP_DST-1]);
+       dst = nla_data(tb[TCA_RSVP_DST]);
         h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
  
         err = -ENOMEM;
@@ -642,8 +642,7 @@ nla_put_failure:
         return -1;
  }
  
-static struct tcf_proto_ops RSVP_OPS = {
-       .next           =       NULL,
+static struct tcf_proto_ops RSVP_OPS __read_mostly = {
         .kind           =       RSVP_ID,
         .classify       =       rsvp_classify,
         .init           =       rsvp_init,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c

index 2a318f2dc3e532a90149a640d60848d10160a52b..b5d56a22b1d20f2605771ed8360a75f5d2f8ce75 100644 (file)
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -112,7 +112,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc *sch)
  
         for (prio = 0; prio < q->bands; prio++) {
                 struct Qdisc *qdisc = q->queues[prio];
-               struct sk_buff *skb = qdisc->dequeue(qdisc);
+               struct sk_buff *skb = qdisc_dequeue_peeked(qdisc);
                 if (skb) {
                         qdisc_bstats_update(sch, skb);
                         sch->q.qlen--;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c

index 167c880cf8da45240932d6fce85f43e9f2c71ddb..76388b083f283a2e2c62ab3f605ee8a12469454f 100644 (file)
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1689,6 +1689,11 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
                 case SCTP_CMD_PURGE_ASCONF_QUEUE:
                         sctp_asconf_queue_teardown(asoc);
                         break;
+
+               case SCTP_CMD_SET_ASOC:
+                       asoc = cmd->obj.asoc;
+                       break;
+
                 default:
                         pr_warn("Impossible command: %u, %p\n",
                                 cmd->verb, cmd->obj.ptr);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c

index 49b847b00f9956cc1eea2d5077bdcbff1b3a3285..a0f31e6c1c63c1c44e0c3a27a62b0a36f6dc55b4 100644 (file)
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2047,6 +2047,12 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
         sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
         sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
  
+       /* Restore association pointer to provide SCTP command interpeter
+        * with a valid context in case it needs to manipulate
+        * the queues */
+       sctp_add_cmd_sf(commands, SCTP_CMD_SET_ASOC,
+                        SCTP_ASOC((struct sctp_association *)asoc));
+
         return retval;
  
  nomem:
diff --git a/net/socket.c b/net/socket.c

index 24a77400b65e4e8fd63f825144bdc4891b5fced9..ffe92ca32f2acc031605e69d13614b6c8374bed3 100644 (file)
--- a/net/socket.c
+++ b/net/socket.c
@@ -1965,8 +1965,9 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
          * used_address->name_len is initialized to UINT_MAX so that the first
          * destination address never matches.
          */
-       if (used_address && used_address->name_len == msg_sys->msg_namelen &&
-           !memcmp(&used_address->name, msg->msg_name,
+       if (used_address && msg_sys->msg_name &&
+           used_address->name_len == msg_sys->msg_namelen &&
+           !memcmp(&used_address->name, msg_sys->msg_name,
                     used_address->name_len)) {
                 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
                 goto out_freectl;
@@ -1978,8 +1979,9 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
          */
         if (used_address && err >= 0) {
                 used_address->name_len = msg_sys->msg_namelen;
-               memcpy(&used_address->name, msg->msg_name,
-                      used_address->name_len);
+               if (msg_sys->msg_name)
+                       memcpy(&used_address->name, msg_sys->msg_name,
+                              used_address->name_len);
         }
  
  out_freectl:
diff --git a/net/wireless/core.c b/net/wireless/core.c

index 645437cfc464d505a3673870f8c0a6eb73287535..c14865172da70d0fd04918f503d0714042ec5e64 100644 (file)
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -616,6 +616,9 @@ int wiphy_register(struct wiphy *wiphy)
         if (res)
                 goto out_rm_dev;
  
+       rtnl_lock();
+       rdev->wiphy.registered = true;
+       rtnl_unlock();
         return 0;
  
  out_rm_dev:
@@ -647,6 +650,10 @@ void wiphy_unregister(struct wiphy *wiphy)
  {
         struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
  
+       rtnl_lock();
+       rdev->wiphy.registered = false;
+       rtnl_unlock();
+
         rfkill_unregister(rdev->rfkill);
  
         /* protect the device list */
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c

index c6e4ca6a7d2e0b1fcc6e450b1255a0140ae64cff..ff574597a8544030ef18b2e2186354ef46511bc3 100644 (file)
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -93,7 +93,8 @@ static int wiphy_suspend(struct device *dev, pm_message_t state)
  
         if (rdev->ops->suspend) {
                 rtnl_lock();
-               ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan);
+               if (rdev->wiphy.registered)
+                       ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan);
                 rtnl_unlock();
         }
  
@@ -112,7 +113,8 @@ static int wiphy_resume(struct device *dev)
  
         if (rdev->ops->resume) {
                 rtnl_lock();
-               ret = rdev->ops->resume(&rdev->wiphy);
+               if (rdev->wiphy.registered)
+                       ret = rdev->ops->resume(&rdev->wiphy);
                 rtnl_unlock();
         }
  
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl

index 9d761c95eca2988e0ff227baa8619b684d6eb33a..3dfc47134e51e9cbee67aab2490360927ff7c0f4 100755 (executable)
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2574,7 +2574,8 @@ sub process {
                                 } else {
                                         $cast = $cast2;
                                 }
-                               WARN("$call() should probably be ${call}_t($cast, $arg1, $arg2)\n" . $herecurr);
+                               WARN("MINMAX",
+                                    "$call() should probably be ${call}_t($cast, $arg1, $arg2)\n" . $herecurr);
                         }
                 }
  
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl

index eb2f1e64edf79b63069fc53399ce71d759ae8795..4594f334105110fefba64883186a2cd546641a82 100755 (executable)
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -1389,7 +1389,7 @@ sub vcs_exists {
         warn("$P: No supported VCS found.  Add --nogit to options?\n");
         warn("Using a git repository produces better results.\n");
         warn("Try Linus Torvalds' latest git repository using:\n");
-       warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git\n");
+       warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git\n");
         $printed_novcs = 1;
      }
      return 0;
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c

index c8439cf2a448869820cc5dbf175c45956d9d7698..2e43aec1c36b04b6335b83d9942ada9d499c5ff8 100644 (file)
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -710,8 +710,10 @@ static void tomoyo_read_profile(struct tomoyo_io_buffer *head)
                       head->r.index++)
                         if (ns->profile_ptr[head->r.index])
                                 break;
-               if (head->r.index == TOMOYO_MAX_PROFILES)
+               if (head->r.index == TOMOYO_MAX_PROFILES) {
+                       head->r.eof = true;
                         return;
+               }
                 head->r.step++;
                 break;
         case 2:
@@ -723,6 +725,7 @@ static void tomoyo_read_profile(struct tomoyo_io_buffer *head)
                         tomoyo_io_printf(head, "%u-COMMENT=", index);
                         tomoyo_set_string(head, comment ? comment->name : "");
                         tomoyo_set_lf(head);
+                       tomoyo_print_namespace(head);
                         tomoyo_io_printf(head, "%u-PREFERENCE={ ", index);
                         for (i = 0; i < TOMOYO_MAX_PREF; i++)
                                 tomoyo_io_printf(head, "%s=%u ",
diff --git a/tools/perf/Makefile b/tools/perf/Makefile

index 56d62d3fb16715525446a8ce900a2021e06aa9b3..3b8f7b80376b25f31a25eff483393f8248981dd3 100644 (file)
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -181,9 +181,9 @@ strip-libs = $(filter-out -l%,$(1))
  
  $(OUTPUT)python/perf.so: $(PYRF_OBJS)
         $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
-         --quiet build_ext \
-         --build-lib='$(OUTPUT)python' \
-         --build-temp='$(OUTPUT)python/temp'
+         --quiet build_ext; \
+       mkdir -p $(OUTPUT)python && \
+       cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/
  #
  # No Perl scripts right now:
  #
@@ -509,9 +509,13 @@ else
  
    PYTHON_WORD := $(call shell-wordify,$(PYTHON))
  
-  python-clean := $(PYTHON_WORD) util/setup.py clean \
-    --build-lib='$(OUTPUT)python' \
-    --build-temp='$(OUTPUT)python/temp'
+  # python extension build directories
+  PYTHON_EXTBUILD     := $(OUTPUT)python_ext_build/
+  PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
+  PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
+  export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
+
+  python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
  
    ifdef NO_LIBPYTHON
      $(call disable-python)
@@ -868,6 +872,9 @@ install: all
         $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'
         $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
  
+install-python_ext:
+       $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
+
  install-doc:
         $(MAKE) -C Documentation install
  
@@ -895,7 +902,7 @@ quick-install-html:
  ### Cleaning rules
  
  clean:
-       $(RM) $(OUTPUT){*.o,*/*.o,*/*/*.o,*/*/*/*.o,$(LIB_FILE),perf-archive}
+       $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS)
         $(RM) $(ALL_PROGRAMS) perf
         $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope*
         $(MAKE) -C Documentation/ clean
diff --git a/tools/perf/arch/arm/util/dwarf-regs.c b/tools/perf/arch/arm/util/dwarf-regs.c

index fff6450c8c999b6c0ae37597a67abaed900dc023..e8d5c551c69c08d4a17e89a370d0e4caedacb101 100644 (file)
--- a/tools/perf/arch/arm/util/dwarf-regs.c
+++ b/tools/perf/arch/arm/util/dwarf-regs.c
@@ -8,7 +8,10 @@
   * published by the Free Software Foundation.
   */
  
+#include <stdlib.h>
+#ifndef __UCLIBC__
  #include <libio.h>
+#endif
  #include <dwarf-regs.h>
  
  struct pt_regs_dwarfnum {
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c

index 9ac05aafd9b2563e9625945e5dfa93dc98dfabad..899080ace26761c497935e1ad0480faa628bf7c9 100644 (file)
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -942,10 +942,10 @@ static const char *record_args[] = {
         "-f",
         "-m", "1024",
         "-c", "1",
-       "-e", "lock:lock_acquire:r",
-       "-e", "lock:lock_acquired:r",
-       "-e", "lock:lock_contended:r",
-       "-e", "lock:lock_release:r",
+       "-e", "lock:lock_acquire",
+       "-e", "lock:lock_acquired",
+       "-e", "lock:lock_contended",
+       "-e", "lock:lock_release",
  };
  
  static int __cmd_record(int argc, const char **argv)
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c

index 5f2a5c7046dfd6d07b973a454ddc28332786b4ec..710ae3d0a489ba9aac850cf9c28baff6b267d9b6 100644 (file)
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -134,10 +134,18 @@ static int opt_show_lines(const struct option *opt __used,
  {
         int ret = 0;
  
-       if (str)
-               ret = parse_line_range_desc(str, &params.line_range);
-       INIT_LIST_HEAD(&params.line_range.line_list);
+       if (!str)
+               return 0;
+
+       if (params.show_lines) {
+               pr_warning("Warning: more than one --line options are"
+                          " detected. Only the first one is valid.\n");
+               return 0;
+       }
+
         params.show_lines = true;
+       ret = parse_line_range_desc(str, &params.line_range);
+       INIT_LIST_HEAD(&params.line_range.line_list);
  
         return ret;
  }
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c

index 80dc5b790e4790c22b7c4f23fcbae6f45b757acb..6b0519f885e4ecc436e9344a3c2f94865eafbd92 100644 (file)
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -30,8 +30,6 @@
  #include <sched.h>
  #include <sys/mman.h>
  
-#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
-
  enum write_mode_t {
         WRITE_FORCE,
         WRITE_APPEND
@@ -47,7 +45,7 @@ static int                    freq                            =   1000;
  static int                     output;
  static int                     pipe_output                     =      0;
  static const char              *output_name                    = NULL;
-static int                     group                           =      0;
+static bool                    group                           =  false;
  static int                     realtime_prio                   =      0;
  static bool                    nodelay                         =  false;
  static bool                    raw_samples                     =  false;
@@ -438,7 +436,6 @@ static void mmap_read_all(void)
  
  static int __cmd_record(int argc, const char **argv)
  {
-       int i;
         struct stat st;
         int flags;
         int err;
@@ -682,7 +679,6 @@ static int __cmd_record(int argc, const char **argv)
  
         for (;;) {
                 int hits = samples;
-               int thread;
  
                 mmap_read_all();
  
@@ -693,19 +689,8 @@ static int __cmd_record(int argc, const char **argv)
                         waking++;
                 }
  
-               if (done) {
-                       for (i = 0; i < evsel_list->cpus->nr; i++) {
-                               struct perf_evsel *pos;
-
-                               list_for_each_entry(pos, &evsel_list->entries, node) {
-                                       for (thread = 0;
-                                               thread < evsel_list->threads->nr;
-                                               thread++)
-                                               ioctl(FD(pos, i, thread),
-                                                       PERF_EVENT_IOC_DISABLE);
-                               }
-                       }
-               }
+               if (done)
+                       perf_evlist__disable(evsel_list);
         }
  
         if (quiet || signr == SIGUSR1)
@@ -768,6 +753,8 @@ const struct option record_options[] = {
                     "child tasks do not inherit counters"),
         OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
         OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
+       OPT_BOOLEAN(0, "group", &group,
+                   "put the counters into a counter group"),
         OPT_BOOLEAN('g', "call-graph", &call_graph,
                     "do call-graph (stack chain/backtrace) recording"),
         OPT_INCR('v', "verbose", &verbose,
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c

index f854efda76869412210ede431f4844a5e386cd34..d7ff277bdb78ba1d4a06f5042f8e50ea8053795b 100644 (file)
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -162,23 +162,22 @@ static int perf_session__setup_sample_type(struct perf_session *self)
  {
         if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
                 if (sort__has_parent) {
-                       fprintf(stderr, "selected --sort parent, but no"
-                                       " callchain data. Did you call"
-                                       " perf record without -g?\n");
+                       ui__warning("Selected --sort parent, but no "
+                                   "callchain data. Did you call "
+                                   "'perf record' without -g?\n");
                         return -EINVAL;
                 }
                 if (symbol_conf.use_callchain) {
-                       fprintf(stderr, "selected -g but no callchain data."
-                                       " Did you call perf record without"
-                                       " -g?\n");
+                       ui__warning("Selected -g but no callchain data. Did "
+                                   "you call 'perf record' without -g?\n");
                         return -1;
                 }
         } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
                    !symbol_conf.use_callchain) {
                         symbol_conf.use_callchain = true;
                         if (callchain_register_param(&callchain_param) < 0) {
-                               fprintf(stderr, "Can't register callchain"
-                                               " params\n");
+                               ui__warning("Can't register callchain "
+                                           "params.\n");
                                 return -EINVAL;
                         }
         }
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c

index dcfe8873c9a1b1b0813373aa2ae9365c639a16e7..5177964943e72ecd96881daf3bd48b71c1937faa 100644 (file)
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1637,23 +1637,29 @@ static struct perf_event_ops event_ops = {
         .ordered_samples        = true,
  };
  
-static int read_events(void)
+static void read_events(bool destroy, struct perf_session **psession)
  {
         int err = -EINVAL;
         struct perf_session *session = perf_session__new(input_name, O_RDONLY,
                                                          0, false, &event_ops);
         if (session == NULL)
-               return -ENOMEM;
+               die("No Memory");
  
         if (perf_session__has_traces(session, "record -R")) {
                 err = perf_session__process_events(session, &event_ops);
+               if (err)
+                       die("Failed to process events, error %d", err);
+
                 nr_events      = session->hists.stats.nr_events[0];
                 nr_lost_events = session->hists.stats.total_lost;
                 nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST];
         }
  
-       perf_session__delete(session);
-       return err;
+       if (destroy)
+               perf_session__delete(session);
+
+       if (psession)
+               *psession = session;
  }
  
  static void print_bad_events(void)
@@ -1689,9 +1695,10 @@ static void print_bad_events(void)
  static void __cmd_lat(void)
  {
         struct rb_node *next;
+       struct perf_session *session;
  
         setup_pager();
-       read_events();
+       read_events(false, &session);
         sort_lat();
  
         printf("\n ---------------------------------------------------------------------------------------------------------------\n");
@@ -1717,6 +1724,7 @@ static void __cmd_lat(void)
         print_bad_events();
         printf("\n");
  
+       perf_session__delete(session);
  }
  
  static struct trace_sched_handler map_ops  = {
@@ -1731,7 +1739,7 @@ static void __cmd_map(void)
         max_cpu = sysconf(_SC_NPROCESSORS_CONF);
  
         setup_pager();
-       read_events();
+       read_events(true, NULL);
         print_bad_events();
  }
  
@@ -1744,7 +1752,7 @@ static void __cmd_replay(void)
  
         test_calibrations();
  
-       read_events();
+       read_events(true, NULL);
  
         printf("nr_run_events:        %ld\n", nr_run_events);
         printf("nr_sleep_events:      %ld\n", nr_sleep_events);
@@ -1769,7 +1777,7 @@ static void __cmd_replay(void)
  
  
  static const char * const sched_usage[] = {
-       "perf sched [<options>] {record|latency|map|replay|trace}",
+       "perf sched [<options>] {record|latency|map|replay|script}",
         NULL
  };
  
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c

index 1ad04ce29c3479828c8a91b2a13efbaaf871a3f2..5deb17d9e79506028921b512f734b4a6becc4f32 100644 (file)
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -193,6 +193,7 @@ static int                  big_num_opt                     =  -1;
  static const char              *cpu_list;
  static const char              *csv_sep                        = NULL;
  static bool                    csv_output                      = false;
+static bool                    group                           = false;
  
  static volatile int done = 0;
  
@@ -280,14 +281,14 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
         attr->inherit = !no_inherit;
  
         if (system_wide)
-               return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false);
+               return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, group);
  
         if (target_pid == -1 && target_tid == -1) {
                 attr->disabled = 1;
                 attr->enable_on_exec = 1;
         }
  
-       return perf_evsel__open_per_thread(evsel, evsel_list->threads, false);
+       return perf_evsel__open_per_thread(evsel, evsel_list->threads, group);
  }
  
  /*
@@ -1043,6 +1044,8 @@ static const struct option options[] = {
                     "stat events on existing thread id"),
         OPT_BOOLEAN('a', "all-cpus", &system_wide,
                     "system-wide collection from all CPUs"),
+       OPT_BOOLEAN('g', "group", &group,
+                   "put the counters into a counter group"),
         OPT_BOOLEAN('c', "scale", &scale,
                     "scale/normalize counters"),
         OPT_INCR('v', "verbose", &verbose,
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c

index e02d78cae70f688fc0fc32957d078cf45796ed75..fe02903f7d0f0d11e0dacaf24d03672f7af1a79a 100644 (file)
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -399,7 +399,6 @@ static int perf_config_global(void)
  int perf_config(config_fn_t fn, void *data)
  {
         int ret = 0, found = 0;
-       char *repo_config = NULL;
         const char *home = NULL;
  
         /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
@@ -414,19 +413,32 @@ int perf_config(config_fn_t fn, void *data)
         home = getenv("HOME");
         if (perf_config_global() && home) {
                 char *user_config = strdup(mkpath("%s/.perfconfig", home));
-               if (!access(user_config, R_OK)) {
-                       ret += perf_config_from_file(fn, user_config, data);
-                       found += 1;
+               struct stat st;
+
+               if (user_config == NULL) {
+                       warning("Not enough memory to process %s/.perfconfig, "
+                               "ignoring it.", home);
+                       goto out;
                 }
-               free(user_config);
-       }
  
-       repo_config = perf_pathdup("config");
-       if (!access(repo_config, R_OK)) {
-               ret += perf_config_from_file(fn, repo_config, data);
+               if (stat(user_config, &st) < 0)
+                       goto out_free;
+
+               if (st.st_uid && (st.st_uid != geteuid())) {
+                       warning("File %s not owned by current user or root, "
+                               "ignoring it.", user_config);
+                       goto out_free;
+               }
+
+               if (!st.st_size)
+                       goto out_free;
+
+               ret += perf_config_from_file(fn, user_config, data);
                 found += 1;
+out_free:
+               free(user_config);
         }
-       free(repo_config);
+out:
         if (found == 0)
                 return -1;
         return ret;
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c

index fddf40f30d3ebc3670858946610687144c67a361..ee51e9b4dc0996709059723a07ff67fbf9f7da82 100644 (file)
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -96,6 +96,39 @@ int cu_find_lineinfo(Dwarf_Die *cu_die, unsigned long addr,
         return *lineno ?: -ENOENT;
  }
  
+static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data);
+
+/**
+ * cu_walk_functions_at - Walk on function DIEs at given address
+ * @cu_die: A CU DIE
+ * @addr: An address
+ * @callback: A callback which called with found DIEs
+ * @data: A user data
+ *
+ * Walk on function DIEs at given @addr in @cu_die. Passed DIEs
+ * should be subprogram or inlined-subroutines.
+ */
+int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
+                   int (*callback)(Dwarf_Die *, void *), void *data)
+{
+       Dwarf_Die die_mem;
+       Dwarf_Die *sc_die;
+       int ret = -ENOENT;
+
+       /* Inlined function could be recursive. Trace it until fail */
+       for (sc_die = die_find_realfunc(cu_die, addr, &die_mem);
+            sc_die != NULL;
+            sc_die = die_find_child(sc_die, __die_find_inline_cb, &addr,
+                                    &die_mem)) {
+               ret = callback(sc_die, data);
+               if (ret)
+                       break;
+       }
+
+       return ret;
+
+}
+
  /**
   * die_compare_name - Compare diename and tname
   * @dw_die: a DIE
@@ -198,6 +231,19 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
         return 0;
  }
  
+/* Get attribute and translate it as a sdata */
+static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name,
+                             Dwarf_Sword *result)
+{
+       Dwarf_Attribute attr;
+
+       if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
+           dwarf_formsdata(&attr, result) != 0)
+               return -ENOENT;
+
+       return 0;
+}
+
  /**
   * die_is_signed_type - Check whether a type DIE is signed or not
   * @tp_die: a DIE of a type
@@ -250,6 +296,50 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
         return 0;
  }
  
+/* Get the call file index number in CU DIE */
+static int die_get_call_fileno(Dwarf_Die *in_die)
+{
+       Dwarf_Sword idx;
+
+       if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0)
+               return (int)idx;
+       else
+               return -ENOENT;
+}
+
+/* Get the declared file index number in CU DIE */
+static int die_get_decl_fileno(Dwarf_Die *pdie)
+{
+       Dwarf_Sword idx;
+
+       if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0)
+               return (int)idx;
+       else
+               return -ENOENT;
+}
+
+/**
+ * die_get_call_file - Get callsite file name of inlined function instance
+ * @in_die: a DIE of an inlined function instance
+ *
+ * Get call-site file name of @in_die. This means from which file the inline
+ * function is called.
+ */
+const char *die_get_call_file(Dwarf_Die *in_die)
+{
+       Dwarf_Die cu_die;
+       Dwarf_Files *files;
+       int idx;
+
+       idx = die_get_call_fileno(in_die);
+       if (idx < 0 || !dwarf_diecu(in_die, &cu_die, NULL, NULL) ||
+           dwarf_getsrcfiles(&cu_die, &files, NULL) != 0)
+               return NULL;
+
+       return dwarf_filesrc(files, idx, NULL, NULL);
+}
+
+
  /**
   * die_find_child - Generic DIE search function in DIE tree
   * @rt_die: a root DIE
@@ -374,9 +464,78 @@ Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
         return die_mem;
  }
  
+struct __instance_walk_param {
+       void    *addr;
+       int     (*callback)(Dwarf_Die *, void *);
+       void    *data;
+       int     retval;
+};
+
+static int __die_walk_instances_cb(Dwarf_Die *inst, void *data)
+{
+       struct __instance_walk_param *iwp = data;
+       Dwarf_Attribute attr_mem;
+       Dwarf_Die origin_mem;
+       Dwarf_Attribute *attr;
+       Dwarf_Die *origin;
+       int tmp;
+
+       attr = dwarf_attr(inst, DW_AT_abstract_origin, &attr_mem);
+       if (attr == NULL)
+               return DIE_FIND_CB_CONTINUE;
+
+       origin = dwarf_formref_die(attr, &origin_mem);
+       if (origin == NULL || origin->addr != iwp->addr)
+               return DIE_FIND_CB_CONTINUE;
+
+       /* Ignore redundant instances */
+       if (dwarf_tag(inst) == DW_TAG_inlined_subroutine) {
+               dwarf_decl_line(origin, &tmp);
+               if (die_get_call_lineno(inst) == tmp) {
+                       tmp = die_get_decl_fileno(origin);
+                       if (die_get_call_fileno(inst) == tmp)
+                               return DIE_FIND_CB_CONTINUE;
+               }
+       }
+
+       iwp->retval = iwp->callback(inst, iwp->data);
+
+       return (iwp->retval) ? DIE_FIND_CB_END : DIE_FIND_CB_CONTINUE;
+}
+
+/**
+ * die_walk_instances - Walk on instances of given DIE
+ * @or_die: an abstract original DIE
+ * @callback: a callback function which is called with instance DIE
+ * @data: user data
+ *
+ * Walk on the instances of give @in_die. @in_die must be an inlined function
+ * declartion. This returns the return value of @callback if it returns
+ * non-zero value, or -ENOENT if there is no instance.
+ */
+int die_walk_instances(Dwarf_Die *or_die, int (*callback)(Dwarf_Die *, void *),
+                      void *data)
+{
+       Dwarf_Die cu_die;
+       Dwarf_Die die_mem;
+       struct __instance_walk_param iwp = {
+               .addr = or_die->addr,
+               .callback = callback,
+               .data = data,
+               .retval = -ENOENT,
+       };
+
+       if (dwarf_diecu(or_die, &cu_die, NULL, NULL) == NULL)
+               return -ENOENT;
+
+       die_find_child(&cu_die, __die_walk_instances_cb, &iwp, &die_mem);
+
+       return iwp.retval;
+}
+
  /* Line walker internal parameters */
  struct __line_walk_param {
-       const char *fname;
+       bool recursive;
         line_walk_callback_t callback;
         void *data;
         int retval;
@@ -385,39 +544,56 @@ struct __line_walk_param {
  static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
  {
         struct __line_walk_param *lw = data;
-       Dwarf_Addr addr;
+       Dwarf_Addr addr = 0;
+       const char *fname;
         int lineno;
  
         if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) {
+               fname = die_get_call_file(in_die);
                 lineno = die_get_call_lineno(in_die);
-               if (lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) {
-                       lw->retval = lw->callback(lw->fname, lineno, addr,
-                                                 lw->data);
+               if (fname && lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) {
+                       lw->retval = lw->callback(fname, lineno, addr, lw->data);
                         if (lw->retval != 0)
                                 return DIE_FIND_CB_END;
                 }
         }
-       return DIE_FIND_CB_SIBLING;
+       if (!lw->recursive)
+               /* Don't need to search recursively */
+               return DIE_FIND_CB_SIBLING;
+
+       if (addr) {
+               fname = dwarf_decl_file(in_die);
+               if (fname && dwarf_decl_line(in_die, &lineno) == 0) {
+                       lw->retval = lw->callback(fname, lineno, addr, lw->data);
+                       if (lw->retval != 0)
+                               return DIE_FIND_CB_END;
+               }
+       }
+
+       /* Continue to search nested inlined function call-sites */
+       return DIE_FIND_CB_CONTINUE;
  }
  
  /* Walk on lines of blocks included in given DIE */
-static int __die_walk_funclines(Dwarf_Die *sp_die,
+static int __die_walk_funclines(Dwarf_Die *sp_die, bool recursive,
                                 line_walk_callback_t callback, void *data)
  {
         struct __line_walk_param lw = {
+               .recursive = recursive,
                 .callback = callback,
                 .data = data,
                 .retval = 0,
         };
         Dwarf_Die die_mem;
         Dwarf_Addr addr;
+       const char *fname;
         int lineno;
  
         /* Handle function declaration line */
-       lw.fname = dwarf_decl_file(sp_die);
-       if (lw.fname && dwarf_decl_line(sp_die, &lineno) == 0 &&
+       fname = dwarf_decl_file(sp_die);
+       if (fname && dwarf_decl_line(sp_die, &lineno) == 0 &&
             dwarf_entrypc(sp_die, &addr) == 0) {
-               lw.retval = callback(lw.fname, lineno, addr, data);
+               lw.retval = callback(fname, lineno, addr, data);
                 if (lw.retval != 0)
                         goto done;
         }
@@ -430,7 +606,7 @@ static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
  {
         struct __line_walk_param *lw = data;
  
-       lw->retval = __die_walk_funclines(sp_die, lw->callback, lw->data);
+       lw->retval = __die_walk_funclines(sp_die, true, lw->callback, lw->data);
         if (lw->retval != 0)
                 return DWARF_CB_ABORT;
  
@@ -439,7 +615,7 @@ static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
  
  /**
   * die_walk_lines - Walk on lines inside given DIE
- * @rt_die: a root DIE (CU or subprogram)
+ * @rt_die: a root DIE (CU, subprogram or inlined_subroutine)
   * @callback: callback routine
   * @data: user data
   *
@@ -460,12 +636,12 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
         size_t nlines, i;
  
         /* Get the CU die */
-       if (dwarf_tag(rt_die) == DW_TAG_subprogram)
+       if (dwarf_tag(rt_die) != DW_TAG_compile_unit)
                 cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL);
         else
                 cu_die = rt_die;
         if (!cu_die) {
-               pr_debug2("Failed to get CU from subprogram\n");
+               pr_debug2("Failed to get CU from given DIE.\n");
                 return -EINVAL;
         }
  
@@ -509,7 +685,11 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
          * subroutines. We have to check functions list or given function.
          */
         if (rt_die != cu_die)
-               ret = __die_walk_funclines(rt_die, callback, data);
+               /*
+                * Don't need walk functions recursively, because nested
+                * inlined functions don't have lines of the specified DIE.
+                */
+               ret = __die_walk_funclines(rt_die, false, callback, data);
         else {
                 struct __line_walk_param param = {
                         .callback = callback,
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h

index bc3b21167e709cd42b98ea46e51e8a7fe3795154..6ce1717784b7ab42a14d58431c3026df749f0620 100644 (file)
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -34,12 +34,19 @@ extern const char *cu_get_comp_dir(Dwarf_Die *cu_die);
  extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
                             const char **fname, int *lineno);
  
+/* Walk on funcitons at given address */
+extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
+                       int (*callback)(Dwarf_Die *, void *), void *data);
+
  /* Compare diename and tname */
  extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
  
  /* Get callsite line number of inline-function instance */
  extern int die_get_call_lineno(Dwarf_Die *in_die);
  
+/* Get callsite file name of inlined function instance */
+extern const char *die_get_call_file(Dwarf_Die *in_die);
+
  /* Get type die */
  extern Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
  
@@ -73,6 +80,10 @@ extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
  extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
                                       Dwarf_Die *die_mem);
  
+/* Walk on the instances of given DIE */
+extern int die_walk_instances(Dwarf_Die *in_die,
+                             int (*callback)(Dwarf_Die *, void *), void *data);
+
  /* Walker on lines (Note: line number will not be sorted) */
  typedef int (* line_walk_callback_t) (const char *fname, int lineno,
                                       Dwarf_Addr addr, void *data);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c

index b021ea9265c3e39b9f18fb9e65f731516b49642d..c12bd476c6f7b404436269f4963c937cd18a69e8 100644 (file)
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -85,10 +85,32 @@ int perf_evlist__add_default(struct perf_evlist *evlist)
         struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
  
         if (evsel == NULL)
-               return -ENOMEM;
+               goto error;
+
+       /* use strdup() because free(evsel) assumes name is allocated */
+       evsel->name = strdup("cycles");
+       if (!evsel->name)
+               goto error_free;
  
         perf_evlist__add(evlist, evsel);
         return 0;
+error_free:
+       perf_evsel__delete(evsel);
+error:
+       return -ENOMEM;
+}
+
+void perf_evlist__disable(struct perf_evlist *evlist)
+{
+       int cpu, thread;
+       struct perf_evsel *pos;
+
+       for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
+               list_for_each_entry(pos, &evlist->entries, node) {
+                       for (thread = 0; thread < evlist->threads->nr; thread++)
+                               ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_DISABLE);
+               }
+       }
  }
  
  int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h

index b2b862374f371d363bc19ed3cc684fde1de9f6d3..ce85ae9ae57a1feb6e7482049afa5dce491c596b 100644 (file)
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -53,6 +53,8 @@ int perf_evlist__alloc_mmap(struct perf_evlist *evlist);
  int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite);
  void perf_evlist__munmap(struct perf_evlist *evlist);
  
+void perf_evlist__disable(struct perf_evlist *evlist);
+
  static inline void perf_evlist__set_maps(struct perf_evlist *evlist,
                                          struct cpu_map *cpus,
                                          struct thread_map *threads)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c

index cb2959a3fb43e12f6e7de76c6a7fc6a04221f8b7..b6c1ad123ca9769cee8fd5845661382b7c09fdf2 100644 (file)
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -189,8 +189,8 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
                           const char *name, bool is_kallsyms)
  {
         const size_t size = PATH_MAX;
-       char *realname, *filename = malloc(size),
-            *linkname = malloc(size), *targetname;
+       char *realname, *filename = zalloc(size),
+            *linkname = zalloc(size), *targetname;
         int len, err = -1;
  
         if (is_kallsyms) {
@@ -254,8 +254,8 @@ static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
  int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir)
  {
         const size_t size = PATH_MAX;
-       char *filename = malloc(size),
-            *linkname = malloc(size);
+       char *filename = zalloc(size),
+            *linkname = zalloc(size);
         int err = -1;
  
         if (filename == NULL || linkname == NULL)
@@ -726,7 +726,16 @@ static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
                         return -1;
  
                 bev.header = old_bev.header;
-               bev.pid    = 0;
+
+               /*
+                * As the pid is the missing value, we need to fill
+                * it properly. The header.misc value give us nice hint.
+                */
+               bev.pid = HOST_KERNEL_ID;
+               if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
+                   bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
+                       bev.pid = DEFAULT_GUEST_KERNEL_ID;
+
                 memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
                 __event_process_build_id(&bev, filename, session);
  
diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h

index 791f9dd27ebffd3166f6535c629c0595d073b3db..547628e97f3d2ac4aacf85485f8e5d790575c042 100644 (file)
--- a/tools/perf/util/include/linux/compiler.h
+++ b/tools/perf/util/include/linux/compiler.h
@@ -5,7 +5,9 @@
  #define __always_inline        inline
  #endif
  #define __user
+#ifndef __attribute_const__
  #define __attribute_const__
+#endif
  
  #define __used         __attribute__((__unused__))
  
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c

index 4ea7e19f52515d61a1bd02ccac9e4942bad60dfa..928918b796b2bd355799cf25926d926fea002bad 100644 (file)
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -697,7 +697,11 @@ parse_raw_event(const char **strp, struct perf_event_attr *attr)
                 return EVT_FAILED;
         n = hex2u64(str + 1, &config);
         if (n > 0) {
-               *strp = str + n + 1;
+               const char *end = str + n + 1;
+               if (*end != '\0' && *end != ',' && *end != ':')
+                       return EVT_FAILED;
+
+               *strp = end;
                 attr->type = PERF_TYPE_RAW;
                 attr->config = config;
                 return EVT_HANDLED;
@@ -1097,6 +1101,4 @@ void print_events(const char *event_glob)
         printf("\n");
  
         print_tracepoint_events(NULL, NULL);
-
-       exit(129);
  }
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c

index b82d54fa2c566d420a32e2e675c873a6ce7c7720..1c7bfa5fe0a81052fa1718cf996ef80c21d2e96e 100644 (file)
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1820,11 +1820,15 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
                 ret = -ENOMEM;
                 goto error;
         }
-       tev->point.module = strdup(module);
-       if (tev->point.module == NULL) {
-               ret = -ENOMEM;
-               goto error;
+
+       if (module) {
+               tev->point.module = strdup(module);
+               if (tev->point.module == NULL) {
+                       ret = -ENOMEM;
+                       goto error;
+               }
         }
+
         tev->point.offset = pev->point.offset;
         tev->point.retprobe = pev->point.retprobe;
         tev->nargs = pev->nargs;
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c

index 3e44a3e36519cb0f6d1ee708d88b90f467d38032..555fc3864b9024e9dc43a847ec0ba8d38f3bb065 100644 (file)
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -612,12 +612,12 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
         return ret;
  }
  
-/* Find a variable in a subprogram die */
-static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf)
+/* Find a variable in a scope DIE */
+static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
  {
-       Dwarf_Die vr_die, *scopes;
+       Dwarf_Die vr_die;
         char buf[32], *ptr;
-       int ret, nscopes;
+       int ret = 0;
  
         if (!is_c_varname(pf->pvar->var)) {
                 /* Copy raw parameters */
@@ -652,30 +652,16 @@ static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf)
         if (pf->tvar->name == NULL)
                 return -ENOMEM;
  
-       pr_debug("Searching '%s' variable in context.\n",
-                pf->pvar->var);
+       pr_debug("Searching '%s' variable in context.\n", pf->pvar->var);
         /* Search child die for local variables and parameters. */
-       if (die_find_variable_at(sp_die, pf->pvar->var, pf->addr, &vr_die))
-               ret = convert_variable(&vr_die, pf);
-       else {
-               /* Search upper class */
-               nscopes = dwarf_getscopes_die(sp_die, &scopes);
-               while (nscopes-- > 1) {
-                       pr_debug("Searching variables in %s\n",
-                                dwarf_diename(&scopes[nscopes]));
-                       /* We should check this scope, so give dummy address */
-                       if (die_find_variable_at(&scopes[nscopes],
-                                                pf->pvar->var, 0,
-                                                &vr_die)) {
-                               ret = convert_variable(&vr_die, pf);
-                               goto found;
-                       }
-               }
-               if (scopes)
-                       free(scopes);
-               ret = -ENOENT;
+       if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) {
+               /* Search again in global variables */
+               if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die))
+                       ret = -ENOENT;
         }
-found:
+       if (ret == 0)
+               ret = convert_variable(&vr_die, pf);
+
         if (ret < 0)
                 pr_warning("Failed to find '%s' in this function.\n",
                            pf->pvar->var);
@@ -718,26 +704,30 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
         return 0;
  }
  
-/* Call probe_finder callback with real subprogram DIE */
-static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
+/* Call probe_finder callback with scope DIE */
+static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
  {
-       Dwarf_Die die_mem;
         Dwarf_Attribute fb_attr;
         size_t nops;
         int ret;
  
-       /* If no real subprogram, find a real one */
-       if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) {
-               sp_die = die_find_realfunc(&pf->cu_die, pf->addr, &die_mem);
-               if (!sp_die) {
+       if (!sc_die) {
+               pr_err("Caller must pass a scope DIE. Program error.\n");
+               return -EINVAL;
+       }
+
+       /* If not a real subprogram, find a real one */
+       if (dwarf_tag(sc_die) != DW_TAG_subprogram) {
+               if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
                         pr_warning("Failed to find probe point in any "
                                    "functions.\n");
                         return -ENOENT;
                 }
-       }
+       } else
+               memcpy(&pf->sp_die, sc_die, sizeof(Dwarf_Die));
  
-       /* Get the frame base attribute/ops */
-       dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr);
+       /* Get the frame base attribute/ops from subprogram */
+       dwarf_attr(&pf->sp_die, DW_AT_frame_base, &fb_attr);
         ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1);
         if (ret <= 0 || nops == 0) {
                 pf->fb_ops = NULL;
@@ -755,7 +745,7 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
         }
  
         /* Call finder's callback handler */
-       ret = pf->callback(sp_die, pf);
+       ret = pf->callback(sc_die, pf);
  
         /* *pf->fb_ops will be cached in libdw. Don't free it. */
         pf->fb_ops = NULL;
@@ -763,17 +753,82 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
         return ret;
  }
  
+struct find_scope_param {
+       const char *function;
+       const char *file;
+       int line;
+       int diff;
+       Dwarf_Die *die_mem;
+       bool found;
+};
+
+static int find_best_scope_cb(Dwarf_Die *fn_die, void *data)
+{
+       struct find_scope_param *fsp = data;
+       const char *file;
+       int lno;
+
+       /* Skip if declared file name does not match */
+       if (fsp->file) {
+               file = dwarf_decl_file(fn_die);
+               if (!file || strcmp(fsp->file, file) != 0)
+                       return 0;
+       }
+       /* If the function name is given, that's what user expects */
+       if (fsp->function) {
+               if (die_compare_name(fn_die, fsp->function)) {
+                       memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
+                       fsp->found = true;
+                       return 1;
+               }
+       } else {
+               /* With the line number, find the nearest declared DIE */
+               dwarf_decl_line(fn_die, &lno);
+               if (lno < fsp->line && fsp->diff > fsp->line - lno) {
+                       /* Keep a candidate and continue */
+                       fsp->diff = fsp->line - lno;
+                       memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
+                       fsp->found = true;
+               }
+       }
+       return 0;
+}
+
+/* Find an appropriate scope fits to given conditions */
+static Dwarf_Die *find_best_scope(struct probe_finder *pf, Dwarf_Die *die_mem)
+{
+       struct find_scope_param fsp = {
+               .function = pf->pev->point.function,
+               .file = pf->fname,
+               .line = pf->lno,
+               .diff = INT_MAX,
+               .die_mem = die_mem,
+               .found = false,
+       };
+
+       cu_walk_functions_at(&pf->cu_die, pf->addr, find_best_scope_cb, &fsp);
+
+       return fsp.found ? die_mem : NULL;
+}
+
  static int probe_point_line_walker(const char *fname, int lineno,
                                    Dwarf_Addr addr, void *data)
  {
         struct probe_finder *pf = data;
+       Dwarf_Die *sc_die, die_mem;
         int ret;
  
         if (lineno != pf->lno || strtailcmp(fname, pf->fname) != 0)
                 return 0;
  
         pf->addr = addr;
-       ret = call_probe_finder(NULL, pf);
+       sc_die = find_best_scope(pf, &die_mem);
+       if (!sc_die) {
+               pr_warning("Failed to find scope of probe point.\n");
+               return -ENOENT;
+       }
+
+       ret = call_probe_finder(sc_die, pf);
  
         /* Continue if no error, because the line will be in inline function */
         return ret < 0 ? ret : 0;
@@ -827,6 +882,7 @@ static int probe_point_lazy_walker(const char *fname, int lineno,
                                    Dwarf_Addr addr, void *data)
  {
         struct probe_finder *pf = data;
+       Dwarf_Die *sc_die, die_mem;
         int ret;
  
         if (!line_list__has_line(&pf->lcache, lineno) ||
@@ -836,7 +892,14 @@ static int probe_point_lazy_walker(const char *fname, int lineno,
         pr_debug("Probe line found: line:%d addr:0x%llx\n",
                  lineno, (unsigned long long)addr);
         pf->addr = addr;
-       ret = call_probe_finder(NULL, pf);
+       pf->lno = lineno;
+       sc_die = find_best_scope(pf, &die_mem);
+       if (!sc_die) {
+               pr_warning("Failed to find scope of probe point.\n");
+               return -ENOENT;
+       }
+
+       ret = call_probe_finder(sc_die, pf);
  
         /*
          * Continue if no error, because the lazy pattern will match
@@ -861,42 +924,39 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
         return die_walk_lines(sp_die, probe_point_lazy_walker, pf);
  }
  
-/* Callback parameter with return value */
-struct dwarf_callback_param {
-       void *data;
-       int retval;
-};
-
  static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
  {
-       struct dwarf_callback_param *param = data;
-       struct probe_finder *pf = param->data;
+       struct probe_finder *pf = data;
         struct perf_probe_point *pp = &pf->pev->point;
         Dwarf_Addr addr;
+       int ret;
  
         if (pp->lazy_line)
-               param->retval = find_probe_point_lazy(in_die, pf);
+               ret = find_probe_point_lazy(in_die, pf);
         else {
                 /* Get probe address */
                 if (dwarf_entrypc(in_die, &addr) != 0) {
                         pr_warning("Failed to get entry address of %s.\n",
                                    dwarf_diename(in_die));
-                       param->retval = -ENOENT;
-                       return DWARF_CB_ABORT;
+                       return -ENOENT;
                 }
                 pf->addr = addr;
                 pf->addr += pp->offset;
                 pr_debug("found inline addr: 0x%jx\n",
                          (uintmax_t)pf->addr);
  
-               param->retval = call_probe_finder(in_die, pf);
-               if (param->retval < 0)
-                       return DWARF_CB_ABORT;
+               ret = call_probe_finder(in_die, pf);
         }
  
-       return DWARF_CB_OK;
+       return ret;
  }
  
+/* Callback parameter with return value for libdw */
+struct dwarf_callback_param {
+       void *data;
+       int retval;
+};
+
  /* Search function from function name */
  static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
  {
@@ -933,14 +993,10 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
                         /* TODO: Check the address in this function */
                         param->retval = call_probe_finder(sp_die, pf);
                 }
-       } else {
-               struct dwarf_callback_param _param = {.data = (void *)pf,
-                                                     .retval = 0};
+       } else
                 /* Inlined function: search instances */
-               dwarf_func_inline_instances(sp_die, probe_point_inline_cb,
-                                           &_param);
-               param->retval = _param.retval;
-       }
+               param->retval = die_walk_instances(sp_die,
+                                       probe_point_inline_cb, (void *)pf);
  
         return DWARF_CB_ABORT; /* Exit; no same symbol in this CU. */
  }
@@ -1060,7 +1116,7 @@ found:
  }
  
  /* Add a found probe point into trace event list */
-static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf)
+static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
  {
         struct trace_event_finder *tf =
                         container_of(pf, struct trace_event_finder, pf);
@@ -1075,8 +1131,9 @@ static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf)
         }
         tev = &tf->tevs[tf->ntevs++];
  
-       ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe,
-                                    &tev->point);
+       /* Trace point should be converted from subprogram DIE */
+       ret = convert_to_trace_point(&pf->sp_die, pf->addr,
+                                    pf->pev->point.retprobe, &tev->point);
         if (ret < 0)
                 return ret;
  
@@ -1091,7 +1148,8 @@ static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf)
         for (i = 0; i < pf->pev->nargs; i++) {
                 pf->pvar = &pf->pev->args[i];
                 pf->tvar = &tev->args[i];
-               ret = find_variable(sp_die, pf);
+               /* Variable should be found from scope DIE */
+               ret = find_variable(sc_die, pf);
                 if (ret != 0)
                         return ret;
         }
@@ -1159,13 +1217,13 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
  }
  
  /* Add a found vars into available variables list */
-static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf)
+static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf)
  {
         struct available_var_finder *af =
                         container_of(pf, struct available_var_finder, pf);
         struct variable_list *vl;
-       Dwarf_Die die_mem, *scopes = NULL;
-       int ret, nscopes;
+       Dwarf_Die die_mem;
+       int ret;
  
         /* Check number of tevs */
         if (af->nvls == af->max_vls) {
@@ -1174,8 +1232,9 @@ static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf)
         }
         vl = &af->vls[af->nvls++];
  
-       ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe,
-                                    &vl->point);
+       /* Trace point should be converted from subprogram DIE */
+       ret = convert_to_trace_point(&pf->sp_die, pf->addr,
+                                    pf->pev->point.retprobe, &vl->point);
         if (ret < 0)
                 return ret;
  
@@ -1187,19 +1246,14 @@ static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf)
         if (vl->vars == NULL)
                 return -ENOMEM;
         af->child = true;
-       die_find_child(sp_die, collect_variables_cb, (void *)af, &die_mem);
+       die_find_child(sc_die, collect_variables_cb, (void *)af, &die_mem);
  
         /* Find external variables */
         if (!af->externs)
                 goto out;
         /* Don't need to search child DIE for externs. */
         af->child = false;
-       nscopes = dwarf_getscopes_die(sp_die, &scopes);
-       while (nscopes-- > 1)
-               die_find_child(&scopes[nscopes], collect_variables_cb,
-                              (void *)af, &die_mem);
-       if (scopes)
-               free(scopes);
+       die_find_child(&pf->cu_die, collect_variables_cb, (void *)af, &die_mem);
  
  out:
         if (strlist__empty(vl->vars)) {
@@ -1391,10 +1445,14 @@ static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
  
  static int line_range_inline_cb(Dwarf_Die *in_die, void *data)
  {
-       struct dwarf_callback_param *param = data;
+       find_line_range_by_line(in_die, data);
  
-       param->retval = find_line_range_by_line(in_die, param->data);
-       return DWARF_CB_ABORT;  /* No need to find other instances */
+       /*
+        * We have to check all instances of inlined function, because
+        * some execution paths can be optimized out depends on the
+        * function argument of instances
+        */
+       return 0;
  }
  
  /* Search function from function name */
@@ -1422,15 +1480,10 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
                 pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e);
                 lr->start = lf->lno_s;
                 lr->end = lf->lno_e;
-               if (dwarf_func_inline(sp_die)) {
-                       struct dwarf_callback_param _param;
-                       _param.data = (void *)lf;
-                       _param.retval = 0;
-                       dwarf_func_inline_instances(sp_die,
-                                                   line_range_inline_cb,
-                                                   &_param);
-                       param->retval = _param.retval;
-               } else
+               if (dwarf_func_inline(sp_die))
+                       param->retval = die_walk_instances(sp_die,
+                                               line_range_inline_cb, lf);
+               else
                         param->retval = find_line_range_by_line(sp_die, lf);
                 return DWARF_CB_ABORT;
         }
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h

index c478b42a2473f68ea1dcb786c1a5892c70913866..1132c8f0ce890b09bab27520a59a4bf1ad7fe765 100644 (file)
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -57,7 +57,7 @@ struct probe_finder {
         struct perf_probe_event *pev;           /* Target probe event */
  
         /* Callback when a probe point is found */
-       int (*callback)(Dwarf_Die *sp_die, struct probe_finder *pf);
+       int (*callback)(Dwarf_Die *sc_die, struct probe_finder *pf);
  
         /* For function searching */
         int                     lno;            /* Line number */
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c

index 8e0b5a39d8a739720e621521f8b65710b1bb2486..cbc8f215d4b7d518ade4d3e8edee5a35d92be81d 100644 (file)
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -187,14 +187,117 @@ static PyTypeObject pyrf_throttle_event__type = {
         .tp_repr        = (reprfunc)pyrf_throttle_event__repr,
  };
  
+static char pyrf_lost_event__doc[] = PyDoc_STR("perf lost event object.");
+
+static PyMemberDef pyrf_lost_event__members[] = {
+       sample_members
+       member_def(lost_event, id, T_ULONGLONG, "event id"),
+       member_def(lost_event, lost, T_ULONGLONG, "number of lost events"),
+       { .name = NULL, },
+};
+
+static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent)
+{
+       PyObject *ret;
+       char *s;
+
+       if (asprintf(&s, "{ type: lost, id: %#" PRIx64 ", "
+                        "lost: %#" PRIx64 " }",
+                    pevent->event.lost.id, pevent->event.lost.lost) < 0) {
+               ret = PyErr_NoMemory();
+       } else {
+               ret = PyString_FromString(s);
+               free(s);
+       }
+       return ret;
+}
+
+static PyTypeObject pyrf_lost_event__type = {
+       PyVarObject_HEAD_INIT(NULL, 0)
+       .tp_name        = "perf.lost_event",
+       .tp_basicsize   = sizeof(struct pyrf_event),
+       .tp_flags       = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+       .tp_doc         = pyrf_lost_event__doc,
+       .tp_members     = pyrf_lost_event__members,
+       .tp_repr        = (reprfunc)pyrf_lost_event__repr,
+};
+
+static char pyrf_read_event__doc[] = PyDoc_STR("perf read event object.");
+
+static PyMemberDef pyrf_read_event__members[] = {
+       sample_members
+       member_def(read_event, pid, T_UINT, "event pid"),
+       member_def(read_event, tid, T_UINT, "event tid"),
+       { .name = NULL, },
+};
+
+static PyObject *pyrf_read_event__repr(struct pyrf_event *pevent)
+{
+       return PyString_FromFormat("{ type: read, pid: %u, tid: %u }",
+                                  pevent->event.read.pid,
+                                  pevent->event.read.tid);
+       /*
+        * FIXME: return the array of read values,
+        * making this method useful ;-)
+        */
+}
+
+static PyTypeObject pyrf_read_event__type = {
+       PyVarObject_HEAD_INIT(NULL, 0)
+       .tp_name        = "perf.read_event",
+       .tp_basicsize   = sizeof(struct pyrf_event),
+       .tp_flags       = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+       .tp_doc         = pyrf_read_event__doc,
+       .tp_members     = pyrf_read_event__members,
+       .tp_repr        = (reprfunc)pyrf_read_event__repr,
+};
+
+static char pyrf_sample_event__doc[] = PyDoc_STR("perf sample event object.");
+
+static PyMemberDef pyrf_sample_event__members[] = {
+       sample_members
+       member_def(perf_event_header, type, T_UINT, "event type"),
+       { .name = NULL, },
+};
+
+static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent)
+{
+       PyObject *ret;
+       char *s;
+
+       if (asprintf(&s, "{ type: sample }") < 0) {
+               ret = PyErr_NoMemory();
+       } else {
+               ret = PyString_FromString(s);
+               free(s);
+       }
+       return ret;
+}
+
+static PyTypeObject pyrf_sample_event__type = {
+       PyVarObject_HEAD_INIT(NULL, 0)
+       .tp_name        = "perf.sample_event",
+       .tp_basicsize   = sizeof(struct pyrf_event),
+       .tp_flags       = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+       .tp_doc         = pyrf_sample_event__doc,
+       .tp_members     = pyrf_sample_event__members,
+       .tp_repr        = (reprfunc)pyrf_sample_event__repr,
+};
+
  static int pyrf_event__setup_types(void)
  {
         int err;
         pyrf_mmap_event__type.tp_new =
         pyrf_task_event__type.tp_new =
         pyrf_comm_event__type.tp_new =
+       pyrf_lost_event__type.tp_new =
+       pyrf_read_event__type.tp_new =
+       pyrf_sample_event__type.tp_new =
         pyrf_throttle_event__type.tp_new = PyType_GenericNew;
         err = PyType_Ready(&pyrf_mmap_event__type);
+       if (err < 0)
+               goto out;
+       err = PyType_Ready(&pyrf_lost_event__type);
         if (err < 0)
                 goto out;
         err = PyType_Ready(&pyrf_task_event__type);
@@ -206,20 +309,26 @@ static int pyrf_event__setup_types(void)
         err = PyType_Ready(&pyrf_throttle_event__type);
         if (err < 0)
                 goto out;
+       err = PyType_Ready(&pyrf_read_event__type);
+       if (err < 0)
+               goto out;
+       err = PyType_Ready(&pyrf_sample_event__type);
+       if (err < 0)
+               goto out;
  out:
         return err;
  }
  
  static PyTypeObject *pyrf_event__type[] = {
         [PERF_RECORD_MMAP]       = &pyrf_mmap_event__type,
-       [PERF_RECORD_LOST]       = &pyrf_mmap_event__type,
+       [PERF_RECORD_LOST]       = &pyrf_lost_event__type,
         [PERF_RECORD_COMM]       = &pyrf_comm_event__type,
         [PERF_RECORD_EXIT]       = &pyrf_task_event__type,
         [PERF_RECORD_THROTTLE]   = &pyrf_throttle_event__type,
         [PERF_RECORD_UNTHROTTLE] = &pyrf_throttle_event__type,
         [PERF_RECORD_FORK]       = &pyrf_task_event__type,
-       [PERF_RECORD_READ]       = &pyrf_mmap_event__type,
-       [PERF_RECORD_SAMPLE]     = &pyrf_mmap_event__type,
+       [PERF_RECORD_READ]       = &pyrf_read_event__type,
+       [PERF_RECORD_SAMPLE]     = &pyrf_sample_event__type,
  };
  
  static PyObject *pyrf_event__new(union perf_event *event)
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py

index bbc982f5dd8b9ced999577ced4822a21c9a28779..95d370074928841b096a37076040834b8c45cdc9 100644 (file)
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -3,9 +3,27 @@
  from distutils.core import setup, Extension
  from os import getenv
  
+from distutils.command.build_ext   import build_ext   as _build_ext
+from distutils.command.install_lib import install_lib as _install_lib
+
+class build_ext(_build_ext):
+    def finalize_options(self):
+        _build_ext.finalize_options(self)
+        self.build_lib  = build_lib
+        self.build_temp = build_tmp
+
+class install_lib(_install_lib):
+    def finalize_options(self):
+        _install_lib.finalize_options(self)
+        self.build_dir = build_lib
+
+
  cflags = ['-fno-strict-aliasing', '-Wno-write-strings']
  cflags += getenv('CFLAGS', '').split()
  
+build_lib = getenv('PYTHON_EXTBUILD_LIB')
+build_tmp = getenv('PYTHON_EXTBUILD_TMP')
+
  perf = Extension('perf',
                   sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c',
                              'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c',
@@ -21,4 +39,5 @@ setup(name='perf',
        author_email='acme@redhat.com',
        license='GPLv2',
        url='http://perf.wiki.kernel.org',
-      ext_modules=[perf])
+      ext_modules=[perf],
+      cmdclass={'build_ext': build_ext, 'install_lib': install_lib})
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c

index eec196329fd92c2e0567c8af5535ae1a99fcc4b1..469c0264ed298d138698d93206419b87fbb3dd99 100644 (file)
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1504,6 +1504,17 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
         dso->adjust_symbols = 0;
  
         if (strncmp(dso->name, "/tmp/perf-", 10) == 0) {
+               struct stat st;
+
+               if (lstat(dso->name, &st) < 0)
+                       return -1;
+
+               if (st.st_uid && (st.st_uid != geteuid())) {
+                       pr_warning("File %s not owned by current user or root, "
+                               "ignoring it.\n", dso->name);
+                       return -1;
+               }
+
                 ret = dso__load_perf_map(dso, map, filter);
                 dso->symtab_type = ret > 0 ? SYMTAB__JAVA_JIT :
                                               SYMTAB__NOT_FOUND;
@@ -2170,27 +2181,22 @@ size_t machines__fprintf_dsos_buildid(struct rb_root *machines,
         return ret;
  }
  
-struct dso *dso__new_kernel(const char *name)
+static struct dso*
+dso__kernel_findnew(struct machine *machine, const char *name,
+                   const char *short_name, int dso_type)
  {
-       struct dso *dso = dso__new(name ?: "[kernel.kallsyms]");
-
-       if (dso != NULL) {
-               dso__set_short_name(dso, "[kernel]");
-               dso->kernel = DSO_TYPE_KERNEL;
-       }
-
-       return dso;
-}
+       /*
+        * The kernel dso could be created by build_id processing.
+        */
+       struct dso *dso = __dsos__findnew(&machine->kernel_dsos, name);
  
-static struct dso *dso__new_guest_kernel(struct machine *machine,
-                                       const char *name)
-{
-       char bf[PATH_MAX];
-       struct dso *dso = dso__new(name ?: machine__mmap_name(machine, bf,
-                                                             sizeof(bf)));
+       /*
+        * We need to run this in all cases, since during the build_id
+        * processing we had no idea this was the kernel dso.
+        */
         if (dso != NULL) {
-               dso__set_short_name(dso, "[guest.kernel]");
-               dso->kernel = DSO_TYPE_GUEST_KERNEL;
+               dso__set_short_name(dso, short_name);
+               dso->kernel = dso_type;
         }
  
         return dso;
@@ -2208,24 +2214,36 @@ void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine)
                 dso->has_build_id = true;
  }
  
-static struct dso *machine__create_kernel(struct machine *machine)
+static struct dso *machine__get_kernel(struct machine *machine)
  {
         const char *vmlinux_name = NULL;
         struct dso *kernel;
  
         if (machine__is_host(machine)) {
                 vmlinux_name = symbol_conf.vmlinux_name;
-               kernel = dso__new_kernel(vmlinux_name);
+               if (!vmlinux_name)
+                       vmlinux_name = "[kernel.kallsyms]";
+
+               kernel = dso__kernel_findnew(machine, vmlinux_name,
+                                            "[kernel]",
+                                            DSO_TYPE_KERNEL);
         } else {
+               char bf[PATH_MAX];
+
                 if (machine__is_default_guest(machine))
                         vmlinux_name = symbol_conf.default_guest_vmlinux_name;
-               kernel = dso__new_guest_kernel(machine, vmlinux_name);
+               if (!vmlinux_name)
+                       vmlinux_name = machine__mmap_name(machine, bf,
+                                                         sizeof(bf));
+
+               kernel = dso__kernel_findnew(machine, vmlinux_name,
+                                            "[guest.kernel]",
+                                            DSO_TYPE_GUEST_KERNEL);
         }
  
-       if (kernel != NULL) {
+       if (kernel != NULL && (!kernel->has_build_id))
                 dso__read_running_kernel_build_id(kernel, machine);
-               dsos__add(&machine->kernel_dsos, kernel);
-       }
+
         return kernel;
  }
  
@@ -2329,7 +2347,7 @@ void machine__destroy_kernel_maps(struct machine *machine)
  
  int machine__create_kernel_maps(struct machine *machine)
  {
-       struct dso *kernel = machine__create_kernel(machine);
+       struct dso *kernel = machine__get_kernel(machine);
  
         if (kernel == NULL ||
             __machine__create_kernel_maps(machine, kernel) < 0)
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h

index 325ee36a9d29d63d82973d262816f91a064813f2..4f377d92e75a4c686279c99fa15e200f1ac6d5bc 100644 (file)
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -155,7 +155,6 @@ struct dso {
  };
  
  struct dso *dso__new(const char *name);
-struct dso *dso__new_kernel(const char *name);
  void dso__delete(struct dso *dso);
  
  int dso__name_len(const struct dso *dso);
diff --git a/tools/perf/util/ui/browsers/top.c b/tools/perf/util/ui/browsers/top.c

index 5a06538532af5315dd48b27cc142856a4818c327..88403cf8396ad6294d4be6585fdfd92eaf5b4a6a 100644 (file)
--- a/tools/perf/util/ui/browsers/top.c
+++ b/tools/perf/util/ui/browsers/top.c
@@ -208,6 +208,5 @@ int perf_top__tui_browser(struct perf_top *top)
                 },
         };
  
-       ui_helpline__push("Press <- or ESC to exit");
         return perf_top_browser__run(&browser);
  }
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile

index 94c2cf0a98b88e771ef4377fef83ea16deb72398..e8a03aceceb11fe88b5552a0952431866213ee42 100644 (file)
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -24,7 +24,7 @@
  
  # Set the following to `true' to make a unstripped, unoptimized
  # binary. Leave this set to `false' for production use.
-DEBUG ?=       false
+DEBUG ?=       true
  
  # make the build silent. Set this to something else to make it noisy again.
  V ?=           false
@@ -35,7 +35,7 @@ NLS ?=                true
  
  # Set the following to 'true' to build/install the
  # cpufreq-bench benchmarking tool
-CPUFRQ_BENCH ?= true
+CPUFREQ_BENCH ?= true
  
  # Prefix to the directories we're installing to
  DESTDIR ?=
@@ -137,9 +137,10 @@ CFLAGS +=  -pipe
  ifeq ($(strip $(NLS)),true)
         INSTALL_NLS += install-gmo
         COMPILE_NLS += create-gmo
+       CFLAGS += -DNLS
  endif
  
-ifeq ($(strip $(CPUFRQ_BENCH)),true)
+ifeq ($(strip $(CPUFREQ_BENCH)),true)
         INSTALL_BENCH += install-bench
         COMPILE_BENCH += compile-bench
  endif
diff --git a/tools/power/cpupower/debug/x86_64/Makefile b/tools/power/cpupower/debug/x86_64/Makefile

index dbf13998462a3c3e02b2784dd10c24314d2c006b..3326217dd31158d24446213d709a3f8cb5b7002c 100644 (file)
--- a/tools/power/cpupower/debug/x86_64/Makefile
+++ b/tools/power/cpupower/debug/x86_64/Makefile
@@ -1,10 +1,10 @@
  default: all
  
-centrino-decode: centrino-decode.c
-       $(CC) $(CFLAGS) -o centrino-decode centrino-decode.c
+centrino-decode: ../i386/centrino-decode.c
+       $(CC) $(CFLAGS) -o $@ $<
  
-powernow-k8-decode: powernow-k8-decode.c
-       $(CC) $(CFLAGS) -o powernow-k8-decode powernow-k8-decode.c
+powernow-k8-decode: ../i386/powernow-k8-decode.c
+       $(CC) $(CFLAGS) -o $@ $<
  
  all: centrino-decode powernow-k8-decode
  
diff --git a/tools/power/cpupower/debug/x86_64/centrino-decode.c b/tools/power/cpupower/debug/x86_64/centrino-decode.c

deleted file mode 120000 (symlink)

index 26fb3f1..0000000
--- a/tools/power/cpupower/debug/x86_64/centrino-decode.c
+++ /dev/null
@@ -1 +0,0 @@
-../i386/centrino-decode.c
-\ No newline at end of file
diff --git a/tools/power/cpupower/debug/x86_64/powernow-k8-decode.c b/tools/power/cpupower/debug/x86_64/powernow-k8-decode.c

deleted file mode 120000 (symlink)

index eb30c79..0000000
--- a/tools/power/cpupower/debug/x86_64/powernow-k8-decode.c
+++ /dev/null
@@ -1 +0,0 @@
-../i386/powernow-k8-decode.c
-\ No newline at end of file
diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1

index 3194811d58f55e83c02bc6c3fa7c1aa96fc922a5..bb60a8d1e45abb0e4685792ad02ca187893cac8a 100644 (file)
--- a/tools/power/cpupower/man/cpupower-frequency-info.1
+++ b/tools/power/cpupower/man/cpupower-frequency-info.1
@@ -1,10 +1,10 @@
-.TH "cpufreq-info" "1" "0.1" "Mattia Dongili" ""
+.TH "cpupower-frequency-info" "1" "0.1" "Mattia Dongili" ""
  .SH "NAME"
  .LP 
-cpufreq\-info \- Utility to retrieve cpufreq kernel information
+cpupower frequency\-info \- Utility to retrieve cpufreq kernel information
  .SH "SYNTAX"
  .LP 
-cpufreq\-info [\fIoptions\fP]
+cpupower [ \-c cpulist ] frequency\-info [\fIoptions\fP]
  .SH "DESCRIPTION"
  .LP 
  A small tool which prints out cpufreq information helpful to developers and interested users.
diff --git a/tools/power/cpupower/man/cpupower-frequency-set.1 b/tools/power/cpupower/man/cpupower-frequency-set.1

index 26e3e13eee3b61c7427d579896e9f31f50c01440..685f469093ad200c93e2e9feb3e23dcaec863a5a 100644 (file)
--- a/tools/power/cpupower/man/cpupower-frequency-set.1
+++ b/tools/power/cpupower/man/cpupower-frequency-set.1
@@ -1,13 +1,13 @@
-.TH "cpufreq-set" "1" "0.1" "Mattia Dongili" ""
+.TH "cpupower-freqency-set" "1" "0.1" "Mattia Dongili" ""
  .SH "NAME"
  .LP 
-cpufreq\-set \- A small tool which allows to modify cpufreq settings.
+cpupower frequency\-set \- A small tool which allows to modify cpufreq settings.
  .SH "SYNTAX"
  .LP 
-cpufreq\-set [\fIoptions\fP]
+cpupower [ \-c cpu ] frequency\-set [\fIoptions\fP]
  .SH "DESCRIPTION"
  .LP 
-cpufreq\-set allows you to modify cpufreq settings without having to type e.g. "/sys/devices/system/cpu/cpu0/cpufreq/scaling_set_speed" all the time.
+cpupower frequency\-set allows you to modify cpufreq settings without having to type e.g. "/sys/devices/system/cpu/cpu0/cpufreq/scaling_set_speed" all the time.
  .SH "OPTIONS"
  .LP 
  .TP 
diff --git a/tools/power/cpupower/man/cpupower.1 b/tools/power/cpupower/man/cpupower.1

index 78c20feab85c27447563ee29cd7927aa0cac6363..baf741d06e82543f3529f4f13769bfc1d196cffc 100644 (file)
--- a/tools/power/cpupower/man/cpupower.1
+++ b/tools/power/cpupower/man/cpupower.1
@@ -3,7 +3,7 @@
  cpupower \- Shows and sets processor power related values
  .SH SYNOPSIS
  .ft B
-.B cpupower [ \-c cpulist ] subcommand [ARGS]
+.B cpupower [ \-c cpulist ] <command> [ARGS]
  
  .B cpupower \-v|\-\-version
  
@@ -13,24 +13,24 @@ cpupower \- Shows and sets processor power related values
  \fBcpupower \fP is a collection of tools to examine and tune power saving
  related features of your processor.
  
-The manpages of the subcommands (cpupower\-<subcommand>(1)) provide detailed
+The manpages of the commands (cpupower\-<command>(1)) provide detailed
  descriptions of supported features. Run \fBcpupower help\fP to get an overview
-of supported subcommands.
+of supported commands.
  
  .SH Options
  .PP
  \-\-help, \-h
  .RS 4
-Shows supported subcommands and general usage.
+Shows supported commands and general usage.
  .RE
  .PP
  \-\-cpu cpulist,  \-c cpulist
  .RS 4
  Only show or set values for specific cores.
-This option is not supported by all subcommands, details can be found in the
-manpages of the subcommands.
+This option is not supported by all commands, details can be found in the
+manpages of the commands.
  
-Some subcommands access all cores (typically the *\-set commands), some only
+Some commands access all cores (typically the *\-set commands), some only
  the first core (typically the *\-info commands) by default.
  
  The syntax for <cpulist> is based on how the kernel exports CPU bitmasks via
diff --git a/tools/power/cpupower/utils/builtin.h b/tools/power/cpupower/utils/builtin.h

index c870ffba5219615c4d55823100c6854c66707f38..c10496fbe3c629c270d6b9ab406f587c8b4736c6 100644 (file)
--- a/tools/power/cpupower/utils/builtin.h
+++ b/tools/power/cpupower/utils/builtin.h
@@ -8,11 +8,4 @@ extern int cmd_freq_info(int argc, const char **argv);
  extern int cmd_idle_info(int argc, const char **argv);
  extern int cmd_monitor(int argc, const char **argv);
  
-extern void set_help(void);
-extern void info_help(void);
-extern void freq_set_help(void);
-extern void freq_info_help(void);
-extern void idle_info_help(void);
-extern void monitor_help(void);
-
  #endif
diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c

index 5a1d25f056b3b60c0f547cc43314d03272fcfac7..28953c9a7bd5980ad600d8830c736119809e51e0 100644 (file)
--- a/tools/power/cpupower/utils/cpufreq-info.c
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -510,37 +510,6 @@ static int get_latency(unsigned int cpu, unsigned int human)
         return 0;
  }
  
-void freq_info_help(void)
-{
-       printf(_("Usage: cpupower freqinfo [options]\n"));
-       printf(_("Options:\n"));
-       printf(_("  -e, --debug          Prints out debug information [default]\n"));
-       printf(_("  -f, --freq           Get frequency the CPU currently runs at, according\n"
-              "                       to the cpufreq core *\n"));
-       printf(_("  -w, --hwfreq         Get frequency the CPU currently runs at, by reading\n"
-              "                       it from hardware (only available to root) *\n"));
-       printf(_("  -l, --hwlimits       Determine the minimum and maximum CPU frequency allowed *\n"));
-       printf(_("  -d, --driver         Determines the used cpufreq kernel driver *\n"));
-       printf(_("  -p, --policy         Gets the currently used cpufreq policy *\n"));
-       printf(_("  -g, --governors      Determines available cpufreq governors *\n"));
-       printf(_("  -r, --related-cpus   Determines which CPUs run at the same hardware frequency *\n"));
-       printf(_("  -a, --affected-cpus  Determines which CPUs need to have their frequency\n"
-                       "                       coordinated by software *\n"));
-       printf(_("  -s, --stats          Shows cpufreq statistics if available\n"));
-       printf(_("  -y, --latency        Determines the maximum latency on CPU frequency changes *\n"));
-       printf(_("  -b, --boost          Checks for turbo or boost modes  *\n"));
-       printf(_("  -o, --proc           Prints out information like provided by the /proc/cpufreq\n"
-              "                       interface in 2.4. and early 2.6. kernels\n"));
-       printf(_("  -m, --human          human-readable output for the -f, -w, -s and -y parameters\n"));
-       printf(_("  -h, --help           Prints out this screen\n"));
-
-       printf("\n");
-       printf(_("If no argument is given, full output about\n"
-              "cpufreq is printed which is useful e.g. for reporting bugs.\n\n"));
-       printf(_("By default info of CPU 0 is shown which can be overridden\n"
-                "with the cpupower --cpu main command option.\n"));
-}
-
  static struct option info_opts[] = {
         { .name = "debug",      .has_arg = no_argument,         .flag = NULL,   .val = 'e'},
         { .name = "boost",      .has_arg = no_argument,         .flag = NULL,   .val = 'b'},
@@ -556,7 +525,6 @@ static struct option info_opts[] = {
         { .name = "latency",    .has_arg = no_argument,         .flag = NULL,   .val = 'y'},
         { .name = "proc",       .has_arg = no_argument,         .flag = NULL,   .val = 'o'},
         { .name = "human",      .has_arg = no_argument,         .flag = NULL,   .val = 'm'},
-       { .name = "help",       .has_arg = no_argument,         .flag = NULL,   .val = 'h'},
         { },
  };
  
@@ -570,16 +538,12 @@ int cmd_freq_info(int argc, char **argv)
         int output_param = 0;
  
         do {
-               ret = getopt_long(argc, argv, "hoefwldpgrasmyb", info_opts, NULL);
+               ret = getopt_long(argc, argv, "oefwldpgrasmyb", info_opts, NULL);
                 switch (ret) {
                 case '?':
                         output_param = '?';
                         cont = 0;
                         break;
-               case 'h':
-                       output_param = 'h';
-                       cont = 0;
-                       break;
                 case -1:
                         cont = 0;
                         break;
@@ -642,11 +606,7 @@ int cmd_freq_info(int argc, char **argv)
                 return -EINVAL;
         case '?':
                 printf(_("invalid or unknown argument\n"));
-               freq_info_help();
                 return -EINVAL;
-       case 'h':
-               freq_info_help();
-               return EXIT_SUCCESS;
         case 'o':
                 proc_cpufreq_output();
                 return EXIT_SUCCESS;
diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c

index 5f783622bf31f89e2618ff5b84a50fb37221957b..dd1539eb8c63de3e8d166b24a5f79b65702a4685 100644 (file)
--- a/tools/power/cpupower/utils/cpufreq-set.c
+++ b/tools/power/cpupower/utils/cpufreq-set.c
@@ -20,34 +20,11 @@
  
  #define NORM_FREQ_LEN 32
  
-void freq_set_help(void)
-{
-       printf(_("Usage: cpupower frequency-set [options]\n"));
-       printf(_("Options:\n"));
-       printf(_("  -d FREQ, --min FREQ      new minimum CPU frequency the governor may select\n"));
-       printf(_("  -u FREQ, --max FREQ      new maximum CPU frequency the governor may select\n"));
-       printf(_("  -g GOV, --governor GOV   new cpufreq governor\n"));
-       printf(_("  -f FREQ, --freq FREQ     specific frequency to be set. Requires userspace\n"
-              "                           governor to be available and loaded\n"));
-       printf(_("  -r, --related            Switches all hardware-related CPUs\n"));
-       printf(_("  -h, --help               Prints out this screen\n"));
-       printf("\n");
-       printf(_("Notes:\n"
-              "1. Omitting the -c or --cpu argument is equivalent to setting it to \"all\"\n"));
-       printf(_("2. The -f FREQ, --freq FREQ parameter cannot be combined with any other parameter\n"
-              "   except the -c CPU, --cpu CPU parameter\n"
-              "3. FREQuencies can be passed in Hz, kHz (default), MHz, GHz, or THz\n"
-              "   by postfixing the value with the wanted unit name, without any space\n"
-              "   (FREQuency in kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n"));
-
-}
-
  static struct option set_opts[] = {
         { .name = "min",        .has_arg = required_argument,   .flag = NULL,   .val = 'd'},
         { .name = "max",        .has_arg = required_argument,   .flag = NULL,   .val = 'u'},
         { .name = "governor",   .has_arg = required_argument,   .flag = NULL,   .val = 'g'},
         { .name = "freq",       .has_arg = required_argument,   .flag = NULL,   .val = 'f'},
-       { .name = "help",       .has_arg = no_argument,         .flag = NULL,   .val = 'h'},
         { .name = "related",    .has_arg = no_argument,         .flag = NULL,   .val='r'},
         { },
  };
@@ -80,7 +57,6 @@ const struct freq_units def_units[] = {
  static void print_unknown_arg(void)
  {
         printf(_("invalid or unknown argument\n"));
-       freq_set_help();
  }
  
  static unsigned long string_to_frequency(const char *str)
@@ -231,14 +207,11 @@ int cmd_freq_set(int argc, char **argv)
  
         /* parameter parsing */
         do {
-               ret = getopt_long(argc, argv, "d:u:g:f:hr", set_opts, NULL);
+               ret = getopt_long(argc, argv, "d:u:g:f:r", set_opts, NULL);
                 switch (ret) {
                 case '?':
                         print_unknown_arg();
                         return -EINVAL;
-               case 'h':
-                       freq_set_help();
-                       return 0;
                 case -1:
                         cont = 0;
                         break;
diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c

index 70da3574f1e99940a33dfa4348a35a046de2c5b8..b028267c1376a6c63c455bbdabeb5c36218aee2a 100644 (file)
--- a/tools/power/cpupower/utils/cpuidle-info.c
+++ b/tools/power/cpupower/utils/cpuidle-info.c
@@ -139,30 +139,14 @@ static void proc_cpuidle_cpu_output(unsigned int cpu)
         }
  }
  
-/* --freq / -f */
-
-void idle_info_help(void)
-{
-       printf(_ ("Usage: cpupower idleinfo [options]\n"));
-       printf(_ ("Options:\n"));
-       printf(_ ("  -s, --silent         Only show general C-state information\n"));
-       printf(_ ("  -o, --proc           Prints out information like provided by the /proc/acpi/processor/*/power\n"
-              "                       interface in older kernels\n"));
-       printf(_ ("  -h, --help           Prints out this screen\n"));
-
-       printf("\n");
-}
-
  static struct option info_opts[] = {
         { .name = "silent",     .has_arg = no_argument, .flag = NULL,   .val = 's'},
         { .name = "proc",       .has_arg = no_argument, .flag = NULL,   .val = 'o'},
-       { .name = "help",       .has_arg = no_argument, .flag = NULL,   .val = 'h'},
         { },
  };
  
  static inline void cpuidle_exit(int fail)
  {
-       idle_info_help();
         exit(EXIT_FAILURE);
  }
  
@@ -174,7 +158,7 @@ int cmd_idle_info(int argc, char **argv)
         unsigned int cpu = 0;
  
         do {
-               ret = getopt_long(argc, argv, "hos", info_opts, NULL);
+               ret = getopt_long(argc, argv, "os", info_opts, NULL);
                 if (ret == -1)
                         break;
                 switch (ret) {
@@ -182,10 +166,6 @@ int cmd_idle_info(int argc, char **argv)
                         output_param = '?';
                         cont = 0;
                         break;
-               case 'h':
-                       output_param = 'h';
-                       cont = 0;
-                       break;
                 case 's':
                         verbose = 0;
                         break;
@@ -211,8 +191,6 @@ int cmd_idle_info(int argc, char **argv)
         case '?':
                 printf(_("invalid or unknown argument\n"));
                 cpuidle_exit(EXIT_FAILURE);
-       case 'h':
-               cpuidle_exit(EXIT_SUCCESS);
         }
  
         /* Default is: show output of CPU 0 only */
diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c

index 85253cb7600ef8c7850bcd7332a6639be20b9bf3..3f68632c28c7bccc02f2d0a3266288e4a7bfd74a 100644 (file)
--- a/tools/power/cpupower/utils/cpupower-info.c
+++ b/tools/power/cpupower/utils/cpupower-info.c
@@ -16,31 +16,16 @@
  #include "helpers/helpers.h"
  #include "helpers/sysfs.h"
  
-void info_help(void)
-{
-       printf(_("Usage: cpupower info [ -b ] [ -m ] [ -s ]\n"));
-       printf(_("Options:\n"));
-       printf(_("  -b, --perf-bias    Gets CPU's power vs performance policy on some\n"
-              "                           Intel models [0-15], see manpage for details\n"));
-       printf(_("  -m, --sched-mc     Gets the kernel's multi core scheduler policy.\n"));
-       printf(_("  -s, --sched-smt    Gets the kernel's thread sibling scheduler policy.\n"));
-       printf(_("  -h, --help               Prints out this screen\n"));
-       printf(_("\nPassing no option will show all info, by default only on core 0\n"));
-       printf("\n");
-}
-
  static struct option set_opts[] = {
         { .name = "perf-bias",  .has_arg = optional_argument,   .flag = NULL,   .val = 'b'},
         { .name = "sched-mc",   .has_arg = optional_argument,   .flag = NULL,   .val = 'm'},
         { .name = "sched-smt",  .has_arg = optional_argument,   .flag = NULL,   .val = 's'},
-       { .name = "help",       .has_arg = no_argument,         .flag = NULL,   .val = 'h'},
         { },
  };
  
  static void print_wrong_arg_exit(void)
  {
         printf(_("invalid or unknown argument\n"));
-       info_help();
         exit(EXIT_FAILURE);
  }
  
@@ -64,11 +49,8 @@ int cmd_info(int argc, char **argv)
         textdomain(PACKAGE);
  
         /* parameter parsing */
-       while ((ret = getopt_long(argc, argv, "msbh", set_opts, NULL)) != -1) {
+       while ((ret = getopt_long(argc, argv, "msb", set_opts, NULL)) != -1) {
                 switch (ret) {
-               case 'h':
-                       info_help();
-                       return 0;
                 case 'b':
                         if (params.perf_bias)
                                 print_wrong_arg_exit();
diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c

index bc1b391e46f0ba785ed6ae0bc11f18cc7d3678e3..dc4de37621117f7dc6fe8d47669e761063c9eed0 100644 (file)
--- a/tools/power/cpupower/utils/cpupower-set.c
+++ b/tools/power/cpupower/utils/cpupower-set.c
@@ -17,30 +17,16 @@
  #include "helpers/sysfs.h"
  #include "helpers/bitmask.h"
  
-void set_help(void)
-{
-       printf(_("Usage: cpupower set [ -b val ] [ -m val ] [ -s val ]\n"));
-       printf(_("Options:\n"));
-       printf(_("  -b, --perf-bias [VAL]    Sets CPU's power vs performance policy on some\n"
-              "                           Intel models [0-15], see manpage for details\n"));
-       printf(_("  -m, --sched-mc  [VAL]    Sets the kernel's multi core scheduler policy.\n"));
-       printf(_("  -s, --sched-smt [VAL]    Sets the kernel's thread sibling scheduler policy.\n"));
-       printf(_("  -h, --help               Prints out this screen\n"));
-       printf("\n");
-}
-
  static struct option set_opts[] = {
         { .name = "perf-bias",  .has_arg = optional_argument,   .flag = NULL,   .val = 'b'},
         { .name = "sched-mc",   .has_arg = optional_argument,   .flag = NULL,   .val = 'm'},
         { .name = "sched-smt",  .has_arg = optional_argument,   .flag = NULL,   .val = 's'},
-       { .name = "help",       .has_arg = no_argument,         .flag = NULL,   .val = 'h'},
         { },
  };
  
  static void print_wrong_arg_exit(void)
  {
         printf(_("invalid or unknown argument\n"));
-       set_help();
         exit(EXIT_FAILURE);
  }
  
@@ -66,12 +52,9 @@ int cmd_set(int argc, char **argv)
  
         params.params = 0;
         /* parameter parsing */
-       while ((ret = getopt_long(argc, argv, "m:s:b:h",
+       while ((ret = getopt_long(argc, argv, "m:s:b:",
                                                 set_opts, NULL)) != -1) {
                 switch (ret) {
-               case 'h':
-                       set_help();
-                       return 0;
                 case 'b':
                         if (params.perf_bias)
                                 print_wrong_arg_exit();
@@ -110,10 +93,8 @@ int cmd_set(int argc, char **argv)
                 }
         };
  
-       if (!params.params) {
-               set_help();
-               return -EINVAL;
-       }
+       if (!params.params)
+               print_wrong_arg_exit();
  
         if (params.sched_mc) {
                 ret = sysfs_set_sched("mc", sched_mc);
diff --git a/tools/power/cpupower/utils/cpupower.c b/tools/power/cpupower/utils/cpupower.c

index 5844ae0f786f2a09ca3b01016e338c03585d3af4..52bee591c1c565f3bf9760505c7a824122d2537c 100644 (file)
--- a/tools/power/cpupower/utils/cpupower.c
+++ b/tools/power/cpupower/utils/cpupower.c
@@ -11,6 +11,7 @@
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>
+#include <errno.h>
  
  #include "builtin.h"
  #include "helpers/helpers.h"
@@ -19,13 +20,12 @@
  struct cmd_struct {
         const char *cmd;
         int (*main)(int, const char **);
-       void (*usage)(void);
         int needs_root;
  };
  
  #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
  
-int cmd_help(int argc, const char **argv);
+static int cmd_help(int argc, const char **argv);
  
  /* Global cpu_info object available for all binaries
   * Info only retrieved from CPU 0
@@ -44,55 +44,66 @@ int be_verbose;
  static void print_help(void);
  
  static struct cmd_struct commands[] = {
-       { "frequency-info",     cmd_freq_info,  freq_info_help, 0       },
-       { "frequency-set",      cmd_freq_set,   freq_set_help,  1       },
-       { "idle-info",          cmd_idle_info,  idle_info_help, 0       },
-       { "set",                cmd_set,        set_help,       1       },
-       { "info",               cmd_info,       info_help,      0       },
-       { "monitor",            cmd_monitor,    monitor_help,   0       },
-       { "help",               cmd_help,       print_help,     0       },
-       /*      { "bench",      cmd_bench,      NULL,           1       }, */
+       { "frequency-info",     cmd_freq_info,  0       },
+       { "frequency-set",      cmd_freq_set,   1       },
+       { "idle-info",          cmd_idle_info,  0       },
+       { "set",                cmd_set,        1       },
+       { "info",               cmd_info,       0       },
+       { "monitor",            cmd_monitor,    0       },
+       { "help",               cmd_help,       0       },
+       /*      { "bench",      cmd_bench,      1       }, */
  };
  
-int cmd_help(int argc, const char **argv)
-{
-       unsigned int i;
-
-       if (argc > 1) {
-               for (i = 0; i < ARRAY_SIZE(commands); i++) {
-                       struct cmd_struct *p = commands + i;
-                       if (strcmp(p->cmd, argv[1]))
-                               continue;
-                       if (p->usage) {
-                               p->usage();
-                               return EXIT_SUCCESS;
-                       }
-               }
-       }
-       print_help();
-       if (argc == 1)
-               return EXIT_SUCCESS; /* cpupower help */
-       return EXIT_FAILURE;
-}
-
  static void print_help(void)
  {
         unsigned int i;
  
  #ifdef DEBUG
-       printf(_("cpupower [ -d ][ -c cpulist ] subcommand [ARGS]\n"));
-       printf(_("  -d, --debug      May increase output (stderr) on some subcommands\n"));
+       printf(_("Usage:\tcpupower [-d|--debug] [-c|--cpu cpulist ] <command> [<args>]\n"));
  #else
-       printf(_("cpupower [ -c cpulist ] subcommand [ARGS]\n"));
+       printf(_("Usage:\tcpupower [-c|--cpu cpulist ] <command> [<args>]\n"));
  #endif
-       printf(_("cpupower --version\n"));
-       printf(_("Supported subcommands are:\n"));
+       printf(_("Supported commands are:\n"));
         for (i = 0; i < ARRAY_SIZE(commands); i++)
                 printf("\t%s\n", commands[i].cmd);
-       printf(_("\nSome subcommands can make use of the -c cpulist option.\n"));
-       printf(_("Look at the general cpupower manpage how to use it\n"));
-       printf(_("and read up the subcommand's manpage whether it is supported.\n"));
-       printf(_("\nUse cpupower help subcommand for getting help for above subcommands.\n"));
+       printf(_("\nNot all commands can make use of the -c cpulist option.\n"));
+       printf(_("\nUse 'cpupower help <command>' for getting help for above commands.\n"));
+}
+
+static int print_man_page(const char *subpage)
+{
+       int len;
+       char *page;
+
+       len = 10; /* enough for "cpupower-" */
+       if (subpage != NULL)
+               len += strlen(subpage);
+
+       page = malloc(len);
+       if (!page)
+               return -ENOMEM;
+
+       sprintf(page, "cpupower");
+       if ((subpage != NULL) && strcmp(subpage, "help")) {
+               strcat(page, "-");
+               strcat(page, subpage);
+       }
+
+       execlp("man", "man", page, NULL);
+
+       /* should not be reached */
+       return -EINVAL;
+}
+
+static int cmd_help(int argc, const char **argv)
+{
+       if (argc > 1) {
+               print_man_page(argv[1]); /* exits within execlp() */
+               return EXIT_FAILURE;
+       }
+
+       print_help();
+       return EXIT_SUCCESS;
  }
  
  static void print_version(void)
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h

index 592ee362b877c92083d736d7699d7cb6d3e3250a..2747e738efb04d3fa1a53e7686162240cfc7a7ec 100644 (file)
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -16,11 +16,20 @@
  #include "helpers/bitmask.h"
  
  /* Internationalization ****************************/
+#ifdef NLS
+
  #define _(String) gettext(String)
  #ifndef gettext_noop
  #define gettext_noop(String) String
  #endif
  #define N_(String) gettext_noop(String)
+
+#else /* !NLS */
+
+#define _(String) String
+#define N_(String) String
+
+#endif
  /* Internationalization ****************************/
  
  extern int run_as_root;
@@ -96,6 +105,9 @@ struct cpupower_topology {
                 int pkg;
                 int core;
                 int cpu;
+
+               /* flags */
+               unsigned int is_online:1;
         } *core_info;
  };
  
diff --git a/tools/power/cpupower/utils/helpers/sysfs.c b/tools/power/cpupower/utils/helpers/sysfs.c

index 55e2466674c636d767dc3efdb32cf5c145d3fc06..c6343024a61158d094c0a831d369b7d1bd8cdb45 100644 (file)
--- a/tools/power/cpupower/utils/helpers/sysfs.c
+++ b/tools/power/cpupower/utils/helpers/sysfs.c
@@ -56,6 +56,56 @@ static unsigned int sysfs_write_file(const char *path,
         return (unsigned int) numwrite;
  }
  
+/*
+ * Detect whether a CPU is online
+ *
+ * Returns:
+ *     1 -> if CPU is online
+ *     0 -> if CPU is offline
+ *     negative errno values in error case
+ */
+int sysfs_is_cpu_online(unsigned int cpu)
+{
+       char path[SYSFS_PATH_MAX];
+       int fd;
+       ssize_t numread;
+       unsigned long long value;
+       char linebuf[MAX_LINE_LEN];
+       char *endp;
+       struct stat statbuf;
+
+       snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u", cpu);
+
+       if (stat(path, &statbuf) != 0)
+               return 0;
+
+       /*
+        * kernel without CONFIG_HOTPLUG_CPU
+        * -> cpuX directory exists, but not cpuX/online file
+        */
+       snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/online", cpu);
+       if (stat(path, &statbuf) != 0)
+               return 1;
+
+       fd = open(path, O_RDONLY);
+       if (fd == -1)
+               return -errno;
+
+       numread = read(fd, linebuf, MAX_LINE_LEN - 1);
+       if (numread < 1) {
+               close(fd);
+               return -EIO;
+       }
+       linebuf[numread] = '\0';
+       close(fd);
+
+       value = strtoull(linebuf, &endp, 0);
+       if (value > 1 || value < 0)
+               return -EINVAL;
+
+       return value;
+}
+
  /* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */
  
  /*
diff --git a/tools/power/cpupower/utils/helpers/sysfs.h b/tools/power/cpupower/utils/helpers/sysfs.h

index f9373e0906377d78235972950a00e42a6d0c3744..8cb797bbceb0b565a5f698e68c71b856dbe2b37c 100644 (file)
--- a/tools/power/cpupower/utils/helpers/sysfs.h
+++ b/tools/power/cpupower/utils/helpers/sysfs.h
@@ -7,6 +7,8 @@
  
  extern unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen);
  
+extern int sysfs_is_cpu_online(unsigned int cpu);
+
  extern unsigned long sysfs_get_idlestate_latency(unsigned int cpu,
                                                 unsigned int idlestate);
  extern unsigned long sysfs_get_idlestate_usage(unsigned int cpu,
diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c

index 385ee5c7570cc6551c64436cd8cf65a213080188..4eae2c47ba48d20d9e62505e023d0dd0a1c7862f 100644 (file)
--- a/tools/power/cpupower/utils/helpers/topology.c
+++ b/tools/power/cpupower/utils/helpers/topology.c
@@ -41,6 +41,8 @@ struct cpuid_core_info {
         unsigned int pkg;
         unsigned int thread;
         unsigned int cpu;
+       /* flags */
+       unsigned int is_online:1;
  };
  
  static int __compare(const void *t1, const void *t2)
@@ -78,6 +80,8 @@ int get_cpu_topology(struct cpupower_topology *cpu_top)
                 return -ENOMEM;
         cpu_top->pkgs = cpu_top->cores = 0;
         for (cpu = 0; cpu < cpus; cpu++) {
+               cpu_top->core_info[cpu].cpu = cpu;
+               cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu);
                 cpu_top->core_info[cpu].pkg =
                         sysfs_topology_read_file(cpu, "physical_package_id");
                 if ((int)cpu_top->core_info[cpu].pkg != -1 &&
@@ -85,7 +89,6 @@ int get_cpu_topology(struct cpupower_topology *cpu_top)
                         cpu_top->pkgs = cpu_top->core_info[cpu].pkg;
                 cpu_top->core_info[cpu].core =
                         sysfs_topology_read_file(cpu, "core_id");
-               cpu_top->core_info[cpu].cpu = cpu;
         }
         cpu_top->pkgs++;
  
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c

index d048b96a61553d8ab54ca908acd7edeced287354..bcd22a1a397083d1b6e5ed7d311fcbcf8e01dffb 100644 (file)
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -134,7 +134,7 @@ static struct cpuidle_monitor *cpuidle_register(void)
         /* Assume idle state count is the same for all CPUs */
         cpuidle_sysfs_monitor.hw_states_num = sysfs_get_idlestate_count(0);
  
-       if (cpuidle_sysfs_monitor.hw_states_num == 0)
+       if (cpuidle_sysfs_monitor.hw_states_num <= 0)
                 return NULL;
  
         for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) {
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c

index ba4bf068380d9002bac92fd4d8370f2ebd20040c..0d6571e418db4b7eb73017f23be7041c64bbe56c 100644 (file)
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -43,6 +43,12 @@ static struct cpupower_topology cpu_top;
  /* ToDo: Document this in the manpage */
  static char range_abbr[RANGE_MAX] = { 'T', 'C', 'P', 'M', };
  
+static void print_wrong_arg_exit(void)
+{
+       printf(_("invalid or unknown argument\n"));
+       exit(EXIT_FAILURE);
+}
+
  long long timespec_diff_us(struct timespec start, struct timespec end)
  {
         struct timespec temp;
@@ -56,21 +62,6 @@ long long timespec_diff_us(struct timespec start, struct timespec end)
         return (temp.tv_sec * 1000000) + (temp.tv_nsec / 1000);
  }
  
-void monitor_help(void)
-{
-       printf(_("cpupower monitor: [-m <mon1>,[<mon2>],.. ] command\n"));
-       printf(_("cpupower monitor: [-m <mon1>,[<mon2>],.. ] [ -i interval_sec ]\n"));
-       printf(_("cpupower monitor: -l\n"));
-       printf(_("\t command: pass an arbitrary command to measure specific workload\n"));
-       printf(_("\t -i: time intervall to measure for in seconds (default 1)\n"));
-       printf(_("\t -l: list available CPU sleep monitors (for use with -m)\n"));
-       printf(_("\t -m: show specific CPU sleep monitors only (in same order)\n"));
-       printf(_("\t -h: print this help\n"));
-       printf("\n");
-       printf(_("only one of: -l, -m are allowed\nIf none of them is passed,"));
-       printf(_(" all supported monitors are shown\n"));
-}
-
  void print_n_spaces(int n)
  {
         int x;
@@ -149,6 +140,10 @@ void print_results(int topology_depth, int cpu)
         unsigned long long result;
         cstate_t s;
  
+       /* Be careful CPUs may got resorted for pkg value do not just use cpu */
+       if (!bitmask_isbitset(cpus_chosen, cpu_top.core_info[cpu].cpu))
+               return;
+
         if (topology_depth > 2)
                 printf("%4d|", cpu_top.core_info[cpu].pkg);
         if (topology_depth > 1)
@@ -190,9 +185,13 @@ void print_results(int topology_depth, int cpu)
                         }
                 }
         }
-       /* cpu offline */
-       if (cpu_top.core_info[cpu].pkg == -1 ||
-           cpu_top.core_info[cpu].core == -1) {
+       /*
+        * The monitor could still provide useful data, for example
+        * AMD HW counters partly sit in PCI config space.
+        * It's up to the monitor plug-in to check .is_online, this one
+        * is just for additional info.
+        */
+       if (!cpu_top.core_info[cpu].is_online) {
                 printf(_(" *is offline\n"));
                 return;
         } else
@@ -238,7 +237,6 @@ static void parse_monitor_param(char *param)
         if (hits == 0) {
                 printf(_("No matching monitor found in %s, "
                          "try -l option\n"), param);
-               monitor_help();
                 exit(EXIT_FAILURE);
         }
         /* Override detected/registerd monitors array with requested one */
@@ -335,37 +333,27 @@ static void cmdline(int argc, char *argv[])
         int opt;
         progname = basename(argv[0]);
  
-       while ((opt = getopt(argc, argv, "+hli:m:")) != -1) {
+       while ((opt = getopt(argc, argv, "+li:m:")) != -1) {
                 switch (opt) {
-               case 'h':
-                       monitor_help();
-                       exit(EXIT_SUCCESS);
                 case 'l':
-                       if (mode) {
-                               monitor_help();
-                               exit(EXIT_FAILURE);
-                       }
+                       if (mode)
+                               print_wrong_arg_exit();
                         mode = list;
                         break;
                 case 'i':
                         /* only allow -i with -m or no option */
-                       if (mode && mode != show) {
-                               monitor_help();
-                               exit(EXIT_FAILURE);
-                       }
+                       if (mode && mode != show)
+                               print_wrong_arg_exit();
                         interval = atoi(optarg);
                         break;
                 case 'm':
-                       if (mode) {
-                               monitor_help();
-                               exit(EXIT_FAILURE);
-                       }
+                       if (mode)
+                               print_wrong_arg_exit();
                         mode = show;
                         show_monitors_param = optarg;
                         break;
                 default:
-                       monitor_help();
-                       exit(EXIT_FAILURE);
+                       print_wrong_arg_exit();
                 }
         }
         if (!mode)
@@ -385,6 +373,10 @@ int cmd_monitor(int argc, char **argv)
                 return EXIT_FAILURE;
         }
  
+       /* Default is: monitor all CPUs */
+       if (bitmask_isallclear(cpus_chosen))
+               bitmask_setall(cpus_chosen);
+
         dprint("System has up to %d CPU cores\n", cpu_count);
  
         for (num = 0; all_monitors[num]; num++) {
diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c

index 63ca87a05e5ffba9400e8caf493c24b6fdb3e22d..5650ab5a2c206b05e23296ae035aae52b9a528ca 100644 (file)
--- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
@@ -22,12 +22,15 @@
  
  #define MSR_TSC        0x10
  
+#define MSR_AMD_HWCR 0xc0010015
+
  enum mperf_id { C0 = 0, Cx, AVG_FREQ, MPERF_CSTATE_COUNT };
  
  static int mperf_get_count_percent(unsigned int self_id, double *percent,
                                    unsigned int cpu);
  static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
                                 unsigned int cpu);
+static struct timespec time_start, time_end;
  
  static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = {
         {
@@ -54,19 +57,33 @@ static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = {
         },
  };
  
+enum MAX_FREQ_MODE { MAX_FREQ_SYSFS, MAX_FREQ_TSC_REF };
+static int max_freq_mode;
+/*
+ * The max frequency mperf is ticking at (in C0), either retrieved via:
+ *   1) calculated after measurements if we know TSC ticks at mperf/P0 frequency
+ *   2) cpufreq /sys/devices/.../cpu0/cpufreq/cpuinfo_max_freq at init time
+ * 1. Is preferred as it also works without cpufreq subsystem (e.g. on Xen)
+ */
+static unsigned long max_frequency;
+
  static unsigned long long tsc_at_measure_start;
  static unsigned long long tsc_at_measure_end;
-static unsigned long max_frequency;
  static unsigned long long *mperf_previous_count;
  static unsigned long long *aperf_previous_count;
  static unsigned long long *mperf_current_count;
  static unsigned long long *aperf_current_count;
+
  /* valid flag for all CPUs. If a MSR read failed it will be zero */
  static int *is_valid;
  
  static int mperf_get_tsc(unsigned long long *tsc)
  {
-       return read_msr(0, MSR_TSC, tsc);
+       int ret;
+       ret = read_msr(0, MSR_TSC, tsc);
+       if (ret)
+               dprint("Reading TSC MSR failed, returning %llu\n", *tsc);
+       return ret;
  }
  
  static int mperf_init_stats(unsigned int cpu)
@@ -97,36 +114,11 @@ static int mperf_measure_stats(unsigned int cpu)
         return 0;
  }
  
-/*
- * get_average_perf()
- *
- * Returns the average performance (also considers boosted frequencies)
- *
- * Input:
- *   aperf_diff: Difference of the aperf register over a time period
- *   mperf_diff: Difference of the mperf register over the same time period
- *   max_freq:   Maximum frequency (P0)
- *
- * Returns:
- *   Average performance over the time period
- */
-static unsigned long get_average_perf(unsigned long long aperf_diff,
-                                     unsigned long long mperf_diff)
-{
-       unsigned int perf_percent = 0;
-       if (((unsigned long)(-1) / 100) < aperf_diff) {
-               int shift_count = 7;
-               aperf_diff >>= shift_count;
-               mperf_diff >>= shift_count;
-       }
-       perf_percent = (aperf_diff * 100) / mperf_diff;
-       return (max_frequency * perf_percent) / 100;
-}
-
  static int mperf_get_count_percent(unsigned int id, double *percent,
                                    unsigned int cpu)
  {
         unsigned long long aperf_diff, mperf_diff, tsc_diff;
+       unsigned long long timediff;
  
         if (!is_valid[cpu])
                 return -1;
@@ -136,11 +128,19 @@ static int mperf_get_count_percent(unsigned int id, double *percent,
  
         mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
         aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
-       tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
  
-       *percent = 100.0 * mperf_diff / tsc_diff;
-       dprint("%s: mperf_diff: %llu, tsc_diff: %llu\n",
-              mperf_cstates[id].name, mperf_diff, tsc_diff);
+       if (max_freq_mode == MAX_FREQ_TSC_REF) {
+               tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
+               *percent = 100.0 * mperf_diff / tsc_diff;
+               dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n",
+                      mperf_cstates[id].name, mperf_diff, tsc_diff);
+       } else if (max_freq_mode == MAX_FREQ_SYSFS) {
+               timediff = timespec_diff_us(time_start, time_end);
+               *percent = 100.0 * mperf_diff / timediff;
+               dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n",
+                      mperf_cstates[id].name, mperf_diff, timediff);
+       } else
+               return -1;
  
         if (id == Cx)
                 *percent = 100.0 - *percent;
@@ -154,7 +154,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent,
  static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
                                 unsigned int cpu)
  {
-       unsigned long long aperf_diff, mperf_diff;
+       unsigned long long aperf_diff, mperf_diff, time_diff, tsc_diff;
  
         if (id != AVG_FREQ)
                 return 1;
@@ -165,11 +165,21 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
         mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
         aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
  
-       /* Return MHz for now, might want to return KHz if column width is more
-          generic */
-       *count = get_average_perf(aperf_diff, mperf_diff) / 1000;
-       dprint("%s: %llu\n", mperf_cstates[id].name, *count);
+       if (max_freq_mode == MAX_FREQ_TSC_REF) {
+               /* Calculate max_freq from TSC count */
+               tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
+               time_diff = timespec_diff_us(time_start, time_end);
+               max_frequency = tsc_diff / time_diff;
+       }
  
+       *count = max_frequency * ((double)aperf_diff / mperf_diff);
+       dprint("%s: Average freq based on %s maximum frequency:\n",
+              mperf_cstates[id].name,
+              (max_freq_mode == MAX_FREQ_TSC_REF) ? "TSC calculated" : "sysfs read");
+       dprint("%max_frequency: %lu", max_frequency);
+       dprint("aperf_diff: %llu\n", aperf_diff);
+       dprint("mperf_diff: %llu\n", mperf_diff);
+       dprint("avg freq:   %llu\n", *count);
         return 0;
  }
  
@@ -178,6 +188,7 @@ static int mperf_start(void)
         int cpu;
         unsigned long long dbg;
  
+       clock_gettime(CLOCK_REALTIME, &time_start);
         mperf_get_tsc(&tsc_at_measure_start);
  
         for (cpu = 0; cpu < cpu_count; cpu++)
@@ -193,32 +204,104 @@ static int mperf_stop(void)
         unsigned long long dbg;
         int cpu;
  
-       mperf_get_tsc(&tsc_at_measure_end);
-
         for (cpu = 0; cpu < cpu_count; cpu++)
                 mperf_measure_stats(cpu);
  
+       mperf_get_tsc(&tsc_at_measure_end);
+       clock_gettime(CLOCK_REALTIME, &time_end);
+
         mperf_get_tsc(&dbg);
         dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end);
  
         return 0;
  }
  
-struct cpuidle_monitor mperf_monitor;
-
-struct cpuidle_monitor *mperf_register(void)
+/*
+ * Mperf register is defined to tick at P0 (maximum) frequency
+ *
+ * Instead of reading out P0 which can be tricky to read out from HW,
+ * we use TSC counter if it reliably ticks at P0/mperf frequency.
+ *
+ * Still try to fall back to:
+ * /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq
+ * on older Intel HW without invariant TSC feature.
+ * Or on AMD machines where TSC does not tick at P0 (do not exist yet, but
+ * it's still double checked (MSR_AMD_HWCR)).
+ *
+ * On these machines the user would still get useful mperf
+ * stats when acpi-cpufreq driver is loaded.
+ */
+static int init_maxfreq_mode(void)
  {
+       int ret;
+       unsigned long long hwcr;
         unsigned long min;
  
-       if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
-               return NULL;
-
-       /* Assume min/max all the same on all cores */
+       if (!cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC)
+               goto use_sysfs;
+
+       if (cpupower_cpu_info.vendor == X86_VENDOR_AMD) {
+               /* MSR_AMD_HWCR tells us whether TSC runs at P0/mperf
+                * freq.
+                * A test whether hwcr is accessable/available would be:
+                * (cpupower_cpu_info.family > 0x10 ||
+                *   cpupower_cpu_info.family == 0x10 &&
+                *   cpupower_cpu_info.model >= 0x2))
+                * This should be the case for all aperf/mperf
+                * capable AMD machines and is therefore safe to test here.
+                * Compare with Linus kernel git commit: acf01734b1747b1ec4
+                */
+               ret = read_msr(0, MSR_AMD_HWCR, &hwcr);
+               /*
+                * If the MSR read failed, assume a Xen system that did
+                * not explicitly provide access to it and assume TSC works
+               */
+               if (ret != 0) {
+                       dprint("TSC read 0x%x failed - assume TSC working\n",
+                              MSR_AMD_HWCR);
+                       return 0;
+               } else if (1 & (hwcr >> 24)) {
+                       max_freq_mode = MAX_FREQ_TSC_REF;
+                       return 0;
+               } else { /* Use sysfs max frequency if available */ }
+       } else if (cpupower_cpu_info.vendor == X86_VENDOR_INTEL) {
+               /*
+                * On Intel we assume mperf (in C0) is ticking at same
+                * rate than TSC
+                */
+               max_freq_mode = MAX_FREQ_TSC_REF;
+               return 0;
+       }
+use_sysfs:
         if (cpufreq_get_hardware_limits(0, &min, &max_frequency)) {
                 dprint("Cannot retrieve max freq from cpufreq kernel "
                        "subsystem\n");
-               return NULL;
+               return -1;
         }
+       max_freq_mode = MAX_FREQ_SYSFS;
+       return 0;
+}
+
+/*
+ * This monitor provides:
+ *
+ * 1) Average frequency a CPU resided in
+ *    This always works if the CPU has aperf/mperf capabilities
+ *
+ * 2) C0 and Cx (any sleep state) time a CPU resided in
+ *    Works if mperf timer stops ticking in sleep states which
+ *    seem to be the case on all current HW.
+ * Both is directly retrieved from HW registers and is independent
+ * from kernel statistics.
+ */
+struct cpuidle_monitor mperf_monitor;
+struct cpuidle_monitor *mperf_register(void)
+{
+       if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
+               return NULL;
+
+       if (init_maxfreq_mode())
+               return NULL;
  
         /* Free this at program termination */
         is_valid = calloc(cpu_count, sizeof(int));
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 20 Sep 2011 17:21:52 +0000 (10:21 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 20 Sep 2011 17:21:52 +0000 (10:21 -0700)