Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6

author Linus Torvalds <torvalds@linux-foundation.org>

Sat, 26 Jul 2008 00:29:03 +0000 (17:29 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 26 Jul 2008 00:29:03 +0000 (17:29 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 26 Jul 2008 00:29:03 +0000 (17:29 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 26 Jul 2008 00:29:03 +0000 (17:29 -0700)
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX

index 1977fab386566e23f501bea233f8f487d16f5c38..6de71308a9060b1a81c27aedb5b91042ff0b668b 100644 (file)
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -361,8 +361,6 @@ telephony/
         - directory with info on telephony (e.g. voice over IP) support.
  time_interpolators.txt
         - info on time interpolators.
-tipar.txt
-       - information about Parallel link cable for Texas Instruments handhelds.
  tty.txt
         - guide to the locking policies of the tty layer.
  uml/
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle

index 6caa146155788c8a67fc940c1879f2faf4fac1bb..1875e502f87205a1dcaf268d0af78127ee336144 100644 (file)
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -474,25 +474,29 @@ make a good program).
  So, you can either get rid of GNU emacs, or change it to use saner
  values.  To do the latter, you can stick the following in your .emacs file:
  
-(defun linux-c-mode ()
-  "C mode with adjusted defaults for use with the Linux kernel."
-  (interactive)
-  (c-mode)
-  (c-set-style "K&R")
-  (setq tab-width 8)
-  (setq indent-tabs-mode t)
-  (setq c-basic-offset 8))
-
-This will define the M-x linux-c-mode command.  When hacking on a
-module, if you put the string -*- linux-c -*- somewhere on the first
-two lines, this mode will be automatically invoked. Also, you may want
-to add
-
-(setq auto-mode-alist (cons '("/usr/src/linux.*/.*\\.[ch]$" . linux-c-mode)
-                       auto-mode-alist))
-
-to your .emacs file if you want to have linux-c-mode switched on
-automagically when you edit source files under /usr/src/linux.
+(defun c-lineup-arglist-tabs-only (ignored)
+  "Line up argument lists by tabs, not spaces"
+  (let* ((anchor (c-langelem-pos c-syntactic-element))
+        (column (c-langelem-2nd-pos c-syntactic-element))
+        (offset (- (1+ column) anchor))
+        (steps (floor offset c-basic-offset)))
+    (* (max steps 1)
+       c-basic-offset)))
+
+(add-hook 'c-mode-hook
+          (lambda ()
+            (let ((filename (buffer-file-name)))
+              ;; Enable kernel mode for the appropriate files
+              (when (and filename
+                         (string-match "~/src/linux-trees" filename))
+                (setq indent-tabs-mode t)
+                (c-set-style "linux")
+                (c-set-offset 'arglist-cont-nonempty
+                              '(c-lineup-gcc-asm-reg
+                                c-lineup-arglist-tabs-only))))))
+
+This will make emacs go better with the kernel coding style for C
+files below ~/src/linux-trees.
  
  But even if you fail in getting emacs to do sane formatting, not
  everything is lost: use "indent".
diff --git a/Documentation/DocBook/procfs-guide.tmpl b/Documentation/DocBook/procfs-guide.tmpl

index 1fd6a1ec7591d5f4179cdf2a641f1b055c1f486a..8a5dc6e021ffa8b16b6dc2f997e115e77b1aaf70 100644 (file)
--- a/Documentation/DocBook/procfs-guide.tmpl
+++ b/Documentation/DocBook/procfs-guide.tmpl
@@ -29,12 +29,12 @@
  
      <revhistory>
        <revision>
-       <revnumber>1.0&nbsp;</revnumber>
+       <revnumber>1.0</revnumber>
         <date>May 30, 2001</date>
         <revremark>Initial revision posted to linux-kernel</revremark>
        </revision>
        <revision>
-       <revnumber>1.1&nbsp;</revnumber>
+       <revnumber>1.1</revnumber>
         <date>June 3, 2001</date>
         <revremark>Revised after comments from linux-kernel</revremark>
        </revision>
diff --git a/Documentation/accounting/delay-accounting.txt b/Documentation/accounting/delay-accounting.txt

index 1443cd71d2631241f13286e544296ef286d91c90..8a12f0730c94da018615aebc8e657daf65eadd80 100644 (file)
--- a/Documentation/accounting/delay-accounting.txt
+++ b/Documentation/accounting/delay-accounting.txt
@@ -11,6 +11,7 @@ the delays experienced by a task while
  a) waiting for a CPU (while being runnable)
  b) completion of synchronous block I/O initiated by the task
  c) swapping in pages
+d) memory reclaim
  
  and makes these statistics available to userspace through
  the taskstats interface.
@@ -41,7 +42,7 @@ this structure. See
       include/linux/taskstats.h
  for a description of the fields pertaining to delay accounting.
  It will generally be in the form of counters returning the cumulative
-delay seen for cpu, sync block I/O, swapin etc.
+delay seen for cpu, sync block I/O, swapin, memory reclaim etc.
  
  Taking the difference of two successive readings of a given
  counter (say cpu_delay_total) for a task will give the delay
@@ -94,7 +95,9 @@ CPU   count   real total      virtual total   delay total
         7876    92005750        100000000       24001500
  IO     count   delay total
         0       0
-MEM    count   delay total
+SWAP   count   delay total
+       0       0
+RECLAIM        count   delay total
         0       0
  
  Get delays seen in executing a given simple command
@@ -108,5 +111,7 @@ CPU count   real total      virtual total   delay total
         6       4000250         4000000         0
  IO     count   delay total
         0       0
-MEM    count   delay total
+SWAP   count   delay total
+       0       0
+RECLAIM        count   delay total
         0       0
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c

index 40121b5cca14cab3993000c53078ef2d8484f001..3f7755f3963f11fd31ba598a8f6199371c87202f 100644 (file)
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -196,14 +196,18 @@ void print_delayacct(struct taskstats *t)
                "      %15llu%15llu%15llu%15llu\n"
                "IO    %15s%15s\n"
                "      %15llu%15llu\n"
-              "MEM   %15s%15s\n"
+              "SWAP  %15s%15s\n"
+              "      %15llu%15llu\n"
+              "RECLAIM  %12s%15s\n"
                "      %15llu%15llu\n",
                "count", "real total", "virtual total", "delay total",
                t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
                t->cpu_delay_total,
                "count", "delay total",
                t->blkio_count, t->blkio_delay_total,
-              "count", "delay total", t->swapin_count, t->swapin_delay_total);
+              "count", "delay total", t->swapin_count, t->swapin_delay_total,
+              "count", "delay total",
+              t->freepages_count, t->freepages_delay_total);
  }
  
  void task_context_switch_counts(struct taskstats *t)
diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt

index cd784f46bf8abefb9ed24257aaf5fcf4df2ecb00..b988d110db599e8474461984ac37c0be94b6ac3d 100644 (file)
--- a/Documentation/accounting/taskstats-struct.txt
+++ b/Documentation/accounting/taskstats-struct.txt
@@ -26,6 +26,8 @@ There are three different groups of fields in the struct taskstats:
  
  5) Time accounting for SMT machines
  
+6) Extended delay accounting fields for memory reclaim
+
  Future extension should add fields to the end of the taskstats struct, and
  should not change the relative position of each field within the struct.
  
@@ -170,4 +172,9 @@ struct taskstats {
         __u64   ac_utimescaled;         /* utime scaled on frequency etc */
         __u64   ac_stimescaled;         /* stime scaled on frequency etc */
         __u64   cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
+
+6) Extended delay accounting fields for memory reclaim
+       /* Delay waiting for memory reclaim */
+       __u64   freepages_count;
+       __u64   freepages_delay_total;
  }
diff --git a/Documentation/bt8xxgpio.txt b/Documentation/bt8xxgpio.txt

new file mode 100644 (file)

index 0000000..d8297e4
--- /dev/null
+++ b/Documentation/bt8xxgpio.txt
@@ -0,0 +1,67 @@
+===============================================================
+==  BT8XXGPIO driver                                         ==
+==                                                           ==
+==  A driver for a selfmade cheap BT8xx based PCI GPIO-card  ==
+==                                                           ==
+==  For advanced documentation, see                          ==
+==  http://www.bu3sch.de/btgpio.php                          ==
+===============================================================
+
+
+A generic digital 24-port PCI GPIO card can be built out of an ordinary
+Brooktree bt848, bt849, bt878 or bt879 based analog TV tuner card. The
+Brooktree chip is used in old analog Hauppauge WinTV PCI cards. You can easily
+find them used for low prices on the net.
+
+The bt8xx chip does have 24 digital GPIO ports.
+These ports are accessible via 24 pins on the SMD chip package.
+
+
+==============================================
+==  How to physically access the GPIO pins  ==
+==============================================
+
+There are several ways to access these pins. One might unsolder the whole chip
+and put it on a custom PCI board, or one might only unsolder each individual
+GPIO pin and solder that to some tiny wire. As the chip package really is tiny
+there are some advanced soldering skills needed in any case.
+
+The physical pinouts are drawn in the following ASCII art.
+The GPIO pins are marked with G00-G23
+
+                                           G G G G G G G G G G G G     G G G G G G
+                                           0 0 0 0 0 0 0 0 0 0 1 1     1 1 1 1 1 1
+                                           0 1 2 3 4 5 6 7 8 9 0 1     2 3 4 5 6 7
+           | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+           ---------------------------------------------------------------------------
+         --|                               ^                                     ^   |--
+         --|                               pin 86                           pin 67   |--
+         --|                                                                         |--
+         --|                                                               pin 61 >  |-- G18
+         --|                                                                         |-- G19
+         --|                                                                         |-- G20
+         --|                                                                         |-- G21
+         --|                                                                         |-- G22
+         --|                                                               pin 56 >  |-- G23
+         --|                                                                         |--
+         --|                           Brooktree 878/879                             |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|                                                                         |--
+         --|   O                                                                     |--
+         --|                                                                         |--
+           ---------------------------------------------------------------------------
+           | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+           ^
+           This is pin 1
+
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt

index 866b9cd9a9590d6b6b8c3d577038e8d51234082b..9b53d5827361fd647f3388212e648f502defc698 100644 (file)
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -242,8 +242,7 @@ rmdir() if there are no tasks.
  1. Add support for accounting huge pages (as a separate controller)
  2. Make per-cgroup scanner reclaim not-shared pages first
  3. Teach controller to account for shared-pages
-4. Start reclamation when the limit is lowered
-5. Start reclamation in the background when the limit is
+4. Start reclamation in the background when the limit is
     not yet hit but the usage is getting closer
  
  Summary
diff --git a/Documentation/edac.txt b/Documentation/edac.txt

index a5c36842ecef4ec103ff7f44b5499ed9963db0c2..ced527388001845d1dd519a0beb4a04de1a1ea93 100644 (file)
--- a/Documentation/edac.txt
+++ b/Documentation/edac.txt
@@ -222,74 +222,9 @@ both csrow2 and csrow3 are populated, this indicates a dual ranked
  set of DIMMs for channels 0 and 1.
  
  
-Within each of the 'mc','mcX' and 'csrowX' directories are several
+Within each of the 'mcX' and 'csrowX' directories are several
  EDAC control and attribute files.
  
-
-============================================================================
-DIRECTORY 'mc'
-
-In directory 'mc' are EDAC system overall control and attribute files:
-
-
-Panic on UE control file:
-
-       'edac_mc_panic_on_ue'
-
-       An uncorrectable error will cause a machine panic.  This is usually
-       desirable.  It is a bad idea to continue when an uncorrectable error
-       occurs - it is indeterminate what was uncorrected and the operating
-       system context might be so mangled that continuing will lead to further
-       corruption. If the kernel has MCE configured, then EDAC will never
-       notice the UE.
-
-       LOAD TIME: module/kernel parameter: panic_on_ue=[0|1]
-
-       RUN TIME:  echo "1" >/sys/devices/system/edac/mc/edac_mc_panic_on_ue
-
-
-Log UE control file:
-
-       'edac_mc_log_ue'
-
-       Generate kernel messages describing uncorrectable errors.  These errors
-       are reported through the system message log system.  UE statistics
-       will be accumulated even when UE logging is disabled.
-
-       LOAD TIME: module/kernel parameter: log_ue=[0|1]
-
-       RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ue
-
-
-Log CE control file:
-
-       'edac_mc_log_ce'
-
-       Generate kernel messages describing correctable errors.  These
-       errors are reported through the system message log system.
-       CE statistics will be accumulated even when CE logging is disabled.
-
-       LOAD TIME: module/kernel parameter: log_ce=[0|1]
-
-       RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ce
-
-
-Polling period control file:
-
-       'edac_mc_poll_msec'
-
-       The time period, in milliseconds, for polling for error information.
-       Too small a value wastes resources.  Too large a value might delay
-       necessary handling of errors and might loose valuable information for
-       locating the error.  1000 milliseconds (once each second) is the current
-       default. Systems which require all the bandwidth they can get, may
-       increase this.
-
-       LOAD TIME: module/kernel parameter: poll_msec=[0|1]
-
-       RUN TIME: echo "1000" >/sys/devices/system/edac/mc/edac_mc_poll_msec
-
-
  ============================================================================
  'mcX' DIRECTORIES
  
@@ -537,7 +472,6 @@ Channel 1 DIMM Label control file:
         motherboard specific and determination of this information
         must occur in userland at this time.
  
-
  ============================================================================
  SYSTEM LOGGING
  
@@ -570,7 +504,6 @@ error type, a notice of "no info" and then an optional,
  driver-specific error message.
  
  
-
  ============================================================================
  PCI Bus Parity Detection
  
@@ -604,6 +537,74 @@ Enable/Disable PCI Parity checking control file:
         echo "0" >/sys/devices/system/edac/pci/check_pci_parity
  
  
+Parity Count:
+
+       'pci_parity_count'
+
+       This attribute file will display the number of parity errors that
+       have been detected.
+
+
+============================================================================
+MODULE PARAMETERS
+
+Panic on UE control file:
+
+       'edac_mc_panic_on_ue'
+
+       An uncorrectable error will cause a machine panic.  This is usually
+       desirable.  It is a bad idea to continue when an uncorrectable error
+       occurs - it is indeterminate what was uncorrected and the operating
+       system context might be so mangled that continuing will lead to further
+       corruption. If the kernel has MCE configured, then EDAC will never
+       notice the UE.
+
+       LOAD TIME: module/kernel parameter: edac_mc_panic_on_ue=[0|1]
+
+       RUN TIME:  echo "1" > /sys/module/edac_core/parameters/edac_mc_panic_on_ue
+
+
+Log UE control file:
+
+       'edac_mc_log_ue'
+
+       Generate kernel messages describing uncorrectable errors.  These errors
+       are reported through the system message log system.  UE statistics
+       will be accumulated even when UE logging is disabled.
+
+       LOAD TIME: module/kernel parameter: edac_mc_log_ue=[0|1]
+
+       RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ue
+
+
+Log CE control file:
+
+       'edac_mc_log_ce'
+
+       Generate kernel messages describing correctable errors.  These
+       errors are reported through the system message log system.
+       CE statistics will be accumulated even when CE logging is disabled.
+
+       LOAD TIME: module/kernel parameter: edac_mc_log_ce=[0|1]
+
+       RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ce
+
+
+Polling period control file:
+
+       'edac_mc_poll_msec'
+
+       The time period, in milliseconds, for polling for error information.
+       Too small a value wastes resources.  Too large a value might delay
+       necessary handling of errors and might lose valuable information for
+       locating the error.  1000 milliseconds (once each second) is the current
+       default. Systems which require all the bandwidth they can get, may
+       increase this.
+
+       LOAD TIME: module/kernel parameter: edac_mc_poll_msec=[0|1]
+
+       RUN TIME: echo "1000" > /sys/module/edac_core/parameters/edac_mc_poll_msec
+
  
  Panic on PCI PARITY Error:
  
@@ -614,21 +615,13 @@ Panic on PCI PARITY Error:
         error has been detected.
  
  
-       module/kernel parameter: panic_on_pci_parity=[0|1]
+       module/kernel parameter: edac_panic_on_pci_pe=[0|1]
  
         Enable:
-       echo "1" >/sys/devices/system/edac/pci/panic_on_pci_parity
+       echo "1" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
  
         Disable:
-       echo "0" >/sys/devices/system/edac/pci/panic_on_pci_parity
-
-
-Parity Count:
-
-       'pci_parity_count'
-
-       This attribute file will display the number of parity errors that
-       have been detected.
+       echo "0" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
  
  
  
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt

index 09c4a1efb8e3c1b422580c4ee44b70315d6f809d..721c71b86e06812c0f03a9063be324b1824897a1 100644 (file)
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -138,24 +138,6 @@ Who:       Kay Sievers <kay.sievers@suse.de>
  
  ---------------------------
  
-What:  find_task_by_pid
-When:  2.6.26
-Why:   With pid namespaces, calling this funciton will return the
-       wrong task when called from inside a namespace.
-
-       The best way to save a task pid and find a task by this
-       pid later, is to find this task's struct pid pointer (or get
-       it directly from the task) and call pid_task() later.
-
-       If someone really needs to get a task by its pid_t, then
-       he most likely needs the find_task_by_vpid() to get the
-       task from the same namespace as the current task is in, but
-       this may be not so in general.
-
-Who:   Pavel Emelyanov <xemul@openvz.org>
-
----------------------------
-
  What:  ACPI procfs interface
  When:  July 2008
  Why:   ACPI sysfs conversion should be finished by January 2008.
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt

index 2d5e1e582e13272bfaef2fbc494a156d829cc289..bbac4f1d90567c3f7ea0f46a869c091160390076 100644 (file)
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -96,6 +96,14 @@ shortname=lower|win95|winnt|mixed
                         emulate the Windows 95 rule for create.
                  Default setting is `lower'.
  
+tz=UTC        -- Interpret timestamps as UTC rather than local time.
+                 This option disables the conversion of timestamps
+                 between local time (as used by Windows on FAT) and UTC
+                 (which Linux uses internally).  This is particularly
+                 useful when mounting devices (like digital cameras)
+                 that are set to UTC in order to avoid the pitfalls of
+                 local time.
+
  <bool>: 0,1,yes,no,true,false
  
  TODO
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt

index c35ca9e40d4ca8ae0cbf7c06c639d8531ae8d9dd..18022e249c53dc1ad991d74d160551fa32f32070 100644 (file)
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -347,15 +347,12 @@ necessarily be nonportable.
  Dynamic definition of GPIOs is not currently standard; for example, as
  a side effect of configuring an add-on board with some GPIO expanders.
  
-These calls are purely for kernel space, but a userspace API could be built
-on top of them.
-
  
  GPIO implementor's framework (OPTIONAL)
  =======================================
  As noted earlier, there is an optional implementation framework making it
  easier for platforms to support different kinds of GPIO controller using
-the same programming interface.
+the same programming interface.  This framework is called "gpiolib".
  
  As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file
  will be found there.  That will list all the controllers registered through
@@ -392,11 +389,21 @@ either NULL or the label associated with that GPIO when it was requested.
  
  Platform Support
  ----------------
-To support this framework, a platform's Kconfig will "select HAVE_GPIO_LIB"
+To support this framework, a platform's Kconfig will "select" either
+ARCH_REQUIRE_GPIOLIB or ARCH_WANT_OPTIONAL_GPIOLIB
  and arrange that its <asm/gpio.h> includes <asm-generic/gpio.h> and defines
  three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep().
  They may also want to provide a custom value for ARCH_NR_GPIOS.
  
+ARCH_REQUIRE_GPIOLIB means that the gpio-lib code will always get compiled
+into the kernel on that architecture.
+
+ARCH_WANT_OPTIONAL_GPIOLIB means the gpio-lib code defaults to off and the user
+can enable it and build it into the kernel optionally.
+
+If neither of these options is selected, the platform does not support
+GPIOs through GPIO-lib and the code cannot be enabled by the user.
+
  Trivial implementations of those functions can directly use framework
  code, which always dispatches through the gpio_chip:
  
@@ -439,4 +446,120 @@ becomes available.  That may mean the device should not be registered until
  calls for that GPIO can work.  One way to address such dependencies is for
  such gpio_chip controllers to provide setup() and teardown() callbacks to
  board specific code; those board specific callbacks would register devices
-once all the necessary resources are available.
+once all the necessary resources are available, and remove them later when
+the GPIO controller device becomes unavailable.
+
+
+Sysfs Interface for Userspace (OPTIONAL)
+========================================
+Platforms which use the "gpiolib" implementors framework may choose to
+configure a sysfs user interface to GPIOs.  This is different from the
+debugfs interface, since it provides control over GPIO direction and
+value instead of just showing a gpio state summary.  Plus, it could be
+present on production systems without debugging support.
+
+Given appropriate hardware documentation for the system, userspace could
+know for example that GPIO #23 controls the write protect line used to
+protect boot loader segments in flash memory.  System upgrade procedures
+may need to temporarily remove that protection, first importing a GPIO,
+then changing its output state, then updating the code before re-enabling
+the write protection.  In normal use, GPIO #23 would never be touched,
+and the kernel would have no need to know about it.
+
+Again depending on appropriate hardware documentation, on some systems
+userspace GPIO can be used to determine system configuration data that
+standard kernels won't know about.  And for some tasks, simple userspace
+GPIO drivers could be all that the system really needs.
+
+Note that standard kernel drivers exist for common "LEDs and Buttons"
+GPIO tasks:  "leds-gpio" and "gpio_keys", respectively.  Use those
+instead of talking directly to the GPIOs; they integrate with kernel
+frameworks better than your userspace code could.
+
+
+Paths in Sysfs
+--------------
+There are three kinds of entry in /sys/class/gpio:
+
+   -   Control interfaces used to get userspace control over GPIOs;
+
+   -   GPIOs themselves; and
+
+   -   GPIO controllers ("gpio_chip" instances).
+
+That's in addition to standard files including the "device" symlink.
+
+The control interfaces are write-only:
+
+    /sys/class/gpio/
+
+       "export" ... Userspace may ask the kernel to export control of
+               a GPIO to userspace by writing its number to this file.
+
+               Example:  "echo 19 > export" will create a "gpio19" node
+               for GPIO #19, if that's not requested by kernel code.
+
+       "unexport" ... Reverses the effect of exporting to userspace.
+
+               Example:  "echo 19 > unexport" will remove a "gpio19"
+               node exported using the "export" file.
+
+GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42)
+and have the following read/write attributes:
+
+    /sys/class/gpio/gpioN/
+
+       "direction" ... reads as either "in" or "out".  This value may
+               normally be written.  Writing as "out" defaults to
+               initializing the value as low.  To ensure glitch free
+               operation, values "low" and "high" may be written to
+               configure the GPIO as an output with that initial value.
+
+               Note that this attribute *will not exist* if the kernel
+               doesn't support changing the direction of a GPIO, or
+               it was exported by kernel code that didn't explicitly
+               allow userspace to reconfigure this GPIO's direction.
+
+       "value" ... reads as either 0 (low) or 1 (high).  If the GPIO
+               is configured as an output, this value may be written;
+               any nonzero value is treated as high.
+
+GPIO controllers have paths like /sys/class/gpio/gpiochip42/ (for the
+controller implementing GPIOs starting at #42) and have the following
+read-only attributes:
+
+    /sys/class/gpio/gpiochipN/
+
+       "base" ... same as N, the first GPIO managed by this chip
+
+       "label" ... provided for diagnostics (not always unique)
+
+       "ngpio" ... how many GPIOs this manages (N to N + ngpio - 1)
+
+Board documentation should in most cases cover what GPIOs are used for
+what purposes.  However, those numbers are not always stable; GPIOs on
+a daughtercard might be different depending on the base board being used,
+or other cards in the stack.  In such cases, you may need to use the
+gpiochip nodes (possibly in conjunction with schematics) to determine
+the correct GPIO number to use for a given signal.
+
+
+Exporting from Kernel code
+--------------------------
+Kernel code can explicitly manage exports of GPIOs which have already been
+requested using gpio_request():
+
+       /* export the GPIO to userspace */
+       int gpio_export(unsigned gpio, bool direction_may_change);
+
+       /* reverse gpio_export() */
+       void gpio_unexport(unsigned gpio);
+
+After a kernel driver requests a GPIO, it may only be made available in
+the sysfs interface by gpio_export().  The driver can control whether the
+signal direction may change.  This helps drivers prevent userspace code
+from accidentally clobbering important system state.
+
+This explicit exporting can help with debugging (by making some kinds
+of experiments easier), or can provide an always-there interface that's
+suitable for documenting as part of a board support package.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt

index 497a98dafdaa0157520fa9fdc61a7ed81cdb1e22..e7bea3e853044e54f3ee863ad3d295b0df84a522 100644 (file)
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2159,13 +2159,6 @@ and is between 256 and 4096 characters. It is defined in the file
                         <deci-seconds>: poll all this frequency
                         0: no polling (default)
  
-       tipar.timeout=  [HW,PPT]
-                       Set communications timeout in tenths of a second
-                       (default 15).
-
-       tipar.delay=    [HW,PPT]
-                       Set inter-bit delay in microseconds (default 10).
-
         tmscsim=        [HW,SCSI]
                         See comment before function dc390_setup() in
                         drivers/scsi/tmscsim.c.
diff --git a/Documentation/moxa-smartio b/Documentation/moxa-smartio

index fe24ecc6372e6b3482c3f39972ab9f70077d7b70..5337e80a5b96c6341e523394be6fedc58b87f6ed 100644 (file)
--- a/Documentation/moxa-smartio
+++ b/Documentation/moxa-smartio
@@ -1,14 +1,22 @@
  =============================================================================
-
-       MOXA Smartio Family Device Driver Ver 1.1 Installation Guide
-                   for Linux Kernel 2.2.x and 2.0.3x
-              Copyright (C) 1999, Moxa Technologies Co, Ltd.
+          MOXA Smartio/Industio Family Device Driver Installation Guide
+                   for Linux Kernel 2.4.x, 2.6.x
+              Copyright (C) 2008, Moxa Inc.
  =============================================================================
+Date: 01/21/2008
+
  Content
  
  1. Introduction
  2. System Requirement
  3. Installation
+   3.1 Hardware installation
+   3.2 Driver files
+   3.3 Device naming convention
+   3.4 Module driver configuration
+   3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x.
+   3.6 Custom configuration
+   3.7 Verify driver installation
  4. Utilities
  5. Setserial
  6. Troubleshooting
@@ -16,27 +24,48 @@ Content
  -----------------------------------------------------------------------------
  1. Introduction
  
-   The Smartio family Linux driver, Ver. 1.1, supports following multiport
+   The Smartio/Industio/UPCI family Linux driver supports following multiport
     boards.
  
-    -C104P/H/HS, C104H/PCI, C104HS/PCI, CI-104J 4 port multiport board.
-    -C168P/H/HS, C168H/PCI 8 port multiport board.
-
-   This driver has been modified a little and cleaned up from the Moxa
-   contributed driver code and merged into Linux 2.2.14pre. In particular
-   official major/minor numbers have been assigned which are different to
-   those the original Moxa supplied driver used.
+    - 2 ports multiport board
+       CP-102U, CP-102UL, CP-102UF
+       CP-132U-I, CP-132UL,
+       CP-132, CP-132I, CP132S, CP-132IS,
+       CI-132, CI-132I, CI-132IS,
+       (C102H, C102HI, C102HIS, C102P, CP-102, CP-102S)
+
+    - 4 ports multiport board
+       CP-104EL,
+       CP-104UL, CP-104JU,
+       CP-134U, CP-134U-I,
+       C104H/PCI, C104HS/PCI,
+       CP-114, CP-114I, CP-114S, CP-114IS, CP-114UL,
+       C104H, C104HS,
+       CI-104J, CI-104JS,
+       CI-134, CI-134I, CI-134IS,
+       (C114HI, CT-114I, C104P)
+       POS-104UL,
+       CB-114,
+       CB-134I
+
+    - 8 ports multiport board
+       CP-118EL, CP-168EL,
+       CP-118U, CP-168U,
+       C168H/PCI,
+       C168H, C168HS,
+       (C168P),
+       CB-108
  
     This driver and installation procedure have been developed upon Linux Kernel
-   2.2.5 and backward compatible to 2.0.3x. This driver supports Intel x86 and
-   Alpha hardware platform. In order to maintain compatibility, this version
-   has also been properly tested with RedHat, OpenLinux, TurboLinux and
-   S.u.S.E Linux. However, if compatibility problem occurs, please contact
-   Moxa at support@moxa.com.tw.
+   2.4.x and 2.6.x. This driver supports Intel x86 hardware platform. In order
+   to maintain compatibility, this version has also been properly tested with
+   RedHat, Mandrake, Fedora and S.u.S.E Linux. However, if compatibility problem
+   occurs, please contact Moxa at support@moxa.com.tw.
  
     In addition to device driver, useful utilities are also provided in this
     version. They are
-    - msdiag     Diagnostic program for detecting installed Moxa Smartio boards.
+    - msdiag     Diagnostic program for displaying installed Moxa
+                 Smartio/Industio boards.
      - msmon      Monitor program to observe data count and line status signals.
      - msterm     A simple terminal program which is useful in testing serial
                  ports.
@@ -47,8 +76,7 @@ Content
     GNU General Public License in this version. Please refer to GNU General
     Public License announcement in each source code file for more detail.
  
-   In Moxa's ftp sites, you may always find latest driver at
-   ftp://ftp.moxa.com  or ftp://ftp.moxa.com.tw.
+   In Moxa's Web sites, you may always find latest driver at http://web.moxa.com.
  
     This version of driver can be installed as Loadable Module (Module driver)
     or built-in into kernel (Static driver). You may refer to following
@@ -61,18 +89,27 @@ Content
  
  -----------------------------------------------------------------------------
  2. System Requirement
-   - Hardware platform: Intel x86 or Alpha machine
-   - Kernel version: 2.0.3x or 2.2.x
+   - Hardware platform: Intel x86 machine
+   - Kernel version: 2.4.x or 2.6.x
     - gcc version 2.72 or later
     - Maximum 4 boards can be installed in combination
  
  -----------------------------------------------------------------------------
  3. Installation
  
+   3.1 Hardware installation
+   3.2 Driver files
+   3.3 Device naming convention
+   3.4 Module driver configuration
+   3.5 Static driver configuration for Linux kernel 2.4.x, 2.6.x.
+   3.6 Custom configuration
+   3.7 Verify driver installation
+
+
     3.1 Hardware installation
  
-       There are two types of buses, ISA and PCI, for Smartio family multiport
-       board.
+       There are two types of buses, ISA and PCI, for Smartio/Industio
+       family multiport board.
  
         ISA board
         ---------
@@ -81,47 +118,57 @@ Content
         installation procedure in User's Manual before proceed any further.
         Please make sure the JP1 is open after the ISA board is set properly.
  
-       PCI board
-       ---------
+       PCI/UPCI board
+       --------------
         You may need to adjust IRQ usage in BIOS to avoid from IRQ conflict
         with other ISA devices. Please refer to hardware installation
         procedure in User's Manual in advance.
  
-       IRQ Sharing
+       PCI IRQ Sharing
         -----------
         Each port within the same multiport board shares the same IRQ. Up to
-       4 Moxa Smartio Family multiport boards can be installed together on
-       one system and they can share the same IRQ.
+       4 Moxa Smartio/Industio PCI Family multiport boards can be installed
+       together on one system and they can share the same IRQ.
+
  
-   3.2 Driver files and device naming convention
+   3.2 Driver files
  
         The driver file may be obtained from ftp, CD-ROM or floppy disk. The
         first step, anyway, is to copy driver file "mxser.tgz" into specified
         directory. e.g. /moxa. The execute commands as below.
  
+       # cd /
+       # mkdir moxa
         # cd /moxa
-       # tar xvf /dev/fd0 
+       # tar xvf /dev/fd0
+
         or
+
+       # cd /
+       # mkdir moxa
         # cd /moxa
         # cp /mnt/cdrom/<driver directory>/mxser.tgz .
         # tar xvfz mxser.tgz
  
+
+   3.3 Device naming convention
+
         You may find all the driver and utilities files in /moxa/mxser.
         Following installation procedure depends on the model you'd like to
-       run the driver. If you prefer module driver, please refer to 3.3.
-       If static driver is required, please refer to 3.4.
+       run the driver. If you prefer module driver, please refer to 3.4.
+       If static driver is required, please refer to 3.5.
  
         Dialin and callout port
         -----------------------
-       This driver remains traditional serial device properties. There're
+       This driver remains traditional serial device properties. There are
         two special file name for each serial port. One is dial-in port
         which is named "ttyMxx". For callout port, the naming convention
         is "cumxx".
  
         Device naming when more than 2 boards installed
         -----------------------------------------------
-       Naming convention for each Smartio multiport board is pre-defined
-       as below.
+       Naming convention for each Smartio/Industio multiport board is
+       pre-defined as below.
  
         Board Num.       Dial-in Port         Callout port
         1st board       ttyM0  - ttyM7        cum0  - cum7
@@ -129,6 +176,12 @@ Content
         3rd board       ttyM16 - ttyM23       cum16 - cum23
         4th board       ttyM24 - ttym31       cum24 - cum31
  
+
+       !!!!!!!!!!!!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+       Under Kernel 2.6 the cum Device is Obsolete. So use ttyM*
+       device instead.
+       !!!!!!!!!!!!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
         Board sequence
         --------------
         This driver will activate ISA boards according to the parameter set
@@ -138,69 +191,131 @@ Content
         For PCI boards, their sequence will be after ISA boards and C168H/PCI
         has higher priority than C104H/PCI boards.
  
-   3.3 Module driver configuration
+   3.4 Module driver configuration
         Module driver is easiest way to install. If you prefer static driver
         installation, please skip this paragraph.
-       1. Find "Makefile" in /moxa/mxser, then run
  
-         # make install
+
+       ------------- Prepare to use the MOXA driver--------------------
+       3.4.1 Create tty device with correct major number
+          Before using MOXA driver, your system must have the tty devices
+          which are created with driver's major number. We offer one shell
+          script "msmknod" to simplify the procedure.
+          This step is only needed to be executed once. But you still
+          need to do this procedure when:
+          a. You change the driver's major number. Please refer to the
+             "3.6" section.
+          b. Your total installed MOXA boards number is changed. Maybe you
+             add/delete one MOXA board.
+          c. You want to change the tty name. This needs to modify the
+             shell script "msmknod"
+
+          The procedure is:
+         # cd /moxa/mxser/driver
+         # ./msmknod
+
+          This shell script will require the major number for dial-in
+          device and callout device to create tty device. You also need
+          to specify the total installed MOXA board number. Default major
+          numbers for dial-in device and callout device are 30, 35. If
+          you need to change to other number, please refer to section
+          "3.6" for more detailed procedure.
+          Msmknod will delete any special files occupying the same device
+          naming.
+
+       3.4.2 Build the MOXA driver and utilities
+          Before using the MOXA driver and utilities, you need to compile
+          all the source code. This step only needs to be executed once.
+          But you still need to re-compile the source code if you modify
+          it. For example, if you change the driver's major number (see
+          "3.6" section), then you need to do this step again.
+
+          Find "Makefile" in /moxa/mxser, then run
+
+         # make clean; make install
+
+          !!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!
+         For Red Hat 9, Red Hat Enterprise Linux AS3/ES3/WS3 & Fedora Core1:
+         # make clean; make installsp1
+
+         For Red Hat Enterprise Linux AS4/ES4/WS4:
+         # make clean; make installsp2
+          !!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!
  
           The driver files "mxser.o" and utilities will be properly compiled
-         and copied to system directories respectively.Then run
+         and copied to system directories respectively.
  
-         # insmod mxser
+       ------------- Load MOXA driver--------------------
+       3.4.3 Load the MOXA driver
  
-         to activate the modular driver. You may run "lsmod" to check
-         if "mxser.o" is activated.
+         # modprobe mxser <argument>
  
-       2. Create special files by executing "msmknod".
-         # cd /moxa/mxser/driver
-         # ./msmknod
+         will activate the module driver. You may run "lsmod" to check
+         if "mxser" is activated. If the MOXA board is ISA board, the
+          <argument> is needed. Please refer to section "3.4.5" for more
+          information.
+
+
+       ------------- Load MOXA driver on boot --------------------
+       3.4.4 For the above description, you may manually execute
+          "modprobe mxser" to activate this driver and run
+         "rmmod mxser" to remove it.
+          However, it's better to have a boot time configuration to
+          eliminate manual operation. Boot time configuration can be
+          achieved by rc file. We offer one "rc.mxser" file to simplify
+          the procedure under "/moxa/mxser/driver".
  
-         Default major numbers for dial-in device and callout device are
-         174, 175. Msmknod will delete any special files occupying the same
-         device naming.
+          But if you use ISA board, please modify the "modprobe ..." command
+          to add the argument (see "3.4.5" section). After modifying the
+          rc.mxser, please try to execute "/moxa/mxser/driver/rc.mxser"
+          manually to make sure the modification is ok. If any error
+          encountered, please try to modify again. If the modification is
+          completed, follow the below step.
  
-       3. Up to now, you may manually execute "insmod mxser" to activate
-         this driver and run "rmmod mxser" to remove it. However, it's
-         better to have a boot time configuration to eliminate manual
-         operation.
-         Boot time configuration can be achieved by rc file. Run following
-         command for setting rc files.
+         Run following command for setting rc files.
  
           # cd /moxa/mxser/driver
           # cp ./rc.mxser /etc/rc.d
           # cd /etc/rc.d
  
-         You may have to modify part of the content in rc.mxser to specify
-          parameters for ISA board. Please refer to rc.mxser for more detail.
-          Find "rc.serial". If "rc.serial" doesn't exist, create it by vi.
-         Add "rc.mxser" in last line. Next, open rc.local by vi
-         and append following content.
+         Check whether "rc.serial" exists. If "rc.serial" doesn't exist,
+         create it by vi and run "chmod 755 rc.serial" to change the permission.
+         Add "/etc/rc.d/rc.mxser" in last line.
  
-         if [ -f /etc/rc.d/rc.serial ]; then
-            sh /etc/rc.d/rc.serial
-         fi
+          Reboot and check if "mxser" is activated by "lsmod" command.
  
-       4. Reboot and check if mxser.o activated by "lsmod" command.
-       5. If you'd like to drive Smartio ISA boards in the system, you'll
-         have to add parameter to specify CAP address of given board while
-          activating "mxser.o". The format for parameters are as follows.
+       3.4.5. If you'd like to drive Smartio/Industio ISA boards in the system,
+          you'll have to add parameter to specify CAP address of given
+         board while activating "mxser.o". The format for parameters are
+         as follows.
  
-         insmod mxser ioaddr=0x???,0x???,0x???,0x???
+         modprobe mxser ioaddr=0x???,0x???,0x???,0x???
                                 |      |     |    |
                                 |      |     |    +- 4th ISA board
                                 |      |     +------ 3rd ISA board
                                 |      +------------ 2nd ISA board
                                 +------------------- 1st ISA board
  
-   3.4 Static driver configuration
+   3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x
+
+       Note: To use static driver, you must install the linux kernel
+             source package.
+
+       3.5.1 Backup the built-in driver in the kernel.
+          # cd /usr/src/linux/drivers/char
+          # mv mxser.c mxser.c.old
+
+          For Red Hat 7.x user, you need to create link:
+          # cd /usr/src
+          # ln -s linux-2.4 linux
  
-       1. Create link
+       3.5.2 Create link
           # cd /usr/src/linux/drivers/char
           # ln -s /moxa/mxser/driver/mxser.c mxser.c
  
-       2. Add CAP address list for ISA boards
+       3.5.3 Add CAP address list for ISA boards. For PCI boards user,
+          please skip this step.
+
           In module mode, the CAP address for ISA board is given by
           parameter. In static driver configuration, you'll have to
           assign it within driver's source code. If you will not
@@ -222,73 +337,55 @@ Content
              static int mxserBoardCAP[]
              = {0x280, 0x180, 0x00, 0x00};
  
-       3. Modify tty_io.c
-         # cd /usr/src/linux/drivers/char/
-         # vi tty_io.c
-           Find pty_init(), insert "mxser_init()" as
+       3.5.4 Setup kernel configuration
  
-           pty_init();
-           mxser_init();
+          Configure the kernel:
  
-       4. Modify tty.h
-         # cd /usr/src/linux/include/linux
-         # vi tty.h
-           Find extern int tty_init(void), insert "mxser_init()" as
+            # cd /usr/src/linux
+            # make menuconfig
  
-           extern int tty_init(void);
-           extern int mxser_init(void);
-     
-       5. Modify Makefile
-         # cd /usr/src/linux/drivers/char
-         # vi Makefile
-           Find L_OBJS := tty_io.o ...... random.o, add
-           "mxser.o" at last of this line as
-           L_OBJS := tty_io.o ....... mxser.o
+          You will go into a menu-driven system. Please select [Character
+          devices][Non-standard serial port support], enable the [Moxa
+          SmartIO support] driver with "[*]" for built-in (not "[M]"), then
+          select [Exit] to exit this program.
  
-       6. Rebuild kernel
-         The following are for Linux kernel rebuilding,for your reference only.
+       3.5.5 Rebuild kernel
+         The following are for Linux kernel rebuilding, for your
+          reference only.
           For appropriate details, please refer to the Linux document.
  
-         If 'lilo' utility is installed, please use 'make zlilo' to rebuild
-         kernel. If 'lilo' is not installed, please follow the following steps.
-
            a. cd /usr/src/linux
-          b. make clean                             /* take a few minutes */
-          c. make bzImage                 /* take probably 10-20 minutes */
-          d. Backup original boot kernel.                /* optional step */
-          e. cp /usr/src/linux/arch/i386/boot/bzImage /boot/vmlinuz
+          b. make clean             /* take a few minutes */
+          c. make dep               /* take a few minutes */
+          d. make bzImage           /* take probably 10-20 minutes */
+          e. make install           /* copy boot image to correct position */
            f. Please make sure the boot kernel (vmlinuz) is in the
-             correct position. If you use 'lilo' utility, you should
-             check /etc/lilo.conf 'image' item specified the path
-             which is the 'vmlinuz' path, or you will load wrong
-             (or old) boot kernel image (vmlinuz).
-          g. chmod 400 /vmlinuz
-          h. lilo
-          i. rdev -R /vmlinuz 1
-          j. sync
-
-         Note that if the result of "make zImage" is ERROR, then you have to
-         go back to Linux configuration Setup. Type "make config" in directory
-         /usr/src/linux or "setup".
-
-         Since system include file, /usr/src/linux/include/linux/interrupt.h,
-         is modified each time the MOXA driver is installed, kernel rebuilding
-         is inevitable. And it takes about 10 to 20 minutes depends on the
-         machine.
-
-       7. Make utility
-         # cd /moxa/mxser/utility
-         # make install
-       
-       8. Make special file
+             correct position.
+          g. If you use 'lilo' utility, you should check /etc/lilo.conf
+             'image' item specified the path which is the 'vmlinuz' path,
+             or you will load wrong (or old) boot kernel image (vmlinuz).
+             After checking /etc/lilo.conf, please run "lilo".
+
+         Note that if the result of "make bzImage" is ERROR, then you have to
+         go back to Linux configuration Setup. Type "make menuconfig" in
+          directory /usr/src/linux.
+
+
+       3.5.6 Make tty device and special file
            # cd /moxa/mxser/driver
            # ./msmknod
  
-       9. Reboot
+       3.5.7 Make utility
+         # cd /moxa/mxser/utility
+         # make clean; make install
+
+       3.5.8 Reboot
  
-   3.5 Custom configuration
+
+
+   3.6 Custom configuration
         Although this driver already provides you default configuration, you
-       still can change the device name and major number.The instruction to
+       still can change the device name and major number. The instruction to
         change these parameters are shown as below.
  
         Change Device name
@@ -306,33 +403,37 @@ Content
         2 free major numbers for this driver. There are 3 steps to change
         major numbers.
  
-       1. Find free major numbers
+       3.6.1 Find free major numbers
           In /proc/devices, you may find all the major numbers occupied
           in the system. Please select 2 major numbers that are available.
           e.g. 40, 45.
-       2. Create special files
+       3.6.2 Create special files
           Run /moxa/mxser/driver/msmknod to create special files with
           specified major numbers.
-       3. Modify driver with new major number
+       3.6.3 Modify driver with new major number
           Run vi to open /moxa/mxser/driver/mxser.c. Locate the line
           contains "MXSERMAJOR". Change the content as below.
           #define         MXSERMAJOR              40
           #define         MXSERCUMAJOR            45
-       4. Run # make install in /moxa/mxser/driver.
+       3.6.4 Run "make clean; make install" in /moxa/mxser/driver.
  
-   3.6 Verify driver installation
+   3.7 Verify driver installation
         You may refer to /var/log/messages to check the latest status
         log reported by this driver whenever it's activated.
+
  -----------------------------------------------------------------------------
  4. Utilities
     There are 3 utilities contained in this driver. They are msdiag, msmon and
     msterm. These 3 utilities are released in form of source code. They should
     be compiled into executable file and copied into /usr/bin.
  
+   Before using these utilities, please load driver (refer to 3.4 & 3.5) and
+   make sure you had run the "msmknod" utility.
+
     msdiag - Diagnostic
     --------------------
-   This utility provides the function to detect what Moxa Smartio multiport
-   board exists in the system.
+   This utility provides the function to display which Moxa Smartio/Industio
+   boards were found by the driver in the system.
  
     msmon - Port Monitoring
     -----------------------
@@ -353,12 +454,13 @@ Content
     application, for example, sending AT command to a modem connected to the
     port or used as a terminal for login purpose. Note that this is only a
     dumb terminal emulation without handling full screen operation.
+
  -----------------------------------------------------------------------------
  5. Setserial
  
     Supported Setserial parameters are listed as below.
  
-   uart          set UART type(16450-->disable FIFO, 16550A-->enable FIFO)
+   uart                  set UART type(16450-->disable FIFO, 16550A-->enable FIFO)
     close_delay   set the amount of time(in 1/100 of a second) that DTR
                   should be kept low while being closed.
     closing_wait   set the amount of time(in 1/100 of a second) that the
@@ -366,7 +468,13 @@ Content
                   being closed, before the receiver is disable.
     spd_hi        Use  57.6kb  when  the application requests 38.4kb.
     spd_vhi       Use  115.2kb  when  the application requests 38.4kb.
+   spd_shi       Use  230.4kb  when  the application requests 38.4kb.
+   spd_warp      Use  460.8kb  when  the application requests 38.4kb.
     spd_normal    Use  38.4kb  when  the application requests 38.4kb.
+   spd_cust      Use  the custom divisor to set the speed when  the
+                 application requests 38.4kb.
+   divisor       This option sets the custom divisor.
+   baud_base     This option sets the base baud rate.
  
  -----------------------------------------------------------------------------
  6. Troubleshooting
@@ -375,8 +483,9 @@ Content
     possible. If all the possible solutions fail, please contact our technical
     support team to get more help.
  
-   Error msg: More than 4 Moxa Smartio family boards found. Fifth board and
-             after are ignored.
+
+   Error msg: More than 4 Moxa Smartio/Industio family boards found. Fifth board
+              and after are ignored.
     Solution:
     To avoid this problem, please unplug fifth and after board, because Moxa
     driver supports up to 4 boards.
@@ -384,7 +493,7 @@ Content
     Error msg: Request_irq fail, IRQ(?) may be conflict with another device.
     Solution:
     Other PCI or ISA devices occupy the assigned IRQ. If you are not sure
-   which device causes the situation,please check /proc/interrupts to find
+   which device causes the situation, please check /proc/interrupts to find
     free IRQ and simply change another free IRQ for Moxa board.
  
     Error msg: Board #: C1xx Series(CAP=xxx) interrupt number invalid.
@@ -397,15 +506,18 @@ Content
     Moxa ISA board needs an interrupt vector.Please refer to user's manual
     "Hardware Installation" chapter to set interrupt vector.
  
-   Error msg: Couldn't install MOXA Smartio family driver!
+   Error msg: Couldn't install MOXA Smartio/Industio family driver!
     Solution:
     Load Moxa driver fail, the major number may conflict with other devices.
-   Please refer to previous section 3.5 to change a free major number for
+   Please refer to previous section 3.6 to change a free major number for
     Moxa driver.
  
-   Error msg: Couldn't install MOXA Smartio family callout driver!
+   Error msg: Couldn't install MOXA Smartio/Industio family callout driver!
     Solution:
     Load Moxa callout driver fail, the callout device major number may
-   conflict with other devices. Please refer to previous section 3.5 to
+   conflict with other devices. Please refer to previous section 3.6 to
     change a free callout device major number for Moxa driver.
+
+
  -----------------------------------------------------------------------------
+
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt

index ea1b70b357936b6e32ebfb8e3c7ae180f45f9209..99514ced82c564bb4cefa3e14468b8f80d9128bc 100644 (file)
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -59,6 +59,7 @@ Table of Contents
        p) Freescale Synchronous Serial Interface
           q) USB EHCI controllers
        r) MDIO on GPIOs
+      s) SPI busses
  
    VII - Marvell Discovery mv64[345]6x System Controller chips
      1) The /system-controller node
@@ -1883,6 +1884,62 @@ platforms are moved over to use the flattened-device-tree model.
                          &qe_pio_c 6>;
         };
  
+    s) SPI (Serial Peripheral Interface) busses
+
+    SPI busses can be described with a node for the SPI master device
+    and a set of child nodes for each SPI slave on the bus.  For this
+    discussion, it is assumed that the system's SPI controller is in
+    SPI master mode.  This binding does not describe SPI controllers
+    in slave mode.
+
+    The SPI master node requires the following properties:
+    - #address-cells  - number of cells required to define a chip select
+                       address on the SPI bus.
+    - #size-cells     - should be zero.
+    - compatible      - name of SPI bus controller following generic names
+                       recommended practice.
+    No other properties are required in the SPI bus node.  It is assumed
+    that a driver for an SPI bus device will understand that it is an SPI bus.
+    However, the binding does not attempt to define the specific method for
+    assigning chip select numbers.  Since SPI chip select configuration is
+    flexible and non-standardized, it is left out of this binding with the
+    assumption that board specific platform code will be used to manage
+    chip selects.  Individual drivers can define additional properties to
+    support describing the chip select layout.
+
+    SPI slave nodes must be children of the SPI master node and can
+    contain the following properties.
+    - reg             - (required) chip select address of device.
+    - compatible      - (required) name of SPI device following generic names
+                       recommended practice
+    - spi-max-frequency - (required) Maximum SPI clocking speed of device in Hz
+    - spi-cpol        - (optional) Empty property indicating device requires
+                       inverse clock polarity (CPOL) mode
+    - spi-cpha        - (optional) Empty property indicating device requires
+                       shifted clock phase (CPHA) mode
+
+    SPI example for an MPC5200 SPI bus:
+               spi@f00 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "fsl,mpc5200b-spi","fsl,mpc5200-spi";
+                       reg = <0xf00 0x20>;
+                       interrupts = <2 13 0 2 14 0>;
+                       interrupt-parent = <&mpc5200_pic>;
+
+                       ethernet-switch@0 {
+                               compatible = "micrel,ks8995m";
+                               spi-max-frequency = <1000000>;
+                               reg = <0>;
+                       };
+
+                       codec@1 {
+                               compatible = "ti,tlv320aic26";
+                               spi-max-frequency = <100000>;
+                               reg = <1>;
+                       };
+               };
+
  VII - Marvell Discovery mv64[345]6x System Controller chips
  ===========================================================
  
diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt

index b0472ac5226a38ff2f740c185dcfdd5676dbcf97..f866c72291bf4ce2e29a9a6ffd1c40ac740d4bb7 100644 (file)
--- a/Documentation/unaligned-memory-access.txt
+++ b/Documentation/unaligned-memory-access.txt
@@ -218,9 +218,35 @@ If use of such macros is not convenient, another option is to use memcpy(),
  where the source or destination (or both) are of type u8* or unsigned char*.
  Due to the byte-wise nature of this operation, unaligned accesses are avoided.
  
+
+Alignment vs. Networking
+========================
+
+On architectures that require aligned loads, networking requires that the IP
+header is aligned on a four-byte boundary to optimise the IP stack. For
+regular ethernet hardware, the constant NET_IP_ALIGN is used. On most
+architectures this constant has the value 2 because the normal ethernet
+header is 14 bytes long, so in order to get proper alignment one needs to
+DMA to an address which can be expressed as 4*n + 2. One notable exception
+here is powerpc which defines NET_IP_ALIGN to 0 because DMA to unaligned
+addresses can be very expensive and dwarf the cost of unaligned loads.
+
+For some ethernet hardware that cannot DMA to unaligned addresses like
+4*n+2 or non-ethernet hardware, this can be a problem, and it is then
+required to copy the incoming frame into an aligned buffer. Because this is
+unnecessary on architectures that can do unaligned accesses, the code can be
+made dependent on CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS like so:
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+       skb = original skb
+#else
+       skb = copy skb
+#endif
+
  --
-Author: Daniel Drake <dsd@gentoo.org>
+Authors: Daniel Drake <dsd@gentoo.org>,
+         Johannes Berg <johannes@sipsolutions.net>
  With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt,
-Johannes Berg, Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock,
-Uli Kunitz, Vadim Lobanov
+Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock, Uli Kunitz,
+Vadim Lobanov
  
diff --git a/MAINTAINERS b/MAINTAINERS

index be05ef9b7b429541fcff3a55f9355160e1e44dcb..4cbf6016a9b98059f4b4691a9240b646a740daf6 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1043,6 +1043,12 @@ M:       fujita.tomonori@lab.ntt.co.jp
  L:     linux-scsi@vger.kernel.org
  S:     Supported
  
+BT8XXGPIO DRIVER
+P:     Michael Buesch
+M:     mb@bu3sch.de
+W:     http://bu3sch.de/btgpio.php
+S:     Maintained
+
  BTTV VIDEO4LINUX DRIVER
  P:     Mauro Carvalho Chehab
  M:     mchehab@infradead.org
diff --git a/Makefile b/Makefile

index 4bcd1cf90cb103e8578b4692ea7e75470f8cbe77..3cad7db5eba7a7570b392f5d5b4c99b794ee71e5 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1061,6 +1061,7 @@ modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux)
         $(Q)$(AWK) '!x[$$0]++' $(vmlinux-dirs:%=$(objtree)/%/modules.order) > $(objtree)/modules.order
         @echo '  Building modules, stage 2.';
         $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
+       $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.fwinst obj=firmware __fw_modbuild
  
  
  # Target to prepare building external modules
diff --git a/arch/Kconfig b/arch/Kconfig

index 6093c0be58b017a1a325d2b4c29b0f6ff1e771ba..b0fabfa864ffa12be40160f9b2c6d3b1010ab149 100644 (file)
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -27,6 +27,25 @@ config KPROBES
           for kernel debugging, non-intrusive instrumentation and testing.
           If in doubt, say "N".
  
+config HAVE_EFFICIENT_UNALIGNED_ACCESS
+       def_bool n
+       help
+         Some architectures are unable to perform unaligned accesses
+         without the use of get_unaligned/put_unaligned. Others are
+         unable to perform such accesses efficiently (e.g. trap on
+         unaligned access and require fixing it up in the exception
+         handler.)
+
+         This symbol should be selected by an architecture if it can
+         perform unaligned accesses efficiently to allow different
+         code paths to be selected for these cases. Some network
+         drivers, for example, could opt to not fix up alignment
+         problems with received packets if doing so would not help
+         much.
+
+         See Documentation/unaligned-memory-access.txt for more
+         information on the topic of unaligned memory accesses.
+
  config KRETPROBES
         def_bool y
         depends on KPROBES && HAVE_KRETPROBES
diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c

index c00646b25f6e961862900c8e13bd69cb31a89823..3047a1b3a517160f4c016c032f30549e3c20ca19 100644 (file)
--- a/arch/alpha/boot/misc.c
+++ b/arch/alpha/boot/misc.c
@@ -78,8 +78,6 @@ static unsigned outcnt;               /* bytes in output buffer */
  static int  fill_inbuf(void);
  static void flush_window(void);
  static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
  
  static char *input_data;
  static int  input_data_size;
@@ -88,51 +86,18 @@ static uch *output_data;
  static ulg output_ptr;
  static ulg bytes_out;
  
-static void *malloc(int size);
-static void free(void *where);
  static void error(char *m);
  static void gzip_mark(void **);
  static void gzip_release(void **);
  
  extern int end;
  static ulg free_mem_ptr;
-static ulg free_mem_ptr_end;
+static ulg free_mem_end_ptr;
  
  #define HEAP_SIZE 0x3000
  
  #include "../../../lib/inflate.c"
  
-static void *malloc(int size)
-{
-       void *p;
-
-       if (size <0) error("Malloc error");
-       if (free_mem_ptr <= 0) error("Memory error");
-
-       free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
-       p = (void *)free_mem_ptr;
-       free_mem_ptr += size;
-
-       if (free_mem_ptr >= free_mem_ptr_end)
-               error("Out of memory");
-       return p;
-}
-
-static void free(void *where)
-{ /* gzip_mark & gzip_release do the free */
-}
-
-static void gzip_mark(void **ptr)
-{
-       *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-       free_mem_ptr = (long) *ptr;
-}
-
  /* ===========================================================================
   * Fill the input buffer. This is called only when the buffer is empty
   * and at least one byte is really needed.
@@ -193,7 +158,7 @@ decompress_kernel(void *output_start,
  
         /* FIXME FIXME FIXME */
         free_mem_ptr            = (ulg)output_start + ksize;
-       free_mem_ptr_end        = (ulg)output_start + ksize + 0x200000;
+       free_mem_end_ptr        = (ulg)output_start + ksize + 0x200000;
         /* FIXME FIXME FIXME */
  
         /* put in temp area to reduce initial footprint */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig

index 6fb4f03369f2715271d5a706ce32d9a4d4990529..dabb015aa40b48ba3647f7f1cbce341205a7dc3f 100644 (file)
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -268,7 +268,7 @@ config ARCH_EP93XX
         select GENERIC_GPIO
         select HAVE_CLK
         select HAVE_CLK
-       select HAVE_GPIO_LIB
+       select ARCH_REQUIRE_GPIOLIB
         help
           This enables support for the Cirrus EP93xx series of CPUs.
  
@@ -447,7 +447,7 @@ config ARCH_PXA
         select ARCH_MTD_XIP
         select GENERIC_GPIO
         select HAVE_CLK
-       select HAVE_GPIO_LIB
+       select ARCH_REQUIRE_GPIOLIB
         select GENERIC_TIME
         select GENERIC_CLOCKEVENTS
         select TICK_ONESHOT
@@ -479,7 +479,7 @@ config ARCH_SA1100
         select GENERIC_CLOCKEVENTS
         select HAVE_CLK
         select TICK_ONESHOT
-       select HAVE_GPIO_LIB
+       select ARCH_REQUIRE_GPIOLIB
         help
           Support for StrongARM 11x0 based boards.
  
@@ -522,7 +522,7 @@ config ARCH_OMAP
         bool "TI OMAP"
         select GENERIC_GPIO
         select HAVE_CLK
-       select HAVE_GPIO_LIB
+       select ARCH_REQUIRE_GPIOLIB
         select GENERIC_TIME
         select GENERIC_CLOCKEVENTS
         help
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c

index 9b444022cb9bc76933f1fdba6dffb294723fd7ac..7145cc7c04f0b8e36fb4b86205602fa7e0618bac 100644 (file)
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -217,8 +217,6 @@ static unsigned outcnt;             /* bytes in output buffer */
  static int  fill_inbuf(void);
  static void flush_window(void);
  static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
  
  extern char input_data[];
  extern char input_data_end[];
@@ -227,64 +225,21 @@ static uch *output_data;
  static ulg output_ptr;
  static ulg bytes_out;
  
-static void *malloc(int size);
-static void free(void *where);
  static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
  
  static void putstr(const char *);
  
  extern int end;
  static ulg free_mem_ptr;
-static ulg free_mem_ptr_end;
+static ulg free_mem_end_ptr;
  
-#define HEAP_SIZE 0x3000
-
-#include "../../../../lib/inflate.c"
-
-#ifndef STANDALONE_DEBUG
-static void *malloc(int size)
-{
-       void *p;
-
-       if (size <0) error("Malloc error");
-       if (free_mem_ptr <= 0) error("Memory error");
-
-       free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
-       p = (void *)free_mem_ptr;
-       free_mem_ptr += size;
-
-       if (free_mem_ptr >= free_mem_ptr_end)
-               error("Out of memory");
-       return p;
-}
-
-static void free(void *where)
-{ /* gzip_mark & gzip_release do the free */
-}
-
-static void gzip_mark(void **ptr)
-{
-       arch_decomp_wdog();
-       *ptr = (void *) free_mem_ptr;
-}
+#ifdef STANDALONE_DEBUG
+#define NO_INFLATE_MALLOC
+#endif
  
-static void gzip_release(void **ptr)
-{
-       arch_decomp_wdog();
-       free_mem_ptr = (long) *ptr;
-}
-#else
-static void gzip_mark(void **ptr)
-{
-}
+#define ARCH_HAS_DECOMP_WDOG
  
-static void gzip_release(void **ptr)
-{
-}
-#endif
+#include "../../../../lib/inflate.c"
  
  /* ===========================================================================
   * Fill the input buffer. This is called only when the buffer is empty
@@ -348,7 +303,7 @@ decompress_kernel(ulg output_start, ulg free_mem_ptr_p, ulg free_mem_ptr_end_p,
  {
         output_data             = (uch *)output_start;  /* Points to kernel start */
         free_mem_ptr            = free_mem_ptr_p;
-       free_mem_ptr_end        = free_mem_ptr_end_p;
+       free_mem_end_ptr        = free_mem_ptr_end_p;
         __machine_arch_type     = arch_id;
  
         arch_decomp_setup();
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c

index 5ee39e10c8d18a37eb8004e91b0e5c046bd2eff7..d28513f14d05906cec08a849128f76edaaf5ad16 100644 (file)
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -296,8 +296,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
         unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
  
         INIT_HLIST_HEAD(&empty_rp);
-       spin_lock_irqsave(&kretprobe_lock, flags);
-       head = kretprobe_inst_table_head(current);
+       kretprobe_hash_lock(current, &head, &flags);
  
         /*
          * It is possible to have multiple instances associated with a given
@@ -337,7 +336,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
         }
  
         kretprobe_assert(ri, orig_ret_address, trampoline_address);
-       spin_unlock_irqrestore(&kretprobe_lock, flags);
+       kretprobe_hash_unlock(current, &flags);
  
         hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
                 hlist_del(&ri->hlist);
@@ -347,7 +346,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
         return (void *)orig_ret_address;
  }
  
-/* Called with kretprobe_lock held. */
  void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
                                       struct pt_regs *regs)
  {
diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c

index 1903a3491ee9ff7f330e90a3c9a7df5cd4897d22..d8e9c2c3f0f684ad8d9375744a36579363497d37 100644 (file)
--- a/arch/arm/plat-omap/gpio.c
+++ b/arch/arm/plat-omap/gpio.c
@@ -1488,6 +1488,9 @@ static int __init _omap_gpio_init(void)
                 bank->chip.set = gpio_set;
                 if (bank_is_mpuio(bank)) {
                         bank->chip.label = "mpuio";
+#ifdef CONFIG_ARCH_OMAP1
+                       bank->chip.dev = &omap_mpuio_device.dev;
+#endif
                         bank->chip.base = OMAP_MPUIO(0);
                 } else {
                         bank->chip.label = "gpio";
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig

index df4adefedb422696bc71f853ea0290e1bee81d86..7c239a916275b6b66b326e7fc93c6207bdbaf270 100644 (file)
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -88,7 +88,7 @@ config PLATFORM_AT32AP
         select SUBARCH_AVR32B
         select MMU
         select PERFORMANCE_COUNTERS
-       select HAVE_GPIO_LIB
+       select ARCH_REQUIRE_GPIOLIB
         select GENERIC_ALLOCATOR
  
  #
diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c

index 60da03ba7117e0c9a62ad4756ac5fa0aecf7d756..296294f8ed81c42a92963c7794f3a39897e9fa81 100644 (file)
--- a/arch/avr32/mach-at32ap/pio.c
+++ b/arch/avr32/mach-at32ap/pio.c
@@ -360,6 +360,8 @@ static int __init pio_probe(struct platform_device *pdev)
         pio->chip.label = pio->name;
         pio->chip.base = pdev->id * 32;
         pio->chip.ngpio = 32;
+       pio->chip.dev = &pdev->dev;
+       pio->chip.owner = THIS_MODULE;
  
         pio->chip.direction_input = direction_input;
         pio->chip.get = gpio_get;
diff --git a/arch/cris/arch-v10/boot/compressed/misc.c b/arch/cris/arch-v10/boot/compressed/misc.c

index 18e13bce140009a43efa0011393e349ee435a700..d933c89889dbfbcfae9699ea0f1179575de4099c 100644 (file)
--- a/arch/cris/arch-v10/boot/compressed/misc.c
+++ b/arch/cris/arch-v10/boot/compressed/misc.c
@@ -102,50 +102,16 @@ extern char *input_data;  /* lives in head.S */
  static long bytes_out = 0;
  static uch *output_data;
  static unsigned long output_ptr = 0;
-
-static void *malloc(int size);
-static void free(void *where);
-static void gzip_mark(void **);
-static void gzip_release(void **);
-
  static void puts(const char *);
  
  /* the "heap" is put directly after the BSS ends, at end */
  
  extern int _end;
  static long free_mem_ptr = (long)&_end;
+static long free_mem_end_ptr;
  
  #include "../../../../../lib/inflate.c"
  
-static void *malloc(int size)
-{
-       void *p;
-
-       if (size < 0)
-               error("Malloc error");
-
-       free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
-       p = (void *)free_mem_ptr;
-       free_mem_ptr += size;
-
-       return p;
-}
-
-static void free(void *where)
-{      /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-       *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-       free_mem_ptr = (long) *ptr;
-}
-
  /* decompressor info and error messages to serial console */
  
  static void
diff --git a/arch/cris/arch-v32/boot/compressed/misc.c b/arch/cris/arch-v32/boot/compressed/misc.c

index 55b2695c5d70b70528ba2cbc2e94b6e8323860ee..3595e16e82bcf10ffff7dfcdab26c30a95d4f141 100644 (file)
--- a/arch/cris/arch-v32/boot/compressed/misc.c
+++ b/arch/cris/arch-v32/boot/compressed/misc.c
@@ -89,20 +89,14 @@ static unsigned outcnt = 0;  /* bytes in output buffer */
  
  static void flush_window(void);
  static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
  
  extern char *input_data;  /* lives in head.S */
  
-static long bytes_out = 0;
+static long bytes_out;
  static uch *output_data;
-static unsigned long output_ptr = 0;
+static unsigned long output_ptr;
  
-static void *malloc(int size);
-static void free(void *where);
  static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
  
  static void puts(const char *);
  
@@ -110,37 +104,10 @@ static void puts(const char *);
  
  extern int _end;
  static long free_mem_ptr = (long)&_end;
+static long free_mem_end_ptr;
  
  #include "../../../../../lib/inflate.c"
  
-static void *malloc(int size)
-{
-       void *p;
-
-       if (size <0) error("Malloc error");
-
-       free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
-       p = (void *)free_mem_ptr;
-       free_mem_ptr += size;
-
-       return p;
-}
-
-static void free(void *where)
-{      /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-       *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-       free_mem_ptr = (long) *ptr;
-}
-
  /* decompressor info and error messages to serial console */
  
  static inline void
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig

index 085dc6ec152b057b6da23c2742a39d0a0012c327..396ab059efa35a9fa1d2e8258aff56cd0f8f0ab4 100644 (file)
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -203,20 +203,6 @@ config UNIX98_PTYS
           Read the instructions in <file:Documentation/Changes> pertaining to
           pseudo terminals. It's safe to say N.
  
-config UNIX98_PTY_COUNT
-       int "Maximum number of Unix98 PTYs in use (0-2048)"
-       depends on UNIX98_PTYS
-       default "256"
-       help
-         The maximum number of Unix98 PTYs that can be used at any one time.
-         The default is 256, and should be enough for desktop systems. Server
-         machines which support incoming telnet/rlogin/ssh connections and/or
-         serve several X terminals may want to increase this: every incoming
-         connection and every xterm uses up one PTY.
-
-         When not in use, each additional set of 256 PTYs occupy
-         approximately 8 KB of kernel memory on 32-bit architectures.
-
  source "drivers/char/pcmcia/Kconfig"
  
  source "drivers/serial/Kconfig"
diff --git a/arch/h8300/boot/compressed/misc.c b/arch/h8300/boot/compressed/misc.c

index 845074588af0aacf98d9b16d2d63590669c65b04..51ab6cbd030f58a9c439e9bc65849fd460d7b749 100644 (file)
--- a/arch/h8300/boot/compressed/misc.c
+++ b/arch/h8300/boot/compressed/misc.c
@@ -67,8 +67,6 @@ static unsigned outcnt = 0;  /* bytes in output buffer */
  static int  fill_inbuf(void);
  static void flush_window(void);
  static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
  
  extern char input_data[];
  extern int input_len;
@@ -77,11 +75,7 @@ static long bytes_out = 0;
  static uch *output_data;
  static unsigned long output_ptr = 0;
  
-static void *malloc(int size);
-static void free(void *where);
  static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
  
  int puts(const char *);
  
@@ -98,38 +92,6 @@ static unsigned long free_mem_end_ptr;
  #define TDR *((volatile unsigned char *)0xffff8b)
  #define SSR *((volatile unsigned char *)0xffff8c)
  
-static void *malloc(int size)
-{
-       void *p;
-
-       if (size <0) error("Malloc error");
-       if (free_mem_ptr == 0) error("Memory error");
-
-       free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
-       p = (void *)free_mem_ptr;
-       free_mem_ptr += size;
-
-       if (free_mem_ptr >= free_mem_end_ptr)
-               error("Out of memory");
-
-       return p;
-}
-
-static void free(void *where)
-{      /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-       *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-       free_mem_ptr = (long) *ptr;
-}
-
  int puts(const char *s)
  {
         return 0;
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c

index 233434f4f88f21cc58da0e0f2c7c726268db4f72..f07688da947c352217246d706435d09256d8894e 100644 (file)
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -429,8 +429,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
                 ((struct fnptr *)kretprobe_trampoline)->ip;
  
         INIT_HLIST_HEAD(&empty_rp);
-       spin_lock_irqsave(&kretprobe_lock, flags);
-       head = kretprobe_inst_table_head(current);
+       kretprobe_hash_lock(current, &head, &flags);
  
         /*
          * It is possible to have multiple instances associated with a given
@@ -485,7 +484,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
         kretprobe_assert(ri, orig_ret_address, trampoline_address);
  
         reset_current_kprobe();
-       spin_unlock_irqrestore(&kretprobe_lock, flags);
+       kretprobe_hash_unlock(current, &flags);
         preempt_enable_no_resched();
  
         hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
@@ -500,7 +499,6 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
         return 1;
  }
  
-/* Called with kretprobe_lock held */
  void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
                                       struct pt_regs *regs)
  {
diff --git a/arch/m32r/boot/compressed/misc.c b/arch/m32r/boot/compressed/misc.c

index 600d40e33495dcb4b8419d70ebbac051407d189c..d394292498c0eb245161cd93c17c16b4b8ccca7b 100644 (file)
--- a/arch/m32r/boot/compressed/misc.c
+++ b/arch/m32r/boot/compressed/misc.c
@@ -70,8 +70,6 @@ static unsigned outcnt = 0;  /* bytes in output buffer */
  static int  fill_inbuf(void);
  static void flush_window(void);
  static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
  
  static unsigned char *input_data;
  static int input_len;
@@ -82,9 +80,6 @@ static unsigned long output_ptr = 0;
  
  #include "m32r_sio.c"
  
-static void *malloc(int size);
-static void free(void *where);
-
  static unsigned long free_mem_ptr;
  static unsigned long free_mem_end_ptr;
  
@@ -92,38 +87,6 @@ static unsigned long free_mem_end_ptr;
  
  #include "../../../../lib/inflate.c"
  
-static void *malloc(int size)
-{
-       void *p;
-
-       if (size <0) error("Malloc error");
-       if (free_mem_ptr == 0) error("Memory error");
-
-       free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
-       p = (void *)free_mem_ptr;
-       free_mem_ptr += size;
-
-       if (free_mem_ptr >= free_mem_end_ptr)
-               error("Out of memory");
-
-       return p;
-}
-
-static void free(void *where)
-{      /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-       *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-       free_mem_ptr = (long) *ptr;
-}
-
  void* memset(void* s, int c, size_t n)
  {
         int i;
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig

index b9c754f4070cc131d0420fea6af9570106eb7978..b4c4eaa5dd265bbdd4646898a99f63e86e01e315 100644 (file)
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -713,7 +713,7 @@ config CSRC_SB1250
  
  config GPIO_TXX9
         select GENERIC_GPIO
-       select HAVE_GPIO_LIB
+       select ARCH_REQUIRE_GPIOLIB
         bool
  
  config CFE
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c

index c266211ed653d728c83e150215d8325be35f346f..2fefb14414b7db6db1dbfd4b58719082197439ba 100644 (file)
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -11,7 +11,6 @@
  #include <linux/file.h>
  #include <linux/smp_lock.h>
  #include <linux/highuid.h>
-#include <linux/dirent.h>
  #include <linux/resource.h>
  #include <linux/highmem.h>
  #include <linux/time.h>
diff --git a/arch/mn10300/boot/compressed/misc.c b/arch/mn10300/boot/compressed/misc.c

index ded207efc97a540425746506bcdffd7964eb7d9d..f673383518e4cf26e42b42a6389a6b40b2e29440 100644 (file)
--- a/arch/mn10300/boot/compressed/misc.c
+++ b/arch/mn10300/boot/compressed/misc.c
@@ -153,26 +153,9 @@ static uch *output_data;
  static unsigned long output_ptr;
  
  
-static void *malloc(int size);
-
-static inline void free(void *where)
-{      /* Don't care */
-}
-
  static unsigned long free_mem_ptr = (unsigned long) &end;
  static unsigned long free_mem_end_ptr = (unsigned long) &end + 0x90000;
  
-static inline void gzip_mark(void **ptr)
-{
-       kputs(".");
-       *ptr = (void *) free_mem_ptr;
-}
-
-static inline void gzip_release(void **ptr)
-{
-       free_mem_ptr = (unsigned long) *ptr;
-}
-
  #define INPLACE_MOVE_ROUTINE   0x1000
  #define LOW_BUFFER_START       0x2000
  #define LOW_BUFFER_END         0x90000
@@ -186,26 +169,6 @@ static int lines, cols;
  
  #include "../../../../lib/inflate.c"
  
-static void *malloc(int size)
-{
-       void *p;
-
-       if (size < 0)
-               error("Malloc error\n");
-       if (!free_mem_ptr)
-               error("Memory error\n");
-
-       free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
-       p = (void *) free_mem_ptr;
-       free_mem_ptr += size;
-
-       if (free_mem_ptr >= free_mem_end_ptr)
-               error("\nOut of memory\n");
-
-       return p;
-}
-
  static inline void scroll(void)
  {
         int i;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig

index a487671c282febd507359e824bcda81b1f71983d..fe88418167c51e37a2323d5f40a07c505e1c1c86 100644 (file)
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -110,8 +110,10 @@ config PPC
         default y
         select HAVE_DYNAMIC_FTRACE
         select HAVE_FTRACE
+       select ARCH_WANT_OPTIONAL_GPIOLIB
         select HAVE_IDE
         select HAVE_IOREMAP_PROT
+       select HAVE_EFFICIENT_UNALIGNED_ACCESS
         select HAVE_KPROBES
         select HAVE_ARCH_KGDB
         select HAVE_KRETPROBES
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c

index b936a1dd0a500adcf63f40a24538b57b32bd2609..25a052c16754349c2e5e91ffc4c3b3dac9c7e2bf 100644 (file)
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -23,6 +23,9 @@
  struct cpu_spec* cur_cpu_spec = NULL;
  EXPORT_SYMBOL(cur_cpu_spec);
  
+/* The platform string corresponding to the real PVR */
+const char *powerpc_base_platform;
+
  /* NOTE:
   * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's
   * the responsibility of the appropriate CPU save/restore functions to
@@ -1652,6 +1655,14 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
                         } else
                                 *t = *s;
                         *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
+
+                       /*
+                        * Set the base platform string once; assumes
+                        * we're called with real pvr first.
+                        */
+                       if (powerpc_base_platform == NULL)
+                               powerpc_base_platform = t->platform;
+
  #if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE)
                         /* ppc64 and booke expect identify_cpu to also call
                          * setup_cpu for that processor. I will consolidate
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S

index da52269aec1e86fc05a2e1fb79d46dcc0b846a88..81c8324a4a3c7aeefcf274f4c364e4258db3b55b 100644 (file)
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -148,7 +148,7 @@ transfer_to_handler:
         /* Check to see if the dbcr0 register is set up to debug.  Use the
            internal debug mode bit to do this. */
         lwz     r12,THREAD_DBCR0(r12)
-       andis.  r12,r12,DBCR0_IDM@h
+       andis.  r12,r12,(DBCR0_IDM  | DBSR_DAC1R | DBSR_DAC1W)@h
         beq+    3f
         /* From user and task is ptraced - load up global dbcr0 */
         li      r12,-1                  /* clear all pending debug events */
@@ -292,7 +292,7 @@ syscall_exit_cont:
         /* If the process has its own DBCR0 value, load it up.  The internal
            debug mode bit tells us that dbcr0 should be loaded. */
         lwz     r0,THREAD+THREAD_DBCR0(r2)
-       andis.  r10,r0,DBCR0_IDM@h
+       andis.  r10,r0,(DBCR0_IDM  | DBSR_DAC1R | DBSR_DAC1W)@h
         bnel-   load_dbcr0
  #endif
  #ifdef CONFIG_44x
@@ -720,7 +720,7 @@ restore_user:
         /* Check whether this process has its own DBCR0 value.  The internal
            debug mode bit tells us that dbcr0 should be loaded. */
         lwz     r0,THREAD+THREAD_DBCR0(r2)
-       andis.  r10,r0,DBCR0_IDM@h
+       andis.  r10,r0,(DBCR0_IDM  | DBSR_DAC1R | DBSR_DAC1W)@h
         bnel-   load_dbcr0
  #endif
  
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c

index 2385f68c1751d17774df413d44a30468cf8f5c5e..550a19399bfaac81bceabfbeb60f4a29e0e0752f 100644 (file)
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -49,6 +49,8 @@ static int novmerge = 1;
  
  static int protect4gb = 1;
  
+static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);
+
  static inline unsigned long iommu_num_pages(unsigned long vaddr,
                                             unsigned long slen)
  {
@@ -191,6 +193,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
  {
         unsigned long entry, flags;
         dma_addr_t ret = DMA_ERROR_CODE;
+       int build_fail;
  
         spin_lock_irqsave(&(tbl->it_lock), flags);
  
@@ -205,9 +208,21 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
         ret = entry << IOMMU_PAGE_SHIFT;        /* Set the return dma address */
  
         /* Put the TCEs in the HW table */
-       ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & IOMMU_PAGE_MASK,
-                        direction, attrs);
+       build_fail = ppc_md.tce_build(tbl, entry, npages,
+                                     (unsigned long)page & IOMMU_PAGE_MASK,
+                                     direction, attrs);
+
+       /* ppc_md.tce_build() only returns non-zero for transient errors.
+        * Clean up the table bitmap in this case and return
+        * DMA_ERROR_CODE. For all other errors the functionality is
+        * not altered.
+        */
+       if (unlikely(build_fail)) {
+               __iommu_free(tbl, ret, npages);
  
+               spin_unlock_irqrestore(&(tbl->it_lock), flags);
+               return DMA_ERROR_CODE;
+       }
  
         /* Flush/invalidate TLB caches if necessary */
         if (ppc_md.tce_flush)
@@ -276,7 +291,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
         dma_addr_t dma_next = 0, dma_addr;
         unsigned long flags;
         struct scatterlist *s, *outs, *segstart;
-       int outcount, incount, i;
+       int outcount, incount, i, build_fail = 0;
         unsigned int align;
         unsigned long handle;
         unsigned int max_seg_size;
@@ -337,8 +352,11 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
                             npages, entry, dma_addr);
  
                 /* Insert into HW table */
-               ppc_md.tce_build(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK,
-                                direction, attrs);
+               build_fail = ppc_md.tce_build(tbl, entry, npages,
+                                             vaddr & IOMMU_PAGE_MASK,
+                                             direction, attrs);
+               if(unlikely(build_fail))
+                       goto failure;
  
                 /* If we are in an open segment, try merging */
                 if (segstart != s) {
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c

index 4ba2af125450ea895583efc086a5ea4c5d5724cf..de79915452c87d917728acbeefbd70a2b738d242 100644 (file)
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -144,7 +144,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
         kcb->kprobe_saved_msr = regs->msr;
  }
  
-/* Called with kretprobe_lock held */
  void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
                                       struct pt_regs *regs)
  {
@@ -312,8 +311,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
         unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
  
         INIT_HLIST_HEAD(&empty_rp);
-       spin_lock_irqsave(&kretprobe_lock, flags);
-       head = kretprobe_inst_table_head(current);
+       kretprobe_hash_lock(current, &head, &flags);
  
         /*
          * It is possible to have multiple instances associated with a given
@@ -352,7 +350,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
         regs->nip = orig_ret_address;
  
         reset_current_kprobe();
-       spin_unlock_irqrestore(&kretprobe_lock, flags);
+       kretprobe_hash_unlock(current, &flags);
         preempt_enable_no_resched();
  
         hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c

index 827a5726a035c9a11d5a182b59b5c2c33f2ab618..9f856a0c3e3876eb1bd6bbe1e2e2b41dba70bf16 100644 (file)
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -34,8 +34,9 @@
  #include <asm/time.h>
  #include <asm/prom.h>
  #include <asm/vdso_datapage.h>
+#include <asm/vio.h>
  
-#define MODULE_VERS "1.7"
+#define MODULE_VERS "1.8"
  #define MODULE_NAME "lparcfg"
  
  /* #define LPARCFG_DEBUG */
@@ -129,32 +130,46 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v)
  /*
   * Methods used to fetch LPAR data when running on a pSeries platform.
   */
-static void log_plpar_hcall_return(unsigned long rc, char *tag)
+/**
+ * h_get_mpp - issue H_GET_MPP and unpack its 7 return parms into @mpp_data
+ * @mpp_data: output structure, filled from the hcall return buffer
+ */
+int h_get_mpp(struct hvcall_mpp_data *mpp_data)
  {
-       switch(rc) {
-       case 0:
-               return;
-       case H_HARDWARE:
-               printk(KERN_INFO "plpar-hcall (%s) "
-                               "Hardware fault\n", tag);
-               return;
-       case H_FUNCTION:
-               printk(KERN_INFO "plpar-hcall (%s) "
-                               "Function not allowed\n", tag);
-               return;
-       case H_AUTHORITY:
-               printk(KERN_INFO "plpar-hcall (%s) "
-                               "Not authorized to this function\n", tag);
-               return;
-       case H_PARAMETER:
-               printk(KERN_INFO "plpar-hcall (%s) "
-                               "Bad parameter(s)\n",tag);
-               return;
-       default:
-               printk(KERN_INFO "plpar-hcall (%s) "
-                               "Unexpected rc(0x%lx)\n", tag, rc);
-       }
+       int rc;
+       unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+       rc = plpar_hcall9(H_GET_MPP, retbuf);
+
+       mpp_data->entitled_mem = retbuf[0];
+       mpp_data->mapped_mem = retbuf[1];
+
+       mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+       mpp_data->pool_num = retbuf[2] & 0xffff;
+
+       mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
+       mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
+       mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
+
+       mpp_data->pool_size = retbuf[4];
+       mpp_data->loan_request = retbuf[5];
+       mpp_data->backing_mem = retbuf[6];
+
+       return rc;
  }
+EXPORT_SYMBOL(h_get_mpp);
+
+struct hvcall_ppp_data {
+       u64     entitlement;
+       u64     unallocated_entitlement;
+       u16     group_num;
+       u16     pool_num;
+       u8      capped;
+       u8      weight;
+       u8      unallocated_weight;
+       u16     active_procs_in_pool;
+       u16     active_system_procs;
+};
  
  /*
   * H_GET_PPP hcall returns info in 4 parms.
@@ -176,27 +191,30 @@ static void log_plpar_hcall_return(unsigned long rc, char *tag)
   *              XXXX - Active processors in Physical Processor Pool.
   *                  XXXX  - Processors active on platform.
   */
-static unsigned int h_get_ppp(unsigned long *entitled,
-                             unsigned long *unallocated,
-                             unsigned long *aggregation,
-                             unsigned long *resource)
+static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
  {
         unsigned long rc;
         unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
  
         rc = plpar_hcall(H_GET_PPP, retbuf);
  
-       *entitled = retbuf[0];
-       *unallocated = retbuf[1];
-       *aggregation = retbuf[2];
-       *resource = retbuf[3];
+       ppp_data->entitlement = retbuf[0];
+       ppp_data->unallocated_entitlement = retbuf[1];
+
+       ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+       ppp_data->pool_num = retbuf[2] & 0xffff;
  
-       log_plpar_hcall_return(rc, "H_GET_PPP");
+       ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01;
+       ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff;
+       ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff;
+       ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff;
+       ppp_data->active_system_procs = retbuf[3] & 0xffff;
  
         return rc;
  }
  
-static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
+static unsigned h_pic(unsigned long *pool_idle_time,
+                     unsigned long *num_procs)
  {
         unsigned long rc;
         unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
@@ -206,8 +224,87 @@ static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
         *pool_idle_time = retbuf[0];
         *num_procs = retbuf[1];
  
-       if (rc != H_AUTHORITY)
-               log_plpar_hcall_return(rc, "H_PIC");
+       return rc;
+}
+
+/*
+ * parse_ppp_data
+ * Parse out the data returned from h_get_ppp and h_pic
+ */
+static void parse_ppp_data(struct seq_file *m)
+{
+       struct hvcall_ppp_data ppp_data;
+       int rc;
+
+       rc = h_get_ppp(&ppp_data);
+       if (rc)
+               return;
+
+       seq_printf(m, "partition_entitled_capacity=%ld\n",
+                  ppp_data.entitlement);
+       seq_printf(m, "group=%d\n", ppp_data.group_num);
+       seq_printf(m, "system_active_processors=%d\n",
+                  ppp_data.active_system_procs);
+
+       /* pool related entries are appropriate for shared configs */
+       if (lppaca[0].shared_proc) {
+               unsigned long pool_idle_time, pool_procs;
+
+               seq_printf(m, "pool=%d\n", ppp_data.pool_num);
+
+               /* report pool_capacity in percentage */
+               seq_printf(m, "pool_capacity=%d\n",
+                          ppp_data.active_procs_in_pool * 100);
+
+               h_pic(&pool_idle_time, &pool_procs);
+               seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
+               seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
+       }
+
+       seq_printf(m, "unallocated_capacity_weight=%d\n",
+                  ppp_data.unallocated_weight);
+       seq_printf(m, "capacity_weight=%d\n", ppp_data.weight);
+       seq_printf(m, "capped=%d\n", ppp_data.capped);
+       seq_printf(m, "unallocated_capacity=%ld\n",
+                  ppp_data.unallocated_entitlement);
+}
+
+/**
+ * parse_mpp_data
+ * Parse out data returned from h_get_mpp
+ */
+static void parse_mpp_data(struct seq_file *m)
+{
+       struct hvcall_mpp_data mpp_data;
+       int rc;
+
+       rc = h_get_mpp(&mpp_data);
+       if (rc)
+               return;
+
+       seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem);
+
+       if (mpp_data.mapped_mem != -1)
+               seq_printf(m, "mapped_entitled_memory=%ld\n",
+                          mpp_data.mapped_mem);
+
+       seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num);
+       seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num);
+
+       seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight);
+       seq_printf(m, "unallocated_entitled_memory_weight=%d\n",
+                  mpp_data.unallocated_mem_weight);
+       seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n",
+                  mpp_data.unallocated_entitlement);
+
+       if (mpp_data.pool_size != -1)
+               seq_printf(m, "entitled_memory_pool_size=%ld bytes\n",
+                          mpp_data.pool_size);
+
+       seq_printf(m, "entitled_memory_loan_request=%ld\n",
+                  mpp_data.loan_request);
+
+       seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem);
  }
  
  #define SPLPAR_CHARACTERISTICS_TOKEN 20
@@ -313,6 +410,25 @@ static int lparcfg_count_active_processors(void)
         return count;
  }
  
+static void pseries_cmo_data(struct seq_file *m)
+{
+       int cpu;
+       unsigned long cmo_faults = 0;
+       unsigned long cmo_fault_time = 0;
+
+       if (!firmware_has_feature(FW_FEATURE_CMO))
+               return;
+
+       for_each_possible_cpu(cpu) {
+               cmo_faults += lppaca[cpu].cmo_faults;
+               cmo_fault_time += lppaca[cpu].cmo_fault_time;
+       }
+
+       seq_printf(m, "cmo_faults=%lu\n", cmo_faults);
+       seq_printf(m, "cmo_fault_time_usec=%lu\n",
+                  cmo_fault_time / tb_ticks_per_usec);
+}
+
  static int pseries_lparcfg_data(struct seq_file *m, void *v)
  {
         int partition_potential_processors;
@@ -334,60 +450,13 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
         partition_active_processors = lparcfg_count_active_processors();
  
         if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
-               unsigned long h_entitled, h_unallocated;
-               unsigned long h_aggregation, h_resource;
-               unsigned long pool_idle_time, pool_procs;
-               unsigned long purr;
-
-               h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
-                         &h_resource);
-
-               seq_printf(m, "R4=0x%lx\n", h_entitled);
-               seq_printf(m, "R5=0x%lx\n", h_unallocated);
-               seq_printf(m, "R6=0x%lx\n", h_aggregation);
-               seq_printf(m, "R7=0x%lx\n", h_resource);
-
-               purr = get_purr();
-
                 /* this call handles the ibm,get-system-parameter contents */
                 parse_system_parameter_string(m);
+               parse_ppp_data(m);
+               parse_mpp_data(m);
+               pseries_cmo_data(m);
  
-               seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled);
-
-               seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
-
-               seq_printf(m, "system_active_processors=%ld\n",
-                          (h_resource >> 0 * 8) & 0xffff);
-
-               /* pool related entries are apropriate for shared configs */
-               if (lppaca[0].shared_proc) {
-
-                       h_pic(&pool_idle_time, &pool_procs);
-
-                       seq_printf(m, "pool=%ld\n",
-                                  (h_aggregation >> 0 * 8) & 0xffff);
-
-                       /* report pool_capacity in percentage */
-                       seq_printf(m, "pool_capacity=%ld\n",
-                                  ((h_resource >> 2 * 8) & 0xffff) * 100);
-
-                       seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
-
-                       seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
-               }
-
-               seq_printf(m, "unallocated_capacity_weight=%ld\n",
-                          (h_resource >> 4 * 8) & 0xFF);
-
-               seq_printf(m, "capacity_weight=%ld\n",
-                          (h_resource >> 5 * 8) & 0xFF);
-
-               seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
-
-               seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
-
-               seq_printf(m, "purr=%ld\n", purr);
-
+               seq_printf(m, "purr=%ld\n", get_purr());
         } else {                /* non SPLPAR case */
  
                 seq_printf(m, "system_active_processors=%d\n",
@@ -414,6 +483,83 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
         return 0;
  }
  
+static ssize_t update_ppp(u64 *entitlement, u8 *weight)
+{
+       struct hvcall_ppp_data ppp_data;
+       u8 new_weight;
+       u64 new_entitled;
+       ssize_t retval;
+
+       /* Get our current parameters */
+       retval = h_get_ppp(&ppp_data);
+       if (retval)
+               return retval;
+
+       if (entitlement) {
+               new_weight = ppp_data.weight;
+               new_entitled = *entitlement;
+       } else if (weight) {
+               new_weight = *weight;
+               new_entitled = ppp_data.entitlement;
+       } else
+               return -EINVAL;
+
+       pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
+                __FUNCTION__, ppp_data.entitlement, ppp_data.weight);
+
+       pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
+                __FUNCTION__, new_entitled, new_weight);
+
+       retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight);
+       return retval;
+}
+
+/**
+ * update_mpp
+ *
+ * Update the memory entitlement and weight for the partition.  Caller must
+ * specify either a new entitlement or weight, not both, to be updated
+ * since the h_set_mpp call takes both entitlement and weight as parameters.
+ */
+static ssize_t update_mpp(u64 *entitlement, u8 *weight)
+{
+       struct hvcall_mpp_data mpp_data;
+       u64 new_entitled;
+       u8 new_weight;
+       ssize_t rc;
+
+       if (entitlement) {
+               /* Check with vio to ensure the new memory entitlement
+                * can be handled.
+                */
+               rc = vio_cmo_entitlement_update(*entitlement);
+               if (rc)
+                       return rc;
+       }
+
+       rc = h_get_mpp(&mpp_data);
+       if (rc)
+               return rc;
+
+       if (entitlement) {
+               new_weight = mpp_data.mem_weight;
+               new_entitled = *entitlement;
+       } else if (weight) {
+               new_weight = *weight;
+               new_entitled = mpp_data.entitled_mem;
+       } else
+               return -EINVAL;
+
+       pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
+                __FUNCTION__, mpp_data.entitled_mem, mpp_data.mem_weight);
+
+       pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
+                __FUNCTION__, new_entitled, new_weight);
+
+       rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight);
+       return rc;
+}
+
  /*
   * Interface for changing system parameters (variable capacity weight
   * and entitled capacity).  Format of input is "param_name=value";
@@ -427,35 +573,27 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
  static ssize_t lparcfg_write(struct file *file, const char __user * buf,
                              size_t count, loff_t * off)
  {
-       char *kbuf;
+       int kbuf_sz = 64;
+       char kbuf[kbuf_sz];
         char *tmp;
         u64 new_entitled, *new_entitled_ptr = &new_entitled;
         u8 new_weight, *new_weight_ptr = &new_weight;
-
-       unsigned long current_entitled; /* parameters for h_get_ppp */
-       unsigned long dummy;
-       unsigned long resource;
-       u8 current_weight;
-
-       ssize_t retval = -ENOMEM;
+       ssize_t retval;
  
         if (!firmware_has_feature(FW_FEATURE_SPLPAR) ||
                         firmware_has_feature(FW_FEATURE_ISERIES))
                 return -EINVAL;
  
-       kbuf = kmalloc(count, GFP_KERNEL);
-       if (!kbuf)
-               goto out;
+       if (count > kbuf_sz)
+               return -EINVAL;
  
-       retval = -EFAULT;
         if (copy_from_user(kbuf, buf, count))
-               goto out;
+               return -EFAULT;
  
-       retval = -EINVAL;
         kbuf[count - 1] = '\0';
         tmp = strchr(kbuf, '=');
         if (!tmp)
-               goto out;
+               return -EINVAL;
  
         *tmp++ = '\0';
  
@@ -463,34 +601,32 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
                 char *endp;
                 *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
                 if (endp == tmp)
-                       goto out;
-               new_weight_ptr = &current_weight;
+                       return -EINVAL;
+
+               retval = update_ppp(new_entitled_ptr, NULL);
         } else if (!strcmp(kbuf, "capacity_weight")) {
                 char *endp;
                 *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
                 if (endp == tmp)
-                       goto out;
-               new_entitled_ptr = &current_entitled;
-       } else
-               goto out;
-
-       /* Get our current parameters */
-       retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
-       if (retval) {
-               retval = -EIO;
-               goto out;
-       }
-
-       current_weight = (resource >> 5 * 8) & 0xFF;
+                       return -EINVAL;
  
-       pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
-                __func__, current_entitled, current_weight);
+               retval = update_ppp(NULL, new_weight_ptr);
+       } else if (!strcmp(kbuf, "entitled_memory")) {
+               char *endp;
+               *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
+               if (endp == tmp)
+                       return -EINVAL;
  
-       pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
-                __func__, *new_entitled_ptr, *new_weight_ptr);
+               retval = update_mpp(new_entitled_ptr, NULL);
+       } else if (!strcmp(kbuf, "entitled_memory_weight")) {
+               char *endp;
+               *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
+               if (endp == tmp)
+                       return -EINVAL;
  
-       retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr,
-                                   *new_weight_ptr);
+               retval = update_mpp(NULL, new_weight_ptr);
+       } else
+               return -EINVAL;
  
         if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
                 retval = count;
@@ -506,8 +642,6 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
                 retval = -EIO;
         }
  
-out:
-       kfree(kbuf);
         return retval;
  }
  
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c

index 219f3634115e46a05d0c1426d115aa95d58fef48..db2497ccc111a16f39014c9358798a3ebaa8d9c6 100644 (file)
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -47,6 +47,8 @@
  #ifdef CONFIG_PPC64
  #include <asm/firmware.h>
  #endif
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
  
  extern unsigned long _get_SP(void);
  
@@ -239,6 +241,35 @@ void discard_lazy_cpu_state(void)
  }
  #endif /* CONFIG_SMP */
  
+void do_dabr(struct pt_regs *regs, unsigned long address,
+                   unsigned long error_code)
+{
+       siginfo_t info;
+
+       if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
+                       11, SIGSEGV) == NOTIFY_STOP)
+               return;
+
+       if (debugger_dabr_match(regs))
+               return;
+
+       /* Clear the DAC and struct entries.  One shot trigger */
+#if (defined(CONFIG_44x) || defined(CONFIG_BOOKE))
+       mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | DBSR_DAC1W
+                                                       | DBCR0_IDM));
+#endif
+
+       /* Clear the DABR */
+       set_dabr(0);
+
+       /* Deliver the signal to userspace */
+       info.si_signo = SIGTRAP;
+       info.si_errno = 0;
+       info.si_code = TRAP_HWBKPT;
+       info.si_addr = (void __user *)address;
+       force_sig_info(SIGTRAP, &info, current);
+}
+
  static DEFINE_PER_CPU(unsigned long, current_dabr);
  
  int set_dabr(unsigned long dabr)
@@ -254,6 +285,11 @@ int set_dabr(unsigned long dabr)
  #if defined(CONFIG_PPC64) || defined(CONFIG_6xx)
         mtspr(SPRN_DABR, dabr);
  #endif
+
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+       mtspr(SPRN_DAC1, dabr);
+#endif
+
         return 0;
  }
  
@@ -337,6 +373,12 @@ struct task_struct *__switch_to(struct task_struct *prev,
         if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))
                 set_dabr(new->thread.dabr);
  
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+       /* Reprogram the DAC (HW breakpoint) if the new thread has one set */
+       if (new->thread.dabr)
+               set_dabr(new->thread.dabr);
+#endif
+
         new_thread = &new->thread;
         old_thread = &current->thread;
  
@@ -525,6 +567,10 @@ void flush_thread(void)
         if (current->thread.dabr) {
                 current->thread.dabr = 0;
                 set_dabr(0);
+
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+               current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W);
+#endif
         }
  }
  
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c

index 1ea8c8d3ce89af4eb230a5a3332d8a020dee20cf..c4ab2195b9cb69e74aaed8899f1c210d5a1cc525 100644 (file)
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -643,6 +643,11 @@ static void __init early_cmdline_parse(void)
  #else
  #define OV5_MSI                        0x00
  #endif /* CONFIG_PCI_MSI */
+#ifdef CONFIG_PPC_SMLPAR
+#define OV5_CMO                        0x80    /* Cooperative Memory Overcommitment */
+#else
+#define OV5_CMO                        0x00
+#endif
  
  /*
   * The architecture vector has an array of PVR mask/value pairs,
@@ -687,10 +692,12 @@ static unsigned char ibm_architecture_vec[] = {
         0,                              /* don't halt */
  
         /* option vector 5: PAPR/OF options */
-       3 - 2,                          /* length */
+       5 - 2,                          /* length */
         0,                              /* don't ignore, don't halt */
         OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
         OV5_DONATE_DEDICATE_CPU | OV5_MSI,
+       0,
+       OV5_CMO,
  };
  
  /* Old method - ELF header with PT_NOTE sections */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c

index 8feb93e7890c74174de4a8161509f47735b3201c..a5d0e78779c813134b544fd14bc4731d34531418 100644 (file)
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -703,7 +703,7 @@ void user_enable_single_step(struct task_struct *task)
  
         if (regs != NULL) {
  #if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
-               task->thread.dbcr0 = DBCR0_IDM | DBCR0_IC;
+               task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
                 regs->msr |= MSR_DE;
  #else
                 regs->msr |= MSR_SE;
@@ -716,9 +716,16 @@ void user_disable_single_step(struct task_struct *task)
  {
         struct pt_regs *regs = task->thread.regs;
  
+
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+       /* If DAC then do not single step, skip */
+       if (task->thread.dabr)
+               return;
+#endif
+
         if (regs != NULL) {
  #if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
-               task->thread.dbcr0 = 0;
+               task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_IDM);
                 regs->msr &= ~MSR_DE;
  #else
                 regs->msr &= ~MSR_SE;
@@ -727,22 +734,75 @@ void user_disable_single_step(struct task_struct *task)
         clear_tsk_thread_flag(task, TIF_SINGLESTEP);
  }
  
-static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
                                unsigned long data)
  {
-       /* We only support one DABR and no IABRS at the moment */
+       /* For ppc64 we support one DABR and no IABR's at the moment.
+        *  For embedded processors we support one DAC and no IAC's at the
+        *  moment.
+        */
         if (addr > 0)
                 return -EINVAL;
  
-       /* The bottom 3 bits are flags */
         if ((data & ~0x7UL) >= TASK_SIZE)
                 return -EIO;
  
-       /* Ensure translation is on */
+#ifdef CONFIG_PPC64
+
+       /* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
+        *  It was assumed, on previous implementations, that 3 bits were
+        *  passed together with the data address, fitting the design of the
+        *  DABR register, as follows:
+        *
+        *  bit 0: Read flag
+        *  bit 1: Write flag
+        *  bit 2: Breakpoint translation
+        *
+        *  Thus, we use them here as so.
+        */
+
+       /* Ensure breakpoint translation bit is set */
         if (data && !(data & DABR_TRANSLATION))
                 return -EIO;
  
+       /* Move contents to the DABR register */
         task->thread.dabr = data;
+
+#endif
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+
+       /* As described above, it was assumed 3 bits were passed with the data
+        *  address, but we will assume only the mode bits will be passed
+        *  so as not to cause alignment restrictions for DAC-based processors.
+        */
+
+       /* DAC's hold the whole address without any mode flags */
+       task->thread.dabr = data & ~0x3UL;
+
+       if (task->thread.dabr == 0) {
+               task->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | DBCR0_IDM);
+               task->thread.regs->msr &= ~MSR_DE;
+               return 0;
+       }
+
+       /* Read or Write bits must be set */
+
+       if (!(data & 0x3UL))
+               return -EINVAL;
+
+       /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
+          register */
+       task->thread.dbcr0 = DBCR0_IDM;
+
+       /* Check for write and read flags and set DBCR0
+          accordingly */
+       if (data & 0x1UL)
+               task->thread.dbcr0 |= DBSR_DAC1R;
+       if (data & 0x2UL)
+               task->thread.dbcr0 |= DBSR_DAC1W;
+
+       task->thread.regs->msr |= MSR_DE;
+#endif
         return 0;
  }
  
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c

index ad55488939c376f7073b0770ca79ad1f5954ca1e..7aada783ec6a78d54c74e7e2f11a428d014342ee 100644 (file)
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -145,8 +145,12 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs)
          * user space. The DABR will have been cleared if it
          * triggered inside the kernel.
          */
-       if (current->thread.dabr)
+       if (current->thread.dabr) {
                 set_dabr(current->thread.dabr);
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+               mtspr(SPRN_DBCR0, current->thread.dbcr0);
+#endif
+       }
  
         if (is32) {
                 if (ka.sa.sa_flags & SA_SIGINFO)
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c

index aba0ba95f0629cf320df4c006a38923d805f0aa8..800e5e9a087bf1c6d73aefde583876736fe32045 100644 (file)
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -529,7 +529,8 @@ static void register_nodes(void)
  #endif
  
  /* Only valid if CPU is present. */
-static ssize_t show_physical_id(struct sys_device *dev, char *buf)
+static ssize_t show_physical_id(struct sys_device *dev,
+                               struct sysdev_attribute *attr, char *buf)
  {
         struct cpu *cpu = container_of(dev, struct cpu, sysdev);
  
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c

index 878fbddb6ae10fe3150a329b4a214e07ff80f057..81ccb8dd1a54d8f95b0ed84f357d6e64e5395ab1 100644 (file)
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1067,6 +1067,22 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
                 }
  
                 _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
+       } else if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
+               regs->msr &= ~MSR_DE;
+
+               if (user_mode(regs)) {
+                       current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W |
+                                                               DBCR0_IDM);
+               } else {
+                       /* Disable DAC interrupts */
+                       mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R |
+                                               DBSR_DAC1W | DBCR0_IDM));
+
+                       /* Clear the DAC event */
+                       mtspr(SPRN_DBSR, (DBSR_DAC1R | DBSR_DAC1W));
+               }
+               /* Setup and send the trap to the handler */
+               do_dabr(regs, mfspr(SPRN_DAC1), debug_status);
         }
  }
  #endif /* CONFIG_4xx || CONFIG_BOOKE */
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c

index b77f8af7ddde7e5a4b2228065eddbae361fc05a5..ade8aeaa2e706b496755d65a856f73f4d3bc67b8 100644 (file)
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1,11 +1,12 @@
  /*
   * IBM PowerPC Virtual I/O Infrastructure Support.
   *
- *    Copyright (c) 2003-2005 IBM Corp.
+ *    Copyright (c) 2003,2008 IBM Corp.
   *     Dave Engebretsen engebret@us.ibm.com
   *     Santiago Leon santil@us.ibm.com
   *     Hollis Blanchard <hollisb@us.ibm.com>
   *     Stephen Rothwell
+ *     Robert Jennings <rcjenn@us.ibm.com>
   *
   *      This program is free software; you can redistribute it and/or
   *      modify it under the terms of the GNU General Public License
@@ -46,6 +47,996 @@ static struct vio_dev vio_bus_device  = { /* fake "parent" device */
         .dev.bus = &vio_bus_type,
  };
  
+#ifdef CONFIG_PPC_SMLPAR
+/**
+ * vio_cmo_pool - A pool of IO memory for CMO use
+ *
+ * @size: The size of the pool in bytes
+ * @free: The amount of free memory in the pool
+ */
+struct vio_cmo_pool {
+       size_t size;
+       size_t free;
+};
+
+/* How many ms to delay queued balance work */
+#define VIO_CMO_BALANCE_DELAY 100
+
+/* Portion out IO memory to CMO devices by this chunk size */
+#define VIO_CMO_BALANCE_CHUNK 131072
+
+/**
+ * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement
+ *
+ * @viodev: struct vio_dev pointer
+ * @list: pointer to other devices on bus that are being tracked
+ */
+struct vio_cmo_dev_entry {
+       struct vio_dev *viodev;
+       struct list_head list;
+};
+
+/**
+ * vio_cmo - VIO bus accounting structure for CMO entitlement
+ *
+ * @lock: spinlock for entire structure
+ * @balance_q: work queue for balancing system entitlement
+ * @device_list: list of CMO-enabled devices requiring entitlement
+ * @entitled: total system entitlement in bytes
+ * @reserve: pool of memory from which devices reserve entitlement, incl. spare
+ * @excess: pool of excess entitlement not needed for device reserves or spare
+ * @spare: IO memory for device hotplug functionality
+ * @min: minimum necessary for system operation
+ * @desired: desired memory for system operation
+ * @curr: bytes currently allocated
+ * @high: high water mark for IO data usage
+ */
+struct vio_cmo {
+       spinlock_t lock;
+       struct delayed_work balance_q;
+       struct list_head device_list;
+       size_t entitled;
+       struct vio_cmo_pool reserve;
+       struct vio_cmo_pool excess;
+       size_t spare;
+       size_t min;
+       size_t desired;
+       size_t curr;
+       size_t high;
+} vio_cmo;
+
+/**
+ * vio_cmo_num_OF_devs - Count the number of OF devices that have DMA windows
+ */
+static int vio_cmo_num_OF_devs(void)
+{
+       struct device_node *node_vroot;
+       int count = 0;
+
+       /*
+        * Count the number of vdevice entries with an
+        * ibm,my-dma-window OF property
+        */
+       node_vroot = of_find_node_by_name(NULL, "vdevice");
+       if (node_vroot) {
+               struct device_node *of_node;
+               struct property *prop;
+
+               for_each_child_of_node(node_vroot, of_node) {
+                       prop = of_find_property(of_node, "ibm,my-dma-window",
+                                              NULL);
+                       if (prop)
+                               count++;
+               }
+       }
+       of_node_put(node_vroot);
+       return count;
+}
+
+/**
+ * vio_cmo_alloc - allocate IO memory for CMO-enabled devices
+ *
+ * @viodev: VIO device requesting IO memory
+ * @size: size of allocation requested
+ *
+ * Allocations come from memory reserved for the devices and any excess
+ * IO memory available to all devices.  The spare pool used to service
+ * hotplug must be at least %VIO_CMO_MIN_ENT for the excess pool to be
+ * made available.
+ *
+ * Return codes:
+ *  0 for successful allocation and -ENOMEM for a failure
+ */
+static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
+{
+       unsigned long flags;
+       size_t reserve_free = 0;
+       size_t excess_free = 0;
+       int ret = -ENOMEM;
+
+       spin_lock_irqsave(&vio_cmo.lock, flags);
+
+       /* Determine the amount of free entitlement available in reserve */
+       if (viodev->cmo.entitled > viodev->cmo.allocated)
+               reserve_free = viodev->cmo.entitled - viodev->cmo.allocated;
+
+       /* If spare is not fulfilled, the excess pool can not be used. */
+       if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
+               excess_free = vio_cmo.excess.free;
+
+       /* The request can be satisfied */
+       if ((reserve_free + excess_free) >= size) {
+               vio_cmo.curr += size;
+               if (vio_cmo.curr > vio_cmo.high)
+                       vio_cmo.high = vio_cmo.curr;
+               viodev->cmo.allocated += size;
+               size -= min(reserve_free, size);
+               vio_cmo.excess.free -= size;
+               ret = 0;
+       }
+
+       spin_unlock_irqrestore(&vio_cmo.lock, flags);
+       return ret;
+}
+
+/**
+ * vio_cmo_dealloc - deallocate IO memory from CMO-enabled devices
+ * @viodev: VIO device freeing IO memory
+ * @size: size of deallocation
+ *
+ * IO memory is freed by the device back to the correct memory pools.
+ * The spare pool is replenished first from either memory pool, then
+ * the reserve pool is used to reduce device entitlement, the excess
+ * pool is used to increase the reserve pool toward the desired entitlement
+ * target, and then the remaining memory is returned to the pools.
+ *
+ */
+static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
+{
+       unsigned long flags;
+       size_t spare_needed = 0;
+       size_t excess_freed = 0;
+       size_t reserve_freed = size;    /* whole release, until split below */
+       size_t tmp;
+       int balance = 0;                /* set when a rebalance must be queued */
+
+       spin_lock_irqsave(&vio_cmo.lock, flags);
+       vio_cmo.curr -= size;
+
+       /*
+        * Amount of memory freed from the excess pool: whatever the device
+        * holds beyond its entitlement is attributed to the excess pool
+        * first, and only the remainder counts as reserve being freed.
+        */
+       if (viodev->cmo.allocated > viodev->cmo.entitled) {
+               excess_freed = min(reserve_freed, (viodev->cmo.allocated -
+                                                  viodev->cmo.entitled));
+               reserve_freed -= excess_freed;
+       }
+
+       /* Remove allocation from device (the two parts sum to @size) */
+       viodev->cmo.allocated -= (reserve_freed + excess_freed);
+
+       /*
+        * Spare is a subset of the reserve pool, replenish it first.
+        * NOTE(review): unsigned subtraction; this assumes vio_cmo.spare
+        * never exceeds VIO_CMO_MIN_ENT - confirm against the other writers.
+        */
+       spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;
+
+       /*
+        * Replenish the spare in the reserve pool from the excess pool.
+        * This moves entitlement into the reserve pool.
+        */
+       if (spare_needed && excess_freed) {
+               tmp = min(excess_freed, spare_needed);
+               vio_cmo.excess.size -= tmp;
+               vio_cmo.reserve.size += tmp;
+               vio_cmo.spare += tmp;
+               excess_freed -= tmp;
+               spare_needed -= tmp;
+               balance = 1;
+       }
+
+       /*
+        * Replenish the spare in the reserve pool from the reserve pool.
+        * This removes entitlement from the device down to VIO_CMO_MIN_ENT,
+        * if needed, and gives it to the spare pool. The amount of used
+        * memory in this pool does not change.
+        * NOTE(review): the subtraction below is unsigned and assumes
+        * viodev->cmo.entitled >= VIO_CMO_MIN_ENT - confirm.
+        */
+       if (spare_needed && reserve_freed) {
+               tmp = min(spare_needed, min(reserve_freed,
+                                           (viodev->cmo.entitled -
+                                            VIO_CMO_MIN_ENT)));
+
+               vio_cmo.spare += tmp;
+               viodev->cmo.entitled -= tmp;
+               reserve_freed -= tmp;
+               spare_needed -= tmp;
+               balance = 1;
+       }
+
+       /*
+        * Increase the reserve pool until the desired allocation is met.
+        * Move an allocation freed from the excess pool into the reserve
+        * pool and schedule a balance operation.
+        */
+       if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
+               tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));
+
+               vio_cmo.excess.size -= tmp;
+               vio_cmo.reserve.size += tmp;
+               excess_freed -= tmp;
+               balance = 1;
+       }
+
+       /* Return memory from the excess pool to that pool */
+       if (excess_freed)
+               vio_cmo.excess.free += excess_freed;
+
+       /* Pool sizes or entitlements moved above: queue a deferred balance */
+       if (balance)
+               schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY);
+       spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/**
+ * vio_cmo_entitlement_update - Manage system entitlement changes
+ *
+ * @new_entitlement: new system entitlement to attempt to accommodate
+ *
+ * Increases in entitlement will be used to fulfill the spare entitlement
+ * and the rest is given to the excess pool.  Decreases, if they are
+ * possible, come from the excess pool and from unused device entitlement.
+ *
+ * A balance operation is scheduled immediately after every successful
+ * change.  The whole update runs under vio_cmo.lock.
+ *
+ * Returns: 0 on success, -ENOMEM when change can not be made
+ */
+int vio_cmo_entitlement_update(size_t new_entitlement)
+{
+       struct vio_dev *viodev;
+       struct vio_cmo_dev_entry *dev_ent;
+       unsigned long flags;
+       size_t avail, delta, tmp;
+
+       spin_lock_irqsave(&vio_cmo.lock, flags);
+
+       /* Entitlement increases */
+       if (new_entitlement > vio_cmo.entitled) {
+               delta = new_entitlement - vio_cmo.entitled;
+
+               /* Fulfill spare allocation */
+               if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
+                       tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
+                       vio_cmo.spare += tmp;
+                       vio_cmo.reserve.size += tmp;
+                       delta -= tmp;
+               }
+
+               /*
+                * Remaining new allocation goes to the excess pool.
+                * NOTE(review): delta has already been reduced by the amount
+                * given to the spare, so vio_cmo.entitled ends up below
+                * new_entitlement whenever the spare was topped up - confirm
+                * whether that is intended.
+                */
+               vio_cmo.entitled += delta;
+               vio_cmo.excess.size += delta;
+               vio_cmo.excess.free += delta;
+
+               goto out;
+       }
+
+       /* Entitlement decreases */
+       delta = vio_cmo.entitled - new_entitlement;
+       avail = vio_cmo.excess.free;
+
+       /*
+        * Need to check how much unused entitlement each device can
+        * sacrifice to fulfill entitlement change.  This first pass only
+        * totals what could be reclaimed; nothing is modified yet.
+        */
+       list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+               if (avail >= delta)
+                       break;
+
+               viodev = dev_ent->viodev;
+               if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
+                   (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
+                               avail += viodev->cmo.entitled -
+                                        max_t(size_t, viodev->cmo.allocated,
+                                              VIO_CMO_MIN_ENT);
+       }
+
+       if (delta <= avail) {
+               vio_cmo.entitled -= delta;
+
+               /* Take entitlement from the excess pool first */
+               tmp = min(vio_cmo.excess.free, delta);
+               vio_cmo.excess.size -= tmp;
+               vio_cmo.excess.free -= tmp;
+               delta -= tmp;
+
+               /*
+                * Remove all but VIO_CMO_MIN_ENT bytes from devices
+                * until entitlement change is served
+                */
+               list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+                       if (!delta)
+                               break;
+
+                       viodev = dev_ent->viodev;
+                       /* tmp: entitlement this device can give up (may be 0) */
+                       tmp = 0;
+                       if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
+                           (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
+                               tmp = viodev->cmo.entitled -
+                                     max_t(size_t, viodev->cmo.allocated,
+                                           VIO_CMO_MIN_ENT);
+                       viodev->cmo.entitled -= min(tmp, delta);
+                       delta -= min(tmp, delta);
+               }
+       } else {
+               /* Not enough reclaimable entitlement: leave state untouched */
+               spin_unlock_irqrestore(&vio_cmo.lock, flags);
+               return -ENOMEM;
+       }
+
+out:
+       /* Delay of 0: run the balance as soon as the workqueue allows */
+       schedule_delayed_work(&vio_cmo.balance_q, 0);
+       spin_unlock_irqrestore(&vio_cmo.lock, flags);
+       return 0;
+}
+
+/**
+ * vio_cmo_balance - Balance entitlement among devices
+ *
+ * @work: work queue structure for this operation
+ *
+ * Any system entitlement above the minimum needed for devices, or
+ * already allocated to devices, can be distributed to the devices.
+ * The list of devices is iterated through to recalculate the desired
+ * entitlement level and to determine how much entitlement above the
+ * minimum entitlement is allocated to devices.
+ *
+ * Small chunks of the available entitlement are given to devices until
+ * their requirements are fulfilled or there is no entitlement left to give.
+ * Upon completion sizes of the reserve and excess pools are calculated.
+ *
+ * The system minimum entitlement level is also recalculated here.
+ * Entitlement will be reserved for devices even after vio_bus_remove to
+ * accommodate reloading the driver.  The OF tree is walked to count the
+ * number of devices present and this will remove entitlement for devices
+ * that have actually left the system after having vio_bus_remove called.
+ */
+static void vio_cmo_balance(struct work_struct *work)
+{
+       struct vio_cmo *cmo;
+       struct vio_dev *viodev;
+       struct vio_cmo_dev_entry *dev_ent;
+       unsigned long flags;
+       size_t avail = 0, level, chunk, need;
+       int devcount = 0, fulfilled;
+
+       /*
+        * Recover the owning vio_cmo from the work item.  The lock and the
+        * device list below are taken from the global vio_cmo instead.
+        * NOTE(review): the work item is initialised on vio_cmo.balance_q in
+        * vio_cmo_bus_init(), so cmo presumably always aliases the global -
+        * confirm.
+        */
+       cmo = container_of(work, struct vio_cmo, balance_q.work);
+
+       spin_lock_irqsave(&vio_cmo.lock, flags);
+
+       /* Calculate minimum entitlement and fulfill spare */
+       cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
+       BUG_ON(cmo->min > cmo->entitled);
+       cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
+       cmo->min += cmo->spare;
+       cmo->desired = cmo->min;
+
+       /*
+        * Determine how much entitlement is available and reset device
+        * entitlements
+        */
+       avail = cmo->entitled - cmo->spare;
+       list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+               viodev = dev_ent->viodev;
+               devcount++;
+               viodev->cmo.entitled = VIO_CMO_MIN_ENT;
+               cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
+               /*
+                * Each device consumes its current allocation or the minimum
+                * entitlement, whichever is larger.
+                * NOTE(review): avail is unsigned; nothing here guards against
+                * the running total exceeding it - confirm the invariant.
+                */
+               avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
+       }
+
+       /*
+        * Having provided each device with the minimum entitlement, loop
+        * over the devices portioning out the remaining entitlement
+        * until there is nothing left.  'level' is the entitlement every
+        * unfulfilled device has been raised to before the current pass.
+        */
+       level = VIO_CMO_MIN_ENT;
+       while (avail) {
+               fulfilled = 0;
+               list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+                       viodev = dev_ent->viodev;
+
+                       /* Device already has everything it asked for */
+                       if (viodev->cmo.desired <= level) {
+                               fulfilled++;
+                               continue;
+                       }
+
+                       /*
+                        * Give the device up to VIO_CMO_BALANCE_CHUNK
+                        * bytes of entitlement, but do not exceed the
+                        * desired level of entitlement for the device.
+                        */
+                       chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
+                       chunk = min(chunk, (viodev->cmo.desired -
+                                           viodev->cmo.entitled));
+                       viodev->cmo.entitled += chunk;
+
+                       /*
+                        * If the memory for this entitlement increase was
+                        * already allocated to the device it does not come
+                        * from the available pool being portioned out.
+                        */
+                       need = max(viodev->cmo.allocated, viodev->cmo.entitled)-
+                              max(viodev->cmo.allocated, level);
+                       avail -= need;
+
+               }
+               /* Every device is satisfied; leftover stays unassigned */
+               if (fulfilled == devcount)
+                       break;
+               level += VIO_CMO_BALANCE_CHUNK;
+       }
+
+       /* Calculate new reserve and excess pool sizes */
+       cmo->reserve.size = cmo->min;
+       cmo->excess.free = 0;
+       cmo->excess.size = 0;
+       need = 0;       /* reused: total allocation held above entitlement */
+       list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+               viodev = dev_ent->viodev;
+               /* Calculated reserve size above the minimum entitlement */
+               if (viodev->cmo.entitled)
+                       cmo->reserve.size += (viodev->cmo.entitled -
+                                             VIO_CMO_MIN_ENT);
+               /* Calculated used excess entitlement */
+               if (viodev->cmo.allocated > viodev->cmo.entitled)
+                       need += viodev->cmo.allocated - viodev->cmo.entitled;
+       }
+       cmo->excess.size = cmo->entitled - cmo->reserve.size;
+       cmo->excess.free = cmo->excess.size - need;
+
+       /*
+        * Cancel any pending run of this delayed work item.
+        * NOTE(review): presumably because a request queued while this pass
+        * was running is already covered by it - confirm.
+        */
+       cancel_delayed_work(container_of(work, struct delayed_work, work));
+       spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/*
+ * Allocate a coherent DMA buffer for a VIO device, charging the
+ * page-rounded size against the device's CMO entitlement first.
+ * Returns the buffer, or NULL (and bumps allocs_failed) when either the
+ * entitlement charge or the underlying IOMMU allocation fails.
+ */
+static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
+                                          dma_addr_t *dma_handle, gfp_t flag)
+{
+       struct vio_dev *viodev = to_vio_dev(dev);
+       size_t charge = roundup(size, IOMMU_PAGE_SIZE);
+       void *buf;
+
+       if (vio_cmo_alloc(viodev, charge))
+               goto count_failure;
+
+       buf = dma_iommu_ops.alloc_coherent(dev, size, dma_handle, flag);
+       if (unlikely(buf == NULL))
+               goto undo_charge;
+
+       return buf;
+
+undo_charge:
+       /* The IOMMU refused: give the entitlement back */
+       vio_cmo_dealloc(viodev, charge);
+count_failure:
+       atomic_inc(&viodev->cmo.allocs_failed);
+       return NULL;
+}
+
+/*
+ * Release a coherent DMA buffer and then return its page-rounded size
+ * to the device's CMO accounting.
+ */
+static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
+                                        void *vaddr, dma_addr_t dma_handle)
+{
+       size_t charge = roundup(size, IOMMU_PAGE_SIZE);
+
+       dma_iommu_ops.free_coherent(dev, size, vaddr, dma_handle);
+       vio_cmo_dealloc(to_vio_dev(dev), charge);
+}
+
+/*
+ * Map a single buffer for DMA, charging the page-rounded size against
+ * the device's CMO entitlement before asking the IOMMU for the mapping.
+ * Returns DMA_ERROR_CODE when the charge fails, otherwise whatever the
+ * IOMMU map_single returned; every failure bumps allocs_failed.
+ */
+static dma_addr_t vio_dma_iommu_map_single(struct device *dev, void *vaddr,
+                                           size_t size,
+                                           enum dma_data_direction direction,
+                                           struct dma_attrs *attrs)
+{
+       struct vio_dev *viodev = to_vio_dev(dev);
+       size_t charge = roundup(size, IOMMU_PAGE_SIZE);
+       dma_addr_t handle;
+
+       if (vio_cmo_alloc(viodev, charge)) {
+               atomic_inc(&viodev->cmo.allocs_failed);
+               return DMA_ERROR_CODE;
+       }
+
+       handle = dma_iommu_ops.map_single(dev, vaddr, size, direction, attrs);
+       if (unlikely(dma_mapping_error(handle))) {
+               /* Mapping failed: undo the entitlement charge */
+               vio_cmo_dealloc(viodev, charge);
+               atomic_inc(&viodev->cmo.allocs_failed);
+       }
+
+       return handle;
+}
+
+/*
+ * Tear down a single DMA mapping and then return its page-rounded size
+ * to the device's CMO accounting.
+ */
+static void vio_dma_iommu_unmap_single(struct device *dev,
+               dma_addr_t dma_handle, size_t size,
+               enum dma_data_direction direction,
+               struct dma_attrs *attrs)
+{
+       size_t charge = roundup(size, IOMMU_PAGE_SIZE);
+
+       dma_iommu_ops.unmap_single(dev, dma_handle, size, direction, attrs);
+       vio_cmo_dealloc(to_vio_dev(dev), charge);
+}
+
+/*
+ * Map a scatterlist for DMA with CMO accounting.
+ *
+ * The page-rounded length of every input element is charged against the
+ * device's entitlement up front.  After a successful mapping, the charge
+ * is trimmed to the page-rounded DMA lengths of the entries actually
+ * returned, and any difference is given back.
+ *
+ * Returns the number of mapped entries, or 0 on failure (entitlement not
+ * available or the IOMMU mapping failed); failures bump allocs_failed.
+ */
+static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
+                                int nelems, enum dma_data_direction direction,
+                                struct dma_attrs *attrs)
+{
+       struct vio_dev *viodev = to_vio_dev(dev);
+       struct scatterlist *sgl;
+       int ret, count = 0;
+       size_t alloc_size = 0;
+
+       for (sgl = sglist; count < nelems; count++, sgl++)
+               alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE);
+
+       if (vio_cmo_alloc(viodev, alloc_size)) {
+               atomic_inc(&viodev->cmo.allocs_failed);
+               return 0;
+       }
+
+       ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs);
+
+       if (unlikely(!ret)) {
+               vio_cmo_dealloc(viodev, alloc_size);
+               atomic_inc(&viodev->cmo.allocs_failed);
+               /*
+                * Return here: falling through would hit the trailing
+                * dealloc with alloc_size unchanged and release the same
+                * entitlement a second time.
+                */
+               return ret;
+       }
+
+       /* Give back whatever was charged beyond the mapped DMA lengths */
+       for (sgl = sglist, count = 0; count < ret; count++, sgl++)
+               alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
+       if (alloc_size)
+               vio_cmo_dealloc(viodev, alloc_size);
+
+       return ret;
+}
+
+/*
+ * Unmap a scatterlist and return its entitlement.  The page-rounded DMA
+ * lengths are summed before the unmap call and released afterwards.
+ */
+static void vio_dma_iommu_unmap_sg(struct device *dev,
+               struct scatterlist *sglist, int nelems,
+               enum dma_data_direction direction,
+               struct dma_attrs *attrs)
+{
+       size_t released = 0;
+       int i;
+
+       /* Total up the charge first, then unmap */
+       for (i = 0; i < nelems; i++)
+               released += roundup(sglist[i].dma_length, IOMMU_PAGE_SIZE);
+
+       dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
+
+       vio_cmo_dealloc(to_vio_dev(dev), released);
+}
+
+/*
+ * DMA operations that wrap the dma_iommu_ops callbacks with CMO
+ * entitlement accounting.  The dma_supported member is not set here; it
+ * is filled in by vio_cmo_set_dma_ops() when the table is installed on a
+ * device.
+ */
+struct dma_mapping_ops vio_dma_mapping_ops = {
+       .alloc_coherent = vio_dma_iommu_alloc_coherent,
+       .free_coherent  = vio_dma_iommu_free_coherent,
+       .map_single     = vio_dma_iommu_map_single,
+       .unmap_single   = vio_dma_iommu_unmap_single,
+       .map_sg         = vio_dma_iommu_map_sg,
+       .unmap_sg       = vio_dma_iommu_unmap_sg,
+};
+
+/**
+ * vio_cmo_set_dev_desired - Set desired entitlement for a device
+ *
+ * @viodev: struct vio_dev for device to alter
+ * @desired: new desired entitlement level in bytes
+ *
+ * For use by devices to request a change to their entitlement at runtime or
+ * through sysfs.  The desired entitlement level is changed and a balancing
+ * of system resources is scheduled to run in the future.
+ *
+ * Requests below VIO_CMO_MIN_ENT are raised to VIO_CMO_MIN_ENT.  Nothing is
+ * done when the firmware lacks FW_FEATURE_CMO or when the device is not on
+ * the CMO device list.
+ */
+void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
+{
+       unsigned long flags;
+       struct vio_cmo_dev_entry *dev_ent;
+       int found = 0;
+
+       if (!firmware_has_feature(FW_FEATURE_CMO))
+               return;
+
+       spin_lock_irqsave(&vio_cmo.lock, flags);
+       if (desired < VIO_CMO_MIN_ENT)
+               desired = VIO_CMO_MIN_ENT;
+
+       /*
+        * Changes will not be made for devices not in the device list.
+        * If it is not in the device list, then no driver is loaded
+        * for the device and it can not receive entitlement.
+        */
+       list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
+               if (viodev == dev_ent->viodev) {
+                       found = 1;
+                       break;
+               }
+       if (!found) {
+               /* Must not leave with the lock held and IRQs disabled */
+               spin_unlock_irqrestore(&vio_cmo.lock, flags);
+               return;
+       }
+
+       /* Increase/decrease in desired device entitlement */
+       if (desired >= viodev->cmo.desired) {
+               /* Just bump the bus and device values prior to a balance */
+               vio_cmo.desired += desired - viodev->cmo.desired;
+               viodev->cmo.desired = desired;
+       } else {
+               /* Decrease bus and device values for desired entitlement */
+               vio_cmo.desired -= viodev->cmo.desired - desired;
+               viodev->cmo.desired = desired;
+               /*
+                * If less entitlement is desired than current entitlement, move
+                * any reserve memory in the change region to the excess pool.
+                */
+               if (viodev->cmo.entitled > desired) {
+                       vio_cmo.reserve.size -= viodev->cmo.entitled - desired;
+                       vio_cmo.excess.size += viodev->cmo.entitled - desired;
+                       /*
+                        * If entitlement moving from the reserve pool to the
+                        * excess pool is currently unused, add to the excess
+                        * free counter.
+                        */
+                       if (viodev->cmo.allocated < viodev->cmo.entitled)
+                               vio_cmo.excess.free += viodev->cmo.entitled -
+                                                      max(viodev->cmo.allocated, desired);
+                       viodev->cmo.entitled = desired;
+               }
+       }
+       schedule_delayed_work(&vio_cmo.balance_q, 0);
+       spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/**
+ * vio_cmo_bus_probe - Handle CMO specific bus probe activities
+ *
+ * @viodev: Pointer to struct vio_dev for device
+ *
+ * Determine the devices IO memory entitlement needs, attempting
+ * to satisfy the system minimum entitlement at first and scheduling
+ * a balance operation to take care of the rest at a later time.
+ *
+ * Devices with an "ibm,my-dma-window" property get an entry on the CMO
+ * device list; devices without one need no entitlement.
+ *
+ * Returns: 0 on success, -EINVAL when device doesn't support CMO, and
+ *          -ENOMEM when entitlement is not available for device or
+ *          device entry.  On failure the device is not left on the CMO
+ *          device list.
+ *
+ */
+static int vio_cmo_bus_probe(struct vio_dev *viodev)
+{
+       struct vio_cmo_dev_entry *dev_ent = NULL;
+       struct device *dev = &viodev->dev;
+       struct vio_driver *viodrv = to_vio_driver(dev->driver);
+       unsigned long flags;
+       size_t size;
+
+       /*
+        * Check to see that device has a DMA window and configure
+        * entitlement for the device.
+        */
+       if (of_get_property(viodev->dev.archdata.of_node,
+                           "ibm,my-dma-window", NULL)) {
+               /* Check that the driver is CMO enabled and get desired DMA */
+               if (!viodrv->get_desired_dma) {
+                       dev_err(dev, "%s: device driver does not support CMO\n",
+                               __func__);
+                       return -EINVAL;
+               }
+
+               viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev));
+               if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
+                       viodev->cmo.desired = VIO_CMO_MIN_ENT;
+               size = VIO_CMO_MIN_ENT;
+
+               /* Allocate before taking the spinlock: GFP_KERNEL may sleep */
+               dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry),
+                                 GFP_KERNEL);
+               if (!dev_ent)
+                       return -ENOMEM;
+
+               dev_ent->viodev = viodev;
+               spin_lock_irqsave(&vio_cmo.lock, flags);
+               list_add(&dev_ent->list, &vio_cmo.device_list);
+       } else {
+               viodev->cmo.desired = 0;
+               size = 0;
+               spin_lock_irqsave(&vio_cmo.lock, flags);
+       }
+
+       /*
+        * If the needs for vio_cmo.min have not changed since they
+        * were last set, the number of devices in the OF tree has
+        * been constant and the IO memory for this is already in
+        * the reserve pool.
+        */
+       if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
+                           VIO_CMO_MIN_ENT)) {
+               /* Updated desired entitlement if device requires it */
+               if (size)
+                       vio_cmo.desired += (viodev->cmo.desired -
+                                       VIO_CMO_MIN_ENT);
+       } else {
+               size_t tmp;
+
+               tmp = vio_cmo.spare + vio_cmo.excess.free;
+               if (tmp < size) {
+                       /* tmp already includes the spare; report it as-is */
+                       dev_err(dev, "%s: insufficient free "
+                               "entitlement to add device. "
+                               "Need %lu, have %lu\n", __func__,
+                               size, tmp);
+                       /*
+                        * Undo the list insertion so no entry pointing at
+                        * this device outlives the failed probe.
+                        */
+                       if (dev_ent)
+                               list_del(&dev_ent->list);
+                       spin_unlock_irqrestore(&vio_cmo.lock, flags);
+                       kfree(dev_ent);
+                       return -ENOMEM;
+               }
+
+               /* Use excess pool first to fulfill request */
+               tmp = min(size, vio_cmo.excess.free);
+               vio_cmo.excess.free -= tmp;
+               vio_cmo.excess.size -= tmp;
+               vio_cmo.reserve.size += tmp;
+
+               /* Use spare if excess pool was insufficient */
+               vio_cmo.spare -= size - tmp;
+
+               /* Update bus accounting */
+               vio_cmo.min += size;
+               vio_cmo.desired += viodev->cmo.desired;
+       }
+       spin_unlock_irqrestore(&vio_cmo.lock, flags);
+       return 0;
+}
+
+/**
+ * vio_cmo_bus_remove - Handle CMO specific bus removal activities
+ *
+ * @viodev: Pointer to struct vio_dev for device
+ *
+ * Remove the device from the cmo device list.  The minimum entitlement
+ * will be reserved for the device as long as it is in the system.  The
+ * rest of the entitlement the device had been allocated will be returned
+ * to the system.
+ *
+ * Calls BUG() if the device still has IO memory allocated.
+ */
+static void vio_cmo_bus_remove(struct vio_dev *viodev)
+{
+       struct vio_cmo_dev_entry *dev_ent;
+       unsigned long flags;
+       size_t tmp;
+
+       spin_lock_irqsave(&vio_cmo.lock, flags);
+       if (viodev->cmo.allocated) {
+               dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
+                       "allocated after remove operation.\n",
+                       __func__, viodev->cmo.allocated);
+               BUG();
+       }
+
+       /*
+        * Remove the device from the device list being maintained for
+        * CMO enabled devices.  The non-_safe iterator is fine here only
+        * because the loop is left immediately after the entry is freed.
+        */
+       list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
+               if (viodev == dev_ent->viodev) {
+                       list_del(&dev_ent->list);
+                       kfree(dev_ent);
+                       break;
+               }
+
+       /*
+        * Devices may not require any entitlement and they do not need
+        * to be processed.  Otherwise, return the device's entitlement
+        * back to the pools.
+        */
+       if (viodev->cmo.entitled) {
+               /*
+                * This device has not yet left the OF tree, its
+                * minimum entitlement remains in vio_cmo.min and
+                * vio_cmo.desired
+                */
+               vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);
+
+               /*
+                * Save min allocation for device in reserve as long
+                * as it exists in OF tree as determined by later
+                * balance operation.  From here on viodev->cmo.entitled
+                * temporarily holds only the amount above the minimum.
+                */
+               viodev->cmo.entitled -= VIO_CMO_MIN_ENT;
+
+               /* Replenish spare from freed reserve pool */
+               if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
+                       tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT -
+                                                        vio_cmo.spare));
+                       vio_cmo.spare += tmp;
+                       viodev->cmo.entitled -= tmp;
+               }
+
+               /* Remaining reserve goes to excess pool */
+               vio_cmo.excess.size += viodev->cmo.entitled;
+               vio_cmo.excess.free += viodev->cmo.entitled;
+               vio_cmo.reserve.size -= viodev->cmo.entitled;
+
+               /*
+                * Until the device is removed it will keep a
+                * minimum entitlement; this will guarantee that
+                * a module unload/load will result in a success.
+                */
+               viodev->cmo.entitled = VIO_CMO_MIN_ENT;
+               viodev->cmo.desired = VIO_CMO_MIN_ENT;
+               atomic_set(&viodev->cmo.allocs_failed, 0);
+       }
+
+       spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/*
+ * Install the CMO-accounting DMA operations on a VIO device.  The
+ * dma_supported callback is copied from dma_iommu_ops into the shared
+ * vio_dma_mapping_ops table on every call, then the device is pointed at
+ * that table.
+ */
+static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
+{
+       vio_dma_mapping_ops.dma_supported = dma_iommu_ops.dma_supported;
+       viodev->dev.archdata.dma_ops = &vio_dma_mapping_ops;
+}
+
+/**
+ * vio_cmo_bus_init - CMO entitlement initialization at bus init time
+ *
+ * Clears the global vio_cmo state, reads the system entitlement through
+ * h_get_mpp(), sizes the reserve pool as the spare plus one minimum
+ * entitlement per device counted in the OF tree, and gives the rest of
+ * the entitlement to the excess pool.  Panics when the reserve pool does
+ * not fit in the system entitlement.
+ */
+static void vio_cmo_bus_init(void)
+{
+       struct hvcall_mpp_data mpp_data;
+       int err;
+
+       memset(&vio_cmo, 0, sizeof(struct vio_cmo));
+       spin_lock_init(&vio_cmo.lock);
+       INIT_LIST_HEAD(&vio_cmo.device_list);
+       INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);
+
+       /* Ask for the current system entitlement */
+       err = h_get_mpp(&mpp_data);
+       if (err == H_SUCCESS) {
+               vio_cmo.entitled = mpp_data.entitled_mem;
+       } else {
+               /*
+                * Carry on with an entitlement of 0; the reserve check
+                * below will then panic().
+                */
+               printk(KERN_ERR "%s: unable to determine system IO "
+                      "entitlement. (%d)\n", __func__, err);
+               vio_cmo.entitled = 0;
+       }
+
+       /* Reserve pool: the spare plus the minimum for every OF device */
+       vio_cmo.spare = VIO_CMO_MIN_ENT;
+       vio_cmo.reserve.size = vio_cmo.spare;
+       vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
+                                VIO_CMO_MIN_ENT);
+       if (vio_cmo.reserve.size > vio_cmo.entitled) {
+               printk(KERN_ERR "%s: insufficient system entitlement\n",
+                      __func__);
+               panic("%s: Insufficient system entitlement", __func__);
+       }
+
+       /* Minimum and desired both start out equal to the reserve pool */
+       vio_cmo.min = vio_cmo.reserve.size;
+       vio_cmo.desired = vio_cmo.reserve.size;
+
+       /* Everything else is excess, and all of it is free */
+       vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
+       vio_cmo.excess.free = vio_cmo.excess.size;
+}
+
+/* sysfs device functions and data structures for CMO */
+
+/*
+ * Generate viodev_cmo_<name>_show(), a sysfs show routine that prints the
+ * device's cmo.<name> field with "%lu" followed by a newline.
+ */
+#define viodev_cmo_rd_attr(name)                                        \
+static ssize_t viodev_cmo_##name##_show(struct device *dev,             \
+                                        struct device_attribute *attr,  \
+                                         char *buf)                     \
+{                                                                       \
+       return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name);        \
+}
+
+/* sysfs show: print the device's count of failed CMO allocations */
+static ssize_t viodev_cmo_allocs_failed_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       int failures = atomic_read(&to_vio_dev(dev)->cmo.allocs_failed);
+
+       return sprintf(buf, "%d\n", failures);
+}
+
+/*
+ * sysfs store: any write clears the device's failed-allocation counter.
+ * The written data is ignored and the whole write is reported consumed.
+ */
+static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t count)
+{
+       atomic_set(&to_vio_dev(dev)->cmo.allocs_failed, 0);
+
+       return count;
+}
+
+/*
+ * sysfs store: parse a base-10 value and hand it to
+ * vio_cmo_set_dev_desired() as the device's new desired entitlement.
+ * Returns the parse error from strict_strtoul() on bad input, otherwise
+ * the number of bytes written.
+ */
+static ssize_t viodev_cmo_desired_set(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t count)
+{
+       size_t new_desired;
+       int ret = strict_strtoul(buf, 10, &new_desired);
+
+       if (!ret) {
+               vio_cmo_set_dev_desired(to_vio_dev(dev), new_desired);
+               return count;
+       }
+
+       return ret;
+}
+
+/* Instantiate the show routines for the per-device cmo fields */
+viodev_cmo_rd_attr(desired);
+viodev_cmo_rd_attr(entitled);
+viodev_cmo_rd_attr(allocated);
+
+/* name_show/devspec_show are only declared here, for use by __ATTR_RO below */
+static ssize_t name_show(struct device *, struct device_attribute *, char *);
+static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
+
+/*
+ * Per-device sysfs attributes used when CMO is active.  cmo_desired and
+ * cmo_allocs_failed have store routines and are writable by owner and
+ * group; the remaining cmo_* attributes have no store routine.
+ */
+static struct device_attribute vio_cmo_dev_attrs[] = {
+       __ATTR_RO(name),
+       __ATTR_RO(devspec),
+       __ATTR(cmo_desired,       S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
+              viodev_cmo_desired_show, viodev_cmo_desired_set),
+       __ATTR(cmo_entitled,      S_IRUGO, viodev_cmo_entitled_show,      NULL),
+       __ATTR(cmo_allocated,     S_IRUGO, viodev_cmo_allocated_show,     NULL),
+       __ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
+              viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset),
+       __ATTR_NULL
+};
+
+/* sysfs bus functions and data structures for CMO */
+
+/*
+ * Generate viobus_cmo_<name>_show(), a bus-level sysfs show routine that
+ * prints vio_cmo.<name> with "%lu" followed by a newline.
+ */
+#define viobus_cmo_rd_attr(name)                                        \
+static ssize_t                                                          \
+viobus_cmo_##name##_show(struct bus_type *bt, char *buf)                \
+{                                                                       \
+       return sprintf(buf, "%lu\n", vio_cmo.name);                     \
+}
+
+/*
+ * Generate viobus_cmo_<name>_pool_show_<var>(), which prints the <var>
+ * member of the vio_cmo.<name> pool with "%lu" followed by a newline.
+ */
+#define viobus_cmo_pool_rd_attr(name, var)                              \
+static ssize_t                                                          \
+viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf)     \
+{                                                                       \
+       return sprintf(buf, "%lu\n", vio_cmo.name.var);                 \
+}
+
+/*
+ * sysfs store for cmo_high: any write resets vio_cmo.high to the current
+ * value of vio_cmo.curr, under vio_cmo.lock.  The written data is ignored
+ * and the whole write is reported as consumed.
+ */
+static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf,
+                                     size_t count)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&vio_cmo.lock, flags);
+       vio_cmo.high = vio_cmo.curr;
+       spin_unlock_irqrestore(&vio_cmo.lock, flags);
+
+       return count;
+}
+
+/* Instantiate the show routines for the bus-wide vio_cmo fields */
+viobus_cmo_rd_attr(entitled);
+viobus_cmo_pool_rd_attr(reserve, size);
+viobus_cmo_pool_rd_attr(excess, size);
+viobus_cmo_pool_rd_attr(excess, free);
+viobus_cmo_rd_attr(spare);
+viobus_cmo_rd_attr(min);
+viobus_cmo_rd_attr(desired);
+viobus_cmo_rd_attr(curr);
+viobus_cmo_rd_attr(high);
+
+/*
+ * Bus-level sysfs attributes exposing the vio_cmo accounting.  Only
+ * cmo_high has a store routine (viobus_cmo_high_reset); the others have
+ * none.
+ */
+static struct bus_attribute vio_cmo_bus_attrs[] = {
+       __ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL),
+       __ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL),
+       __ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL),
+       __ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL),
+       __ATTR(cmo_spare,   S_IRUGO, viobus_cmo_spare_show,   NULL),
+       __ATTR(cmo_min,     S_IRUGO, viobus_cmo_min_show,     NULL),
+       __ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL),
+       __ATTR(cmo_curr,    S_IRUGO, viobus_cmo_curr_show,    NULL),
+       __ATTR(cmo_high,    S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
+              viobus_cmo_high_show, viobus_cmo_high_reset),
+       __ATTR_NULL
+};
+
+/*
+ * Point vio_bus_type at the CMO device and bus attribute tables.
+ * NOTE(review): presumably this has to run before the bus is registered
+ * for the attributes to be created - confirm against the caller.
+ */
+static void vio_cmo_sysfs_init(void)
+{
+       vio_bus_type.dev_attrs = vio_cmo_dev_attrs;
+       vio_bus_type.bus_attrs = vio_cmo_bus_attrs;
+}
+#else /* CONFIG_PPC_SMLPAR */
+/*
+ * Dummy functions for iSeries platform.
+ *
+ * No-op stand-ins used when CONFIG_PPC_SMLPAR is not set.  They keep the
+ * same signatures as the real implementations; the parameterless ones
+ * are declared (void) so they are proper prototypes.
+ */
+int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
+void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
+static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
+static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
+static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
+static void vio_cmo_bus_init(void) {}
+static void vio_cmo_sysfs_init(void) { }
+#endif /* CONFIG_PPC_SMLPAR */
+EXPORT_SYMBOL(vio_cmo_entitlement_update);
+EXPORT_SYMBOL(vio_cmo_set_dev_desired);
+
  static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
  {
         const unsigned char *dma_window;
@@ -114,8 +1105,17 @@ static int vio_bus_probe(struct device *dev)
                 return error;
  
         id = vio_match_device(viodrv->id_table, viodev);
-       if (id)
+       if (id) {
+               memset(&viodev->cmo, 0, sizeof(viodev->cmo));
+               if (firmware_has_feature(FW_FEATURE_CMO)) {
+                       error = vio_cmo_bus_probe(viodev);
+                       if (error)
+                               return error;
+               }
                 error = viodrv->probe(viodev, id);
+               if (error)
+                       vio_cmo_bus_remove(viodev);
+       }
  
         return error;
  }
@@ -125,12 +1125,23 @@ static int vio_bus_remove(struct device *dev)
  {
         struct vio_dev *viodev = to_vio_dev(dev);
         struct vio_driver *viodrv = to_vio_driver(dev->driver);
+       struct device *devptr;
+       int ret = 1;
+
+       /*
+        * Hold a reference to the device after the remove function is called
+        * to allow for CMO accounting cleanup for the device.
+        */
+       devptr = get_device(dev);
  
         if (viodrv->remove)
-               return viodrv->remove(viodev);
+               ret = viodrv->remove(viodev);
+
+       if (!ret && firmware_has_feature(FW_FEATURE_CMO))
+               vio_cmo_bus_remove(viodev);
  
-       /* driver can't remove */
-       return 1;
+       put_device(devptr);
+       return ret;
  }
  
  /**
@@ -215,7 +1226,11 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
                         viodev->unit_address = *unit_address;
         }
         viodev->dev.archdata.of_node = of_node_get(of_node);
-       viodev->dev.archdata.dma_ops = &dma_iommu_ops;
+
+       if (firmware_has_feature(FW_FEATURE_CMO))
+               vio_cmo_set_dma_ops(viodev);
+       else
+               viodev->dev.archdata.dma_ops = &dma_iommu_ops;
         viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev);
         viodev->dev.archdata.numa_node = of_node_to_nid(of_node);
  
@@ -245,6 +1260,9 @@ static int __init vio_bus_init(void)
         int err;
         struct device_node *node_vroot;
  
+       if (firmware_has_feature(FW_FEATURE_CMO))
+               vio_cmo_sysfs_init();
+
         err = bus_register(&vio_bus_type);
         if (err) {
                 printk(KERN_ERR "failed to register VIO bus\n");
@@ -262,6 +1280,9 @@ static int __init vio_bus_init(void)
                 return err;
         }
  
+       if (firmware_has_feature(FW_FEATURE_CMO))
+               vio_cmo_bus_init();
+
         node_vroot = of_find_node_by_name(NULL, "vdevice");
         if (node_vroot) {
                 struct device_node *of_node;
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S

index a914411bced5defb4179628c9b0049d68a360ab0..4a8ce62fe1121c1a774f04a9af6223a96f945c23 100644 (file)
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -85,7 +85,7 @@ SECTIONS
  
         /* The dummy segment contents for the bug workaround mentioned above
            near PHDRS.  */
-       .dummy : {
+       .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) {
                 LONG(0xf177)
         } :kernel :dummy
  
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c

index 1707d00331fc931dde7e7865def1422ff8e8c1e9..565b7a237c847929e885eb5597fe4cdb14746cf3 100644 (file)
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -100,31 +100,6 @@ static int store_updates_sp(struct pt_regs *regs)
         return 0;
  }
  
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-static void do_dabr(struct pt_regs *regs, unsigned long address,
-                   unsigned long error_code)
-{
-       siginfo_t info;
-
-       if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
-                       11, SIGSEGV) == NOTIFY_STOP)
-               return;
-
-       if (debugger_dabr_match(regs))
-               return;
-
-       /* Clear the DABR */
-       set_dabr(0);
-
-       /* Deliver the signal to userspace */
-       info.si_signo = SIGTRAP;
-       info.si_errno = 0;
-       info.si_code = TRAP_HWBKPT;
-       info.si_addr = (void __user *)address;
-       force_sig_info(SIGTRAP, &info, current);
-}
-#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
-
  /*
   * For 600- and 800-family processors, the error_code parameter is DSISR
   * for a data fault, SRR1 for an instruction fault. For 400-family processors
diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig

index d664b1bce381afcc30b2988f4beb1d28e1508df0..696a5ee4962d20c81737b5a36db3ece5a13041bd 100644 (file)
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -1,7 +1,6 @@
  config PPC_MPC52xx
         bool "52xx-based boards"
         depends on PPC_MULTIPLATFORM && PPC32
-       select FSL_SOC
         select PPC_CLOCK
         select PPC_PCI_CHOICE
  
@@ -48,6 +47,7 @@ config PPC_MPC5200_BUGFIX
  config PPC_MPC5200_GPIO
         bool "MPC5200 GPIO support"
         depends on PPC_MPC52xx
-       select HAVE_GPIO_LIB
+       select ARCH_REQUIRE_GPIOLIB
+       select GENERIC_GPIO
         help
           Enable gpiolib support for mpc5200 based boards
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c

index 208005ca262c62ac58b466c94e21ff3f47932924..e06420af5fe9116cd007645594ee6cd0a962a6ea 100644 (file)
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -172,7 +172,7 @@ static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
         }
  }
  
-static void tce_build_cell(struct iommu_table *tbl, long index, long npages,
+static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
                 unsigned long uaddr, enum dma_data_direction direction,
                 struct dma_attrs *attrs)
  {
@@ -213,6 +213,7 @@ static void tce_build_cell(struct iommu_table *tbl, long index, long npages,
  
         pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
                  index, npages, direction, base_pte);
+       return 0;
  }
  
  static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
@@ -1150,12 +1151,23 @@ static int iommu_fixed_disabled;
  
  static int __init setup_iommu_fixed(char *str)
  {
+       struct device_node *pciep;
+
         if (strcmp(str, "off") == 0)
                 iommu_fixed_disabled = 1;
  
-       else if (strcmp(str, "weak") == 0)
+       /* If we can find a pcie-endpoint in the device tree assume that
+        * we're on a triblade or a CAB so by default the fixed mapping
+        * should be set to be weakly ordered; but only if the boot
+        * option WASN'T set for strong ordering
+        */
+       pciep = of_find_node_by_type(NULL, "pcie-endpoint");
+
+       if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0))
                 iommu_fixed_is_weak = 1;
  
+       of_node_put(pciep);
+
         return 1;
  }
  __setup("iommu_fixed=", setup_iommu_fixed);
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c

index 34654743363dccf871534441f36bbcf2149459ef..2deeeba7eccfc9c555d5d32aca370507f08268be 100644 (file)
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -312,10 +312,27 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
          */
         node = cpu_to_node(raw_smp_processor_id());
         for (n = 0; n < MAX_NUMNODES; n++, node++) {
+               int available_spus;
+
                 node = (node < MAX_NUMNODES) ? node : 0;
                 if (!node_allowed(ctx, node))
                         continue;
+
+               available_spus = 0;
                 mutex_lock(&cbe_spu_info[node].list_mutex);
+               list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+                       if (spu->ctx && spu->ctx->gang
+                                       && spu->ctx->aff_offset == 0)
+                               available_spus -=
+                                       (spu->ctx->gang->contexts - 1);
+                       else
+                               available_spus++;
+               }
+               if (available_spus < ctx->gang->contexts) {
+                       mutex_unlock(&cbe_spu_info[node].list_mutex);
+                       continue;
+               }
+
                 list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
                         if ((!mem_aff || spu->has_mem_affinity) &&
                                                         sched_spu(spu)) {
@@ -389,6 +406,9 @@ static int has_affinity(struct spu_context *ctx)
         if (list_empty(&ctx->aff_list))
                 return 0;
  
+       if (atomic_read(&ctx->gang->aff_sched_count) == 0)
+               ctx->gang->aff_ref_spu = NULL;
+
         if (!gang->aff_ref_spu) {
                 if (!(gang->aff_flags & AFF_MERGED))
                         aff_merge_remaining_ctxs(gang);
@@ -416,14 +436,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
         if (spu->ctx->flags & SPU_CREATE_NOSCHED)
                 atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
  
-       if (ctx->gang){
-               mutex_lock(&ctx->gang->aff_mutex);
-               if (has_affinity(ctx)) {
-                       if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
-                               ctx->gang->aff_ref_spu = NULL;
-               }
-               mutex_unlock(&ctx->gang->aff_mutex);
-       }
+       if (ctx->gang)
+               atomic_dec_if_positive(&ctx->gang->aff_sched_count);
  
         spu_switch_notify(spu, NULL);
         spu_unmap_mappings(ctx);
@@ -562,10 +576,7 @@ static struct spu *spu_get_idle(struct spu_context *ctx)
                                 goto found;
                         mutex_unlock(&cbe_spu_info[node].list_mutex);
  
-                       mutex_lock(&ctx->gang->aff_mutex);
-                       if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
-                               ctx->gang->aff_ref_spu = NULL;
-                       mutex_unlock(&ctx->gang->aff_mutex);
+                       atomic_dec(&ctx->gang->aff_sched_count);
                         goto not_found;
                 }
                 mutex_unlock(&ctx->gang->aff_mutex);
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c

index 8c0e95766a628503860ccb7b194132e29622e4f3..92d20e993ede097d5e732a2bf0b751f9931eb7d1 100644 (file)
--- a/arch/powerpc/platforms/cell/spufs/sputrace.c
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -196,8 +196,7 @@ static int __init sputrace_init(void)
         struct proc_dir_entry *entry;
         int i, error = -ENOMEM;
  
-       sputrace_log = kcalloc(sizeof(struct sputrace),
-                               bufsize, GFP_KERNEL);
+       sputrace_log = kcalloc(bufsize, sizeof(struct sputrace), GFP_KERNEL);
         if (!sputrace_log)
                 goto out;
  
diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c

index bc818e4e203321b50557361ae2f5c4f2717cd4dc..bb464d1211b2f5842bdb6d09862396600b6d9e33 100644 (file)
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -41,7 +41,7 @@
  #include <asm/iseries/hv_call_event.h>
  #include <asm/iseries/iommu.h>
  
-static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
+static int tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
                 unsigned long uaddr, enum dma_data_direction direction,
                 struct dma_attrs *attrs)
  {
@@ -71,6 +71,7 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
                 index++;
                 uaddr += TCE_PAGE_SIZE;
         }
+       return 0;
  }
  
  static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c

index 70541b7a5013ab68faed9758d86487be360bbe8c..a0ff03a3d8daf566350a2ead9e05131df36b65e7 100644 (file)
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -83,7 +83,7 @@ static u32 *iob_l2_base;
  static struct iommu_table iommu_table_iobmap;
  static int iommu_table_iobmap_inited;
  
-static void iobmap_build(struct iommu_table *tbl, long index,
+static int iobmap_build(struct iommu_table *tbl, long index,
                          long npages, unsigned long uaddr,
                          enum dma_data_direction direction,
                          struct dma_attrs *attrs)
@@ -108,6 +108,7 @@ static void iobmap_build(struct iommu_table *tbl, long index,
                 uaddr += IOBMAP_PAGE_SIZE;
                 bus_addr += IOBMAP_PAGE_SIZE;
         }
+       return 0;
  }
  
  
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig

index 757c0296e0b83e7456d28f5402f5db3c5918d4f4..97619fd51e393870d1c0c1da8a6bdb1926a52a57 100644 (file)
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -40,3 +40,26 @@ config PPC_PSERIES_DEBUG
         depends on PPC_PSERIES && PPC_EARLY_DEBUG
         bool "Enable extra debug logging in platforms/pseries"
         default y
+
+config PPC_SMLPAR
+       bool "Support for shared-memory logical partitions"
+       depends on PPC_PSERIES
+       select LPARCFG
+       default n
+       help
+         Select this option to enable shared memory partition support.
+         With this option a system running in an LPAR can be given more
+         memory than physically available and will allow firmware to
+         balance memory across many LPARs.
+
+config CMM
+       tristate "Collaborative memory management"
+       depends on PPC_SMLPAR
+       default y
+       help
+         Select this option if you want to enable the kernel interface
+         to reduce the memory size of the system. This is accomplished
+         by allocating pages of memory and putting them "on hold". This
+         only makes sense for a system running in an LPAR where the unused
+         pages will be reused for other LPARs. The interface allows
+         firmware to balance memory across many LPARs.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile

index 554c6e42ef2a9ff1f78340a0d5a9c49f5d46b611..dfe574af2dc090bd5ce3aaac503fef02e8bddb91 100644 (file)
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -24,3 +24,4 @@ obj-$(CONFIG_HVC_CONSOLE)     += hvconsole.o
  obj-$(CONFIG_HVCS)             += hvcserver.o
  obj-$(CONFIG_HCALL_STATS)      += hvCall_inst.o
  obj-$(CONFIG_PHYP_DUMP)        += phyp_dump.o
+obj-$(CONFIG_CMM)              += cmm.o
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c

new file mode 100644 (file)

index 0000000..c6b3be0
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -0,0 +1,468 @@
+/*
+ * Collaborative memory management interface.
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * Author(s): Brian King (brking@linux.vnet.ibm.com),
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/oom.h>
+#include <linux/sched.h>
+#include <linux/stringify.h>
+#include <linux/swap.h>
+#include <linux/sysdev.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/mmu.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+
+#include "plpar_wrappers.h"
+
+#define CMM_DRIVER_VERSION     "1.0.0"
+#define CMM_DEFAULT_DELAY      1
+#define CMM_DEBUG                      0
+#define CMM_DISABLE            0
+#define CMM_OOM_KB             1024
+#define CMM_MIN_MEM_MB         256
+#define KB2PAGES(_p)           ((_p)>>(PAGE_SHIFT-10))
+#define PAGES2KB(_p)           ((_p)<<(PAGE_SHIFT-10))
+
+static unsigned int delay = CMM_DEFAULT_DELAY;
+static unsigned int oom_kb = CMM_OOM_KB;
+static unsigned int cmm_debug = CMM_DEBUG;
+static unsigned int cmm_disabled = CMM_DISABLE;
+static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
+static struct sys_device cmm_sysdev;
+
+MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(CMM_DRIVER_VERSION);
+
+module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
+                "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
+module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
+                "[Default=" __stringify(CMM_OOM_KB) "]");
+module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
+                "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
+module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
+                "[Default=" __stringify(CMM_DEBUG) "]");
+
+#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))
+
+/*
+ * Debug printk, emitted only while cmm_debug (the "debug" module parameter)
+ * is non-zero.  Wrapped in do { } while (0) so the macro expands to a single
+ * statement and cannot capture a following "else" when used in an unbraced
+ * if/else body.
+ */
+#define cmm_dbg(...)                                           \
+       do {                                                    \
+               if (cmm_debug)                                  \
+                       printk(KERN_INFO "cmm: " __VA_ARGS__);  \
+       } while (0)
+
+/*
+ * One node of the list that remembers which pages have been loaned.
+ * CMM_NR_PAGES is derived from PAGE_SIZE minus the size of one pointer and
+ * one unsigned long, i.e. the array is sized so that a node occupies a
+ * single page (nodes are allocated with __get_free_page()).
+ */
+struct cmm_page_array {
+       struct cmm_page_array *next;    /* next (older) node, or NULL */
+       unsigned long index;            /* number of used slots in page[] */
+       unsigned long page[CMM_NR_PAGES]; /* addresses of loaned pages */
+};
+
+/* Pages currently loaned; updated by cmm_alloc_pages()/cmm_free_pages(). */
+static unsigned long loaned_pages;
+/* Number of loaned pages the driver is trying to reach. */
+static unsigned long loaned_pages_target;
+/* Pages handed back by the OOM notifier; cleared through sysfs. */
+static unsigned long oom_freed_pages;
+
+/* Head of the loaned-page bookkeeping list; accessed under cmm_lock. */
+static struct cmm_page_array *cmm_page_list;
+static DEFINE_SPINLOCK(cmm_lock);
+
+/* Polling kthread created with kthread_run(), or NULL when not running. */
+static struct task_struct *cmm_thread_ptr;
+
+/**
+ * cmm_alloc_pages - Take pages from the kernel and loan them out
+ * @nr:        number of pages to loan
+ *
+ * Each page obtained from the page allocator is reported to the hypervisor
+ * with plpar_page_set_loaned() and recorded in the cmm_page_list arrays.
+ * The loop ends early when a page or a new bookkeeping array cannot be
+ * allocated, or when the hypervisor call fails.
+ *
+ * Return value:
+ *     number of requested pages that were not loaned
+ **/
+static long cmm_alloc_pages(long nr)
+{
+       struct cmm_page_array *list, *fresh;
+       unsigned long page;
+       long hrc;
+
+       cmm_dbg("Begin request for %ld pages\n", nr);
+
+       for (; nr; nr--) {
+               page = __get_free_page(GFP_NOIO | __GFP_NOWARN |
+                                      __GFP_NORETRY | __GFP_NOMEMALLOC);
+               if (!page)
+                       break;
+
+               spin_lock(&cmm_lock);
+               list = cmm_page_list;
+               if (list == NULL || list->index >= CMM_NR_PAGES) {
+                       /*
+                        * No head array, or the head is full.  The lock is
+                        * released around the allocation of a replacement
+                        * and the head is re-read once it is held again.
+                        */
+                       spin_unlock(&cmm_lock);
+                       fresh = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN |
+                                                                        __GFP_NORETRY | __GFP_NOMEMALLOC);
+                       if (fresh == NULL) {
+                               pr_info("%s: Can not allocate new page list\n", __FUNCTION__);
+                               free_page(page);
+                               break;
+                       }
+                       spin_lock(&cmm_lock);
+                       list = cmm_page_list;
+
+                       if (list != NULL && list->index < CMM_NR_PAGES) {
+                               /* The head has room after all; drop ours. */
+                               free_page((unsigned long) fresh);
+                       } else {
+                               fresh->next = list;
+                               fresh->index = 0;
+                               cmm_page_list = fresh;
+                               list = fresh;
+                       }
+               }
+
+               hrc = plpar_page_set_loaned(__pa(page));
+               if (hrc) {
+                       pr_err("%s: Can not set page to loaned. rc=%ld\n", __FUNCTION__, hrc);
+                       spin_unlock(&cmm_lock);
+                       free_page(page);
+                       break;
+               }
+
+               /* Record the page and update the accounting under the lock. */
+               list->page[list->index] = page;
+               list->index++;
+               loaned_pages++;
+               totalram_pages--;
+               spin_unlock(&cmm_lock);
+       }
+
+       cmm_dbg("End request with %ld pages unfulfilled\n", nr);
+       return nr;
+}
+
+/**
+ * cmm_free_pages - Free pages and mark them as active
+ * @nr:        number of pages to free
+ *
+ * Pops page addresses off the cmm_page_list arrays, starting at the head,
+ * marks each page active with plpar_page_set_active() and returns it to the
+ * page allocator.  A bookkeeping array is released as soon as it is empty.
+ *
+ * Return value:
+ *     number of pages requested to be freed which were not
+ **/
+static long cmm_free_pages(long nr)
+{
+       struct cmm_page_array *pa;
+       unsigned long addr;
+
+       cmm_dbg("Begin free of %ld pages.\n", nr);
+       spin_lock(&cmm_lock);
+       pa = cmm_page_list;
+
+       /*
+        * cmm_alloc_pages() can leave an empty array at the head of the list
+        * when plpar_page_set_loaned() fails right after a new array was
+        * linked in.  Release such arrays first; otherwise the emptiness
+        * check below would end the loop although older arrays still hold
+        * loaned pages, and the empty array itself would never be freed.
+        */
+       while (pa && pa->index == 0) {
+               pa = pa->next;
+               free_page((unsigned long) cmm_page_list);
+               cmm_page_list = pa;
+       }
+
+       while (nr) {
+               if (!pa || pa->index <= 0)
+                       break;
+               addr = pa->page[--pa->index];
+
+               if (pa->index == 0) {
+                       /* Last entry taken: unlink and free this array. */
+                       pa = pa->next;
+                       free_page((unsigned long) cmm_page_list);
+                       cmm_page_list = pa;
+               }
+
+               plpar_page_set_active(__pa(addr));
+               free_page(addr);
+               loaned_pages--;
+               nr--;
+               totalram_pages++;
+       }
+       spin_unlock(&cmm_lock);
+       cmm_dbg("End request with %ld pages unfulfilled\n", nr);
+       return nr;
+}
+
+/**
+ * cmm_oom_notify - OOM notifier
+ * @self:      notifier block struct
+ * @dummy:     not used
+ * @parm:      returned - number of pages freed
+ *
+ * Gives up to oom_kb worth of loaned pages back to the kernel, resets the
+ * loan target to the number of pages still loaned, and adds the number of
+ * pages actually freed to both *@parm and oom_freed_pages.
+ *
+ * Return value:
+ *     NOTIFY_OK
+ **/
+static int cmm_oom_notify(struct notifier_block *self,
+                         unsigned long dummy, void *parm)
+{
+       unsigned long *freed = parm;
+       /*
+        * Sample oom_kb once: it is a writable module parameter, and
+        * re-reading it after cmm_free_pages() could make the computed
+        * number of freed pages inconsistent (even negative).
+        */
+       long requested = KB2PAGES(oom_kb);
+       long released;
+
+       cmm_dbg("OOM processing started\n");
+       released = requested - cmm_free_pages(requested);
+       loaned_pages_target = loaned_pages;
+       *freed += released;
+       oom_freed_pages += released;
+       cmm_dbg("OOM processing complete\n");
+       return NOTIFY_OK;
+}
+
+/**
+ * cmm_get_mpp - Read memory performance parameters
+ *
+ * Makes hcall to query the current page loan request from the hypervisor
+ * and derives loaned_pages_target from it.  The target is reduced by the
+ * pages already returned through the OOM notifier and limited so that at
+ * least min_mem_mb of memory stays active.  If the hcall fails the target
+ * is left unchanged.
+ *
+ * Return value:
+ *     nothing
+ **/
+static void cmm_get_mpp(void)
+{
+       int rc;
+       struct hvcall_mpp_data mpp_data;
+       signed long active_pages_target, page_loan_request, target;
+       signed long total_pages, min_mem_pages;
+
+       rc = h_get_mpp(&mpp_data);
+
+       if (rc != H_SUCCESS)
+               return;
+
+       total_pages = totalram_pages + loaned_pages;
+       min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
+
+       /*
+        * Work in signed arithmetic and clamp at zero.  With unsigned math a
+        * request to return more pages than are loaned, or a min_mem_mb
+        * larger than total memory, wraps around to a huge loan target.
+        */
+       page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
+       target = page_loan_request + (signed long)loaned_pages;
+
+       if (target < 0 || total_pages < min_mem_pages)
+               target = 0;
+
+       /* target is non-negative here, so the unsigned compare is safe */
+       if ((unsigned long)target > oom_freed_pages)
+               target -= oom_freed_pages;
+       else
+               target = 0;
+
+       active_pages_target = total_pages - target;
+
+       /* Never loan so much that less than min_mem_mb stays active. */
+       if (min_mem_pages > active_pages_target)
+               target = total_pages - min_mem_pages;
+
+       if (target < 0)
+               target = 0;
+
+       loaned_pages_target = target;
+
+       cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
+               page_loan_request, loaned_pages, loaned_pages_target,
+               oom_freed_pages, totalram_pages);
+}
+
+/* Notifier block passed to register_oom_notifier(); calls cmm_oom_notify(). */
+static struct notifier_block cmm_oom_nb = {
+       .notifier_call = cmm_oom_notify
+};
+
+/**
+ * cmm_thread - CMM task thread
+ * @dummy:     not used
+ *
+ * Sleeps for "delay" seconds, refreshes the loan target with cmm_get_mpp()
+ * and then loans or reclaims the difference between the target and the
+ * pages currently loaned.  The loop ends when the thread is asked to stop
+ * or the sleep returns early.
+ *
+ * Return value:
+ *     0
+ **/
+static int cmm_thread(void *dummy)
+{
+       for (;;) {
+               unsigned long remaining = msleep_interruptible(delay * 1000);
+
+               if (kthread_should_stop() || remaining) {
+                       /* Leaving: drop any outstanding loan target. */
+                       loaned_pages_target = loaned_pages;
+                       return 0;
+               }
+
+               cmm_get_mpp();
+
+               if (loaned_pages_target > loaned_pages) {
+                       long missing;
+
+                       missing = cmm_alloc_pages(loaned_pages_target - loaned_pages);
+                       /* Could not loan everything: settle for what we have. */
+                       if (missing)
+                               loaned_pages_target = loaned_pages;
+               } else if (loaned_pages_target < loaned_pages) {
+                       cmm_free_pages(loaned_pages - loaned_pages_target);
+               }
+       }
+}
+
+/*
+ * CMM_SHOW - define a read-only sysdev attribute.
+ * Expands to a show_<name>() function that sprintf()s the given format and
+ * arguments into the sysfs buffer, followed by the matching SYSDEV_ATTR
+ * declaration with S_IRUGO permissions and no store handler.
+ */
+#define CMM_SHOW(name, format, args...)                        \
+       static ssize_t show_##name(struct sys_device *dev, char *buf)   \
+       {                                                       \
+               return sprintf(buf, format, ##args);            \
+       }                                                       \
+       static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
+
+/* Both counters are kept in pages and reported in kilobytes. */
+CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
+CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
+
+/* sysfs show handler: report the pages freed on OOM, converted to kB. */
+static ssize_t show_oom_pages(struct sys_device *dev, char *buf)
+{
+       unsigned long freed_kb = PAGES2KB(oom_freed_pages);
+
+       return sprintf(buf, "%lu\n", freed_kb);
+}
+
+/*
+ * sysfs store handler: reset the OOM-freed page counter.
+ * Only the value 0 is accepted, and only from a CAP_SYS_ADMIN caller;
+ * returns -EPERM or -EBADMSG otherwise, @count on success.
+ */
+static ssize_t store_oom_pages(struct sys_device *dev,
+                              const char *buf, size_t count)
+{
+       unsigned long requested;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       requested = simple_strtoul (buf, NULL, 10);
+       if (requested)
+               return -EBADMSG;
+
+       oom_freed_pages = 0;
+       return count;
+}
+
+/* oom_freed_kb: world-readable, writable by owner (used to reset the count). */
+static SYSDEV_ATTR(oom_freed_kb, S_IWUSR| S_IRUGO,
+                  show_oom_pages, store_oom_pages);
+
+/* Attribute files created by cmm_sysfs_register() on the cmm sysdev. */
+static struct sysdev_attribute *cmm_attrs[] = {
+       &attr_loaned_kb,
+       &attr_loaned_target_kb,
+       &attr_oom_freed_kb,
+};
+
+/* sysdev class the cmm device is registered under. */
+static struct sysdev_class cmm_sysdev_class = {
+       .name = "cmm",
+};
+
+/**
+ * cmm_sysfs_register - Register with sysfs
+ * @sysdev:    sys_device to register under the cmm class
+ *
+ * Registers the cmm sysdev class, then the device, then one file per entry
+ * of cmm_attrs[].  On failure everything registered so far is undone in
+ * reverse order.
+ *
+ * Return value:
+ *     0 on success / other on failure
+ **/
+static int cmm_sysfs_register(struct sys_device *sysdev)
+{
+       int rc;
+       int idx = 0;
+
+       rc = sysdev_class_register(&cmm_sysdev_class);
+       if (rc)
+               return rc;
+
+       sysdev->id = 0;
+       sysdev->cls = &cmm_sysdev_class;
+
+       rc = sysdev_register(sysdev);
+       if (rc)
+               goto class_unregister;
+
+       while (idx < ARRAY_SIZE(cmm_attrs)) {
+               rc = sysdev_create_file(sysdev, cmm_attrs[idx]);
+               if (rc)
+                       goto fail;
+               idx++;
+       }
+
+       return 0;
+
+fail:
+       /* Remove only the files that were created before the failure. */
+       for (idx--; idx >= 0; idx--)
+               sysdev_remove_file(sysdev, cmm_attrs[idx]);
+       sysdev_unregister(sysdev);
+class_unregister:
+       sysdev_class_unregister(&cmm_sysdev_class);
+       return rc;
+}
+
+/**
+ * cmm_unregister_sysfs - Unregister from sysfs
+ * @sysdev:    sys_device previously set up by cmm_sysfs_register()
+ *
+ * Removes every attribute file listed in cmm_attrs[], then unregisters the
+ * device and finally the cmm sysdev class.
+ **/
+static void cmm_unregister_sysfs(struct sys_device *sysdev)
+{
+       struct sysdev_attribute **attr = cmm_attrs;
+       struct sysdev_attribute **end = cmm_attrs + ARRAY_SIZE(cmm_attrs);
+
+       while (attr < end) {
+               sysdev_remove_file(sysdev, *attr);
+               attr++;
+       }
+       sysdev_unregister(sysdev);
+       sysdev_class_unregister(&cmm_sysdev_class);
+}
+
+/**
+ * cmm_init - Module initialization
+ *
+ * Requires the FW_FEATURE_CMO firmware feature.  Registers the OOM notifier
+ * and the sysfs interface and, unless CMM is disabled, starts the polling
+ * thread.  Anything registered is undone again if a later step fails.
+ *
+ * Return value:
+ *     0 on success / other on failure
+ **/
+static int cmm_init(void)
+{
+       struct task_struct *thread;
+       int rc;
+
+       if (!firmware_has_feature(FW_FEATURE_CMO))
+               return -EOPNOTSUPP;
+
+       if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
+               return rc;
+
+       if ((rc = cmm_sysfs_register(&cmm_sysdev)))
+               goto out_oom_notifier;
+
+       if (cmm_disabled)
+               return rc;
+
+       /*
+        * Publish the thread pointer only on success.  cmm_exit() and the
+        * "disable" parameter handler test cmm_thread_ptr only against NULL
+        * before calling kthread_stop(), so it must never hold an ERR_PTR.
+        */
+       thread = kthread_run(cmm_thread, NULL, "cmmthread");
+       if (IS_ERR(thread)) {
+               rc = PTR_ERR(thread);
+               goto out_unregister_sysfs;
+       }
+       cmm_thread_ptr = thread;
+
+       return rc;
+
+out_unregister_sysfs:
+       cmm_unregister_sysfs(&cmm_sysdev);
+out_oom_notifier:
+       unregister_oom_notifier(&cmm_oom_nb);
+       return rc;
+}
+
+/**
+ * cmm_exit - Module exit
+ *
+ * Stops the polling thread if one is running, unregisters the OOM notifier,
+ * hands every loaned page back via cmm_free_pages() and removes the sysfs
+ * interface.
+ *
+ * Return value:
+ *     nothing
+ **/
+static void cmm_exit(void)
+{
+       /* cmm_thread_ptr is NULL when the thread was never started */
+       if (cmm_thread_ptr)
+               kthread_stop(cmm_thread_ptr);
+       unregister_oom_notifier(&cmm_oom_nb);
+       /* request the return of all pages that are currently loaned */
+       cmm_free_pages(loaned_pages);
+       cmm_unregister_sysfs(&cmm_sysdev);
+}
+
+/**
+ * cmm_set_disable - Disable/Enable CMM
+ * @val:       new parameter value as a string; must parse to 0 or 1
+ * @kp:        kernel parameter descriptor (not used)
+ *
+ * Disabling stops the polling thread and frees all loaned pages; enabling
+ * starts a new polling thread.  Writing the current state is a no-op.
+ *
+ * Return value:
+ *     0 on success / other on failure
+ **/
+static int cmm_set_disable(const char *val, struct kernel_param *kp)
+{
+       int disable = simple_strtoul(val, NULL, 10);
+       struct task_struct *thread;
+
+       if (disable != 0 && disable != 1)
+               return -EINVAL;
+
+       if (disable && !cmm_disabled) {
+               if (cmm_thread_ptr)
+                       kthread_stop(cmm_thread_ptr);
+               cmm_thread_ptr = NULL;
+               cmm_free_pages(loaned_pages);
+       } else if (!disable && cmm_disabled) {
+               /*
+                * Keep cmm_thread_ptr untouched on failure: storing the
+                * ERR_PTR there would later be passed to kthread_stop() by
+                * cmm_exit() or by a subsequent disable request.
+                */
+               thread = kthread_run(cmm_thread, NULL, "cmmthread");
+               if (IS_ERR(thread))
+                       return PTR_ERR(thread);
+               cmm_thread_ptr = thread;
+       }
+
+       cmm_disabled = disable;
+       return 0;
+}
+
+module_param_call(disable, cmm_set_disable, param_get_uint,
+                 &cmm_disabled, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
+                "[Default=" __stringify(CMM_DISABLE) "]");
+
+module_init(cmm_init);
+module_exit(cmm_exit);
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c

index 5377dd4b849a5bc8fced028eae7bd2b59c758207..a8c446697f9e3b0e737b418f1f966fbec3bd1f49 100644 (file)
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -48,7 +48,7 @@
  #include "plpar_wrappers.h"
  
  
-static void tce_build_pSeries(struct iommu_table *tbl, long index,
+static int tce_build_pSeries(struct iommu_table *tbl, long index,
                               long npages, unsigned long uaddr,
                               enum dma_data_direction direction,
                               struct dma_attrs *attrs)
@@ -72,6 +72,7 @@ static void tce_build_pSeries(struct iommu_table *tbl, long index,
                 uaddr += TCE_PAGE_SIZE;
                 tcep++;
         }
+       return 0;
  }
  
  
@@ -94,14 +95,19 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
         return *tcep;
  }
  
-static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
+static void tce_free_pSeriesLP(struct iommu_table*, long, long);
+static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
+
+static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
                                 long npages, unsigned long uaddr,
                                 enum dma_data_direction direction,
                                 struct dma_attrs *attrs)
  {
-       u64 rc;
+       u64 rc = 0;
         u64 proto_tce, tce;
         u64 rpn;
+       int ret = 0;
+       long tcenum_start = tcenum, npages_start = npages;
  
         rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
         proto_tce = TCE_PCI_READ;
@@ -112,6 +118,13 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
                 tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
                 rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
  
+               if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
+                       ret = (int)rc;
+                       tce_free_pSeriesLP(tbl, tcenum_start,
+                                          (npages_start - (npages + 1)));
+                       break;
+               }
+
                 if (rc && printk_ratelimit()) {
                         printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
                         printk("\tindex   = 0x%lx\n", (u64)tbl->it_index);
@@ -123,25 +136,27 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
                 tcenum++;
                 rpn++;
         }
+       return ret;
  }
  
  static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
  
-static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
+static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
                                      long npages, unsigned long uaddr,
                                      enum dma_data_direction direction,
                                      struct dma_attrs *attrs)
  {
-       u64 rc;
+       u64 rc = 0;
         u64 proto_tce;
         u64 *tcep;
         u64 rpn;
         long l, limit;
+       long tcenum_start = tcenum, npages_start = npages;
+       int ret = 0;
  
         if (npages == 1) {
-               tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
-                                   direction, attrs);
-               return;
+               return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
+                                          direction, attrs);
         }
  
         tcep = __get_cpu_var(tce_page);
@@ -153,9 +168,8 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
                 tcep = (u64 *)__get_free_page(GFP_ATOMIC);
                 /* If allocation fails, fall back to the loop implementation */
                 if (!tcep) {
-                       tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
+                       return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
                                             direction, attrs);
-                       return;
                 }
                 __get_cpu_var(tce_page) = tcep;
         }
@@ -187,6 +201,13 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
                 tcenum += limit;
         } while (npages > 0 && !rc);
  
+       if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
+               ret = (int)rc;
+               tce_freemulti_pSeriesLP(tbl, tcenum_start,
+                                       (npages_start - (npages + limit)));
+               return ret;
+       }
+
         if (rc && printk_ratelimit()) {
                 printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
                 printk("\tindex   = 0x%lx\n", (u64)tbl->it_index);
@@ -194,6 +215,7 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
                 printk("\ttce[0] val = 0x%lx\n", tcep[0]);
                 show_stack(current, (unsigned long *)__get_SP());
         }
+       return ret;
  }
  
  static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h

index d8680b589dc96c325afb09725c47f5c6a008a3e6..a437267c6bf86efb7443a44bfbf9ed693614371a 100644 (file)
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -42,6 +42,16 @@ static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa)
         return vpa_call(0x3, cpu, vpa);
  }
  
+static inline long plpar_page_set_loaned(unsigned long vpa)
+{
+       return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa, 0);
+}
+
+static inline long plpar_page_set_active(unsigned long vpa)
+{
+       return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa, 0);
+}
+
  extern void vpa_init(int cpu);
  
  static inline long plpar_pte_enter(unsigned long flags,
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c

index 90beb444e1dd670f64836fdceff20c247b79ed07..063a0d2fba30e2576771a300460220c4e21c4cec 100644 (file)
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -314,6 +314,76 @@ static int pseries_set_xdabr(unsigned long dabr)
                         H_DABRX_KERNEL | H_DABRX_USER);
  }
  
+#define CMO_CHARACTERISTICS_TOKEN 44
+#define CMO_MAXLENGTH 1026
+
+/**
+ * pSeries_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
+ * handle that here. (Stolen from parse_system_parameter_string)
+ */
+void pSeries_cmo_feature_init(void)
+{
+       char *ptr, *key, *value, *end;
+       int call_status;
+       int PrPSP = -1;
+       int SecPSP = -1;
+
+       pr_debug(" -> fw_cmo_feature_init()\n");
+       spin_lock(&rtas_data_buf_lock);
+       memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
+       call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+                               NULL,
+                               CMO_CHARACTERISTICS_TOKEN,
+                               __pa(rtas_data_buf),
+                               RTAS_DATA_BUF_SIZE);
+
+       if (call_status != 0) {
+               spin_unlock(&rtas_data_buf_lock);
+               pr_debug("CMO not available\n");
+               pr_debug(" <- fw_cmo_feature_init()\n");
+               return;
+       }
+
+       end = rtas_data_buf + CMO_MAXLENGTH - 2;
+       ptr = rtas_data_buf + 2;        /* step over strlen value */
+       key = value = ptr;
+
+       while (*ptr && (ptr <= end)) {
+               /* Separate the key and value by replacing '=' with '\0' and
+                * point the value at the string after the '='
+                */
+               if (ptr[0] == '=') {
+                       ptr[0] = '\0';
+                       value = ptr + 1;
+               } else if (ptr[0] == '\0' || ptr[0] == ',') {
+                       /* Terminate the string containing the key/value pair */
+                       ptr[0] = '\0';
+
+                       if (key == value) {
+                               pr_debug("Malformed key/value pair\n");
+                               /* Never found a '=', end processing */
+                               break;
+                       }
+
+                       if (0 == strcmp(key, "PrPSP"))
+                               PrPSP = simple_strtol(value, NULL, 10);
+                       else if (0 == strcmp(key, "SecPSP"))
+                               SecPSP = simple_strtol(value, NULL, 10);
+                       value = key = ptr + 1;
+               }
+               ptr++;
+       }
+
+       if (PrPSP != -1 || SecPSP != -1) {
+               pr_info("CMO enabled\n");
+               pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP);
+               powerpc_firmware_features |= FW_FEATURE_CMO;
+       } else
+               pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP);
+       spin_unlock(&rtas_data_buf_lock);
+       pr_debug(" <- fw_cmo_feature_init()\n");
+}
+
  /*
   * Early initialization.  Relocation is on but do not reference unbolted pages
   */
@@ -329,6 +399,7 @@ static void __init pSeries_init_early(void)
         else if (firmware_has_feature(FW_FEATURE_XDABR))
                 ppc_md.set_dabr = pseries_set_xdabr;
  
+       pSeries_cmo_feature_init();
         iommu_init_early_pSeries();
  
         pr_debug(" <- pSeries_init_early()\n");
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c

index de8c8b542cfa157d76281163e7ca21dfe1071b36..89639ecbf38162621ca3d57654866ff08f4033d5 100644 (file)
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -147,7 +147,7 @@ static void dart_flush(struct iommu_table *tbl)
         }
  }
  
-static void dart_build(struct iommu_table *tbl, long index,
+static int dart_build(struct iommu_table *tbl, long index,
                        long npages, unsigned long uaddr,
                        enum dma_data_direction direction,
                        struct dma_attrs *attrs)
@@ -184,6 +184,7 @@ static void dart_build(struct iommu_table *tbl, long index,
         } else {
                 dart_dirty = 1;
         }
+       return 0;
  }
  
  
diff --git a/arch/powerpc/sysdev/qe_lib/Kconfig b/arch/powerpc/sysdev/qe_lib/Kconfig

index 4bb18f57901e5c3585bc5e9bd60d18704239c96c..1ce546462be5af559539fb00314c8fc89d0c03b1 100644 (file)
--- a/arch/powerpc/sysdev/qe_lib/Kconfig
+++ b/arch/powerpc/sysdev/qe_lib/Kconfig
@@ -29,7 +29,7 @@ config QE_GPIO
         bool "QE GPIO support"
         depends on QUICC_ENGINE
         select GENERIC_GPIO
-       select HAVE_GPIO_LIB
+       select ARCH_REQUIRE_GPIOLIB
         help
           Say Y here if you're going to use hardware that connects to the
           QE GPIOs.
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig

index eb530b4128ba9353a9fa6d397102b0e52de77cd7..2ed88122be9319ea735da3b7b7a6cfd8e6700f67 100644 (file)
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -565,6 +565,7 @@ bool "s390 guest support (EXPERIMENTAL)"
         depends on 64BIT && EXPERIMENTAL
         select VIRTIO
         select VIRTIO_RING
+       select VIRTIO_CONSOLE
         help
           Select this option if you want to run the kernel under s390 linux
  endmenu
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c

index 288ad490a6dd7ce931c0622c353d078eaf7a9799..4f82e5b5f879f53d5f76effb049ed8020e019b61 100644 (file)
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -270,7 +270,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
         __ctl_store(kcb->kprobe_saved_ctl, 9, 11);
  }
  
-/* Called with kretprobe_lock held */
  void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
                                         struct pt_regs *regs)
  {
@@ -377,8 +376,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
         unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
  
         INIT_HLIST_HEAD(&empty_rp);
-       spin_lock_irqsave(&kretprobe_lock, flags);
-       head = kretprobe_inst_table_head(current);
+       kretprobe_hash_lock(current, &head, &flags);
  
         /*
          * It is possible to have multiple instances associated with a given
@@ -417,7 +415,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
         regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
  
         reset_current_kprobe();
-       spin_unlock_irqrestore(&kretprobe_lock, flags);
+       kretprobe_hash_unlock(current, &flags);
         preempt_enable_no_resched();
  
         hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c

index b358e18273b07f2e1e31315e77835fd84f880bfc..62122bad1e3316f2112ae66c22e51859c7214b52 100644 (file)
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -54,6 +54,7 @@
  #include <asm/sections.h>
  #include <asm/ebcdic.h>
  #include <asm/compat.h>
+#include <asm/kvm_virtio.h>
  
  long psw_kernel_bits   = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY |
                            PSW_MASK_MCHECK | PSW_DEFAULT_KEY);
@@ -766,7 +767,8 @@ setup_arch(char **cmdline_p)
                 printk("We are running under VM (64 bit mode)\n");
         else if (MACHINE_IS_KVM) {
                 printk("We are running under KVM (64 bit mode)\n");
-               add_preferred_console("ttyS", 1, NULL);
+               add_preferred_console("hvc", 0, NULL);
+               s390_virtio_console_init();
         } else
                 printk("We are running native (64 bit mode)\n");
  #endif /* CONFIG_64BIT */
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c

index 212d618b00952a17152e9985b8a3d27a3ac657cd..632b13e100538704758cea2eebd75c6cfdc15bf7 100644 (file)
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -9,7 +9,6 @@
  #include <linux/device.h>
  #include <linux/bootmem.h>
  #include <linux/sched.h>
-#include <linux/kthread.h>
  #include <linux/workqueue.h>
  #include <linux/cpu.h>
  #include <linux/smp.h>
@@ -230,20 +229,9 @@ void arch_update_cpu_topology(void)
         }
  }
  
-static int topology_kthread(void *data)
-{
-       arch_reinit_sched_domains();
-       return 0;
-}
-
  static void topology_work_fn(struct work_struct *work)
  {
-       /* We can't call arch_reinit_sched_domains() from a multi-threaded
-        * workqueue context since it may deadlock in case of cpu hotplug.
-        * So we have to create a kernel thread in order to call
-        * arch_reinit_sched_domains().
-        */
-       kthread_run(topology_kthread, NULL, "topology_update");
+       arch_reinit_sched_domains();
  }
  
  void topology_schedule_update(void)
diff --git a/arch/sh/boot/compressed/misc_32.c b/arch/sh/boot/compressed/misc_32.c

index adcea31e663eade000c375f47badafa14189caf8..f386997e4d9c3b0c347b209b54852d3d4194e2bb 100644 (file)
--- a/arch/sh/boot/compressed/misc_32.c
+++ b/arch/sh/boot/compressed/misc_32.c
@@ -74,8 +74,6 @@ static unsigned outcnt = 0;  /* bytes in output buffer */
  static int  fill_inbuf(void);
  static void flush_window(void);
  static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
  
  extern char input_data[];
  extern int input_len;
@@ -84,11 +82,7 @@ static long bytes_out = 0;
  static uch *output_data;
  static unsigned long output_ptr = 0;
  
-static void *malloc(int size);
-static void free(void *where);
  static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
  
  int puts(const char *);
  
@@ -101,38 +95,6 @@ static unsigned long free_mem_end_ptr;
  
  #include "../../../../lib/inflate.c"
  
-static void *malloc(int size)
-{
-       void *p;
-
-       if (size <0) error("Malloc error");
-       if (free_mem_ptr == 0) error("Memory error");
-
-       free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
-       p = (void *)free_mem_ptr;
-       free_mem_ptr += size;
-
-       if (free_mem_ptr >= free_mem_end_ptr)
-               error("Out of memory");
-
-       return p;
-}
-
-static void free(void *where)
-{      /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-       *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-       free_mem_ptr = (long) *ptr;
-}
-
  #ifdef CONFIG_SH_STANDARD_BIOS
  size_t strlen(const char *s)
  {
diff --git a/arch/sh/boot/compressed/misc_64.c b/arch/sh/boot/compressed/misc_64.c

index a006ef89b9dd26bbf655f5e35a10cf73495cf331..2941657e18aafe3aad77352c574100771864728d 100644 (file)
--- a/arch/sh/boot/compressed/misc_64.c
+++ b/arch/sh/boot/compressed/misc_64.c
@@ -72,8 +72,6 @@ static unsigned outcnt = 0;   /* bytes in output buffer */
  static int fill_inbuf(void);
  static void flush_window(void);
  static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
  
  extern char input_data[];
  extern int input_len;
@@ -82,11 +80,7 @@ static long bytes_out = 0;
  static uch *output_data;
  static unsigned long output_ptr = 0;
  
-static void *malloc(int size);
-static void free(void *where);
  static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
  
  static void puts(const char *);
  
@@ -99,40 +93,6 @@ static unsigned long free_mem_end_ptr;
  
  #include "../../../../lib/inflate.c"
  
-static void *malloc(int size)
-{
-       void *p;
-
-       if (size < 0)
-               error("Malloc error\n");
-       if (free_mem_ptr == 0)
-               error("Memory error\n");
-
-       free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
-       p = (void *) free_mem_ptr;
-       free_mem_ptr += size;
-
-       if (free_mem_ptr >= free_mem_end_ptr)
-               error("\nOut of memory\n");
-
-       return p;
-}
-
-static void free(void *where)
-{                              /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-       *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-       free_mem_ptr = (long) *ptr;
-}
-
  void puts(const char *s)
  {
  }
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig

index 789724e61e8345e90ca7b155e1c23b543781de3c..375de7c6d082b09547fd285bc647441e02fec89d 100644 (file)
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -298,20 +298,6 @@ config UNIX98_PTYS
           Read the instructions in <file:Documentation/Changes> pertaining to
           pseudo terminals. It's safe to say N.
  
-config UNIX98_PTY_COUNT
-       int "Maximum number of Unix98 PTYs in use (0-2048)"
-       depends on UNIX98_PTYS
-       default "256"
-       help
-         The maximum number of Unix98 PTYs that can be used at any one time.
-         The default is 256, and should be enough for desktop systems. Server
-         machines which support incoming telnet/rlogin/ssh connections and/or
-         serve several X terminals may want to increase this: every incoming
-         connection and every xterm uses up one PTY.
-
-         When not in use, each additional set of 256 PTYs occupy
-         approximately 8 KB of kernel memory on 32-bit architectures.
-
  endmenu
  
  source "fs/Kconfig"
diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c

index f43b5d7553545ab8dcf7ec3900f9ba9100602591..201a6e547e4aa0d256d062d251525c30ca49ea8f 100644 (file)
--- a/arch/sparc64/kernel/kprobes.c
+++ b/arch/sparc64/kernel/kprobes.c
@@ -478,9 +478,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
         return 0;
  }
  
-/* Called with kretprobe_lock held.  The value stored in the return
- * address register is actually 2 instructions before where the
- * callee will return to.  Sequences usually look something like this
+/* The value stored in the return address register is actually 2
+ * instructions before where the callee will return to.
+ * Sequences usually look something like this
   *
   *             call    some_function   <--- return register points here
   *              nop                    <--- call delay slot
@@ -512,8 +512,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
         unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
  
         INIT_HLIST_HEAD(&empty_rp);
-       spin_lock_irqsave(&kretprobe_lock, flags);
-       head = kretprobe_inst_table_head(current);
+       kretprobe_hash_lock(current, &head, &flags);
  
         /*
          * It is possible to have multiple instances associated with a given
@@ -553,7 +552,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
         regs->tnpc = orig_ret_address + 4;
  
         reset_current_kprobe();
-       spin_unlock_irqrestore(&kretprobe_lock, flags);
+       kretprobe_hash_unlock(current, &flags);
         preempt_enable_no_resched();
  
         hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig

index b2ddfcf0172848f645370e1c390fdb29347961ae..e3cba0b45600fc69f7ecf11844bece0bad8b9acc 100644 (file)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,11 +23,13 @@ config X86
         select HAVE_OPROFILE
         select HAVE_IOREMAP_PROT
         select HAVE_KPROBES
+       select ARCH_WANT_OPTIONAL_GPIOLIB if !X86_RDC321X
         select HAVE_KRETPROBES
         select HAVE_DYNAMIC_FTRACE
         select HAVE_FTRACE
         select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
         select HAVE_ARCH_KGDB if !X86_VOYAGER
+       select HAVE_EFFICIENT_UNALIGNED_ACCESS
  
  config ARCH_DEFCONFIG
         string
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c

index bc5553b496f719c665ed1e7876eae3e0f2d69b1a..9fea737064798bfd10ab78cf729b14cfd8b87681 100644 (file)
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -182,8 +182,6 @@ static unsigned             outcnt;
  static int  fill_inbuf(void);
  static void flush_window(void);
  static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
  
  /*
   * This is set up by the setup-routine at boot-time
@@ -196,9 +194,6 @@ extern int input_len;
  
  static long bytes_out;
  
-static void *malloc(int size);
-static void free(void *where);
-
  static void *memset(void *s, int c, unsigned n);
  static void *memcpy(void *dest, const void *src, unsigned n);
  
@@ -220,40 +215,6 @@ static int lines, cols;
  
  #include "../../../../lib/inflate.c"
  
-static void *malloc(int size)
-{
-       void *p;
-
-       if (size < 0)
-               error("Malloc error");
-       if (free_mem_ptr <= 0)
-               error("Memory error");
-
-       free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
-       p = (void *)free_mem_ptr;
-       free_mem_ptr += size;
-
-       if (free_mem_ptr >= free_mem_end_ptr)
-               error("Out of memory");
-
-       return p;
-}
-
-static void free(void *where)
-{      /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-       *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-       free_mem_ptr = (memptr) *ptr;
-}
-
  static void scroll(void)
  {
         int i;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c

index 43c019f85f0db749425e451957bd2279cee47b80..6c27679ec6aa1753be87e05b6c72a10553f69574 100644 (file)
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
                 regs->ip = (unsigned long)p->ainsn.insn;
  }
  
-/* Called with kretprobe_lock held */
  void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
                                       struct pt_regs *regs)
  {
@@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
         unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
  
         INIT_HLIST_HEAD(&empty_rp);
-       spin_lock_irqsave(&kretprobe_lock, flags);
-       head = kretprobe_inst_table_head(current);
+       kretprobe_hash_lock(current, &head, &flags);
         /* fixup registers */
  #ifdef CONFIG_X86_64
         regs->cs = __KERNEL_CS;
@@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
  
         kretprobe_assert(ri, orig_ret_address, trampoline_address);
  
-       spin_unlock_irqrestore(&kretprobe_lock, flags);
+       kretprobe_hash_unlock(current, &flags);
  
         hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
                 hlist_del(&ri->hlist);
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c

index 151f2d171f7c7386b7a14f574d4c7f34edfab0f3..19e7fc7c2c4ff4c9be369fec3b3fdfae3b3d2467 100644 (file)
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -29,6 +29,7 @@
  #include <linux/mm.h>
  #include <linux/spinlock.h>
  #include <linux/string.h>
+#include <linux/crash_dump.h>
  #include <linux/dma-mapping.h>
  #include <linux/bitops.h>
  #include <linux/pci_ids.h>
@@ -167,6 +168,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl);
  static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
  static void calioc2_tce_cache_blast(struct iommu_table *tbl);
  static void calioc2_dump_error_regs(struct iommu_table *tbl);
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
+static void get_tce_space_from_tar(void);
  
  static struct cal_chipset_ops calgary_chip_ops = {
         .handle_quirks = calgary_handle_quirks,
@@ -830,7 +833,11 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
  
         tbl = pci_iommu(dev->bus);
         tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
-       tce_free(tbl, 0, tbl->it_size);
+
+       if (is_kdump_kernel())
+               calgary_init_bitmap_from_tce_table(tbl);
+       else
+               tce_free(tbl, 0, tbl->it_size);
  
         if (is_calgary(dev->device))
                 tbl->chip_ops = &calgary_chip_ops;
@@ -1209,6 +1216,10 @@ static int __init calgary_init(void)
         if (ret)
                 return ret;
  
+       /* Purely for kdump kernel case */
+       if (is_kdump_kernel())
+               get_tce_space_from_tar();
+
         do {
                 dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
                 if (!dev)
@@ -1339,6 +1350,61 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev)
         return (val != 0xffffffff);
  }
  
+/*
+ * calgary_init_bitmap_from_tce_table():
+ * Function for the kdump case. In the second/kdump kernel, initialize
+ * the bitmap based on the tce table entries obtained from the first kernel
+ */
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
+{
+       u64 *tp;
+       unsigned int index;
+       tp = ((u64 *)tbl->it_base);
+       for (index = 0 ; index < tbl->it_size; index++) {
+               if (*tp != 0x0)
+                       set_bit(index, tbl->it_map);
+               tp++;
+       }
+}
+
+/*
+ * get_tce_space_from_tar():
+ * Function for the kdump case. Get the tce tables from the first kernel
+ * by reading the contents of the base address register of the calgary iommu
+ */
+static void get_tce_space_from_tar()
+{
+       int bus;
+       void __iomem *target;
+       unsigned long tce_space;
+
+       for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
+               struct calgary_bus_info *info = &bus_info[bus];
+               unsigned short pci_device;
+               u32 val;
+
+               val = read_pci_config(bus, 0, 0, 0);
+               pci_device = (val & 0xFFFF0000) >> 16;
+
+               if (!is_cal_pci_dev(pci_device))
+                       continue;
+               if (info->translation_disabled)
+                       continue;
+
+               if (calgary_bus_has_devices(bus, pci_device) ||
+                                               translate_empty_slots) {
+                       target = calgary_reg(bus_info[bus].bbar,
+                                               tar_offset(bus));
+                       tce_space = be64_to_cpu(readq(target));
+                       tce_space = tce_space & TAR_SW_BITS;
+
+                       tce_space = tce_space & (~specified_table_size);
+                       info->tce_space = (u64 *)__va(tce_space);
+               }
+       }
+       return;
+}
+
  void __init detect_calgary(void)
  {
         int bus;
@@ -1394,7 +1460,8 @@ void __init detect_calgary(void)
                 return;
         }
  
-       specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE);
+       specified_table_size = determine_tce_table_size((is_kdump_kernel() ?
+                                       saved_max_pfn : max_pfn) * PAGE_SIZE);
  
         for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
                 struct calgary_bus_info *info = &bus_info[bus];
@@ -1412,10 +1479,16 @@ void __init detect_calgary(void)
  
                 if (calgary_bus_has_devices(bus, pci_device) ||
                     translate_empty_slots) {
-                       tbl = alloc_tce_table();
-                       if (!tbl)
-                               goto cleanup;
-                       info->tce_space = tbl;
+                       /*
+                        * If it is kdump kernel, find and use tce tables
+                        * from first kernel, else allocate tce tables here
+                        */
+                       if (!is_kdump_kernel()) {
+                               tbl = alloc_tce_table();
+                               if (!tbl)
+                                       goto cleanup;
+                               info->tce_space = tbl;
+                       }
                         calgary_found = 1;
                 }
         }
diff --git a/block/ioctl.c b/block/ioctl.c

index 52d6385216ad43d1c76c88adae613b7513efa6b5..77185e5c026a659e300c69b276de3c5ccbb7327b 100644 (file)
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -17,6 +17,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
         long long start, length;
         int part;
         int i;
+       int err;
  
         if (!capable(CAP_SYS_ADMIN))
                 return -EACCES;
@@ -61,9 +62,9 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
                                 }
                         }
                         /* all seems OK */
-                       add_partition(disk, part, start, length, ADDPART_FLAG_NONE);
+                       err = add_partition(disk, part, start, length, ADDPART_FLAG_NONE);
                         mutex_unlock(&bdev->bd_mutex);
-                       return 0;
+                       return err;
                 case BLKPG_DEL_PARTITION:
                         if (!disk->part[part-1])
                                 return -ENXIO;
diff --git a/drivers/Makefile b/drivers/Makefile

index 808e0ae66aa80cfeead838ea27731457177a22a7..54ec5e718c0e32296ea0c78e515930713f05c811 100644 (file)
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -5,7 +5,7 @@
  # Rewritten to use lists instead of if-statements.
  #
  
-obj-$(CONFIG_HAVE_GPIO_LIB)    += gpio/
+obj-y                          += gpio/
  obj-$(CONFIG_PCI)              += pci/
  obj-$(CONFIG_PARISC)           += parisc/
  obj-$(CONFIG_RAPIDIO)          += rapidio/
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c

index dc7596f028b69437d8e833a688e15142c97675c8..ef3e5522e1a4a41979ff5ec56b72ed860d004275 100644 (file)
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1273,7 +1273,7 @@ static ssize_t ahci_transmit_led_message(struct ata_port *ap, u32 state,
         void __iomem *mmio = ap->host->iomap[AHCI_PCI_BAR];
         u32 em_ctl;
         u32 message[] = {0, 0};
-       unsigned int flags;
+       unsigned long flags;
         int pmp;
         struct ahci_em_priv *emp;
  
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c

index b0be1d18fee23d5c25d865bba20c0a3560546195..c9c92b00fd555768730b731695bdbac1deeab1e9 100644 (file)
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -184,7 +184,7 @@ firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr,
         struct device *dev = to_dev(kobj);
         struct firmware_priv *fw_priv = dev_get_drvdata(dev);
         struct firmware *fw;
-       ssize_t ret_count = count;
+       ssize_t ret_count;
  
         mutex_lock(&fw_lock);
         fw = fw_priv->fw;
@@ -192,14 +192,8 @@ firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr,
                 ret_count = -ENODEV;
                 goto out;
         }
-       if (offset > fw->size) {
-               ret_count = 0;
-               goto out;
-       }
-       if (offset + ret_count > fw->size)
-               ret_count = fw->size - offset;
-
-       memcpy(buffer, fw->data + offset, ret_count);
+       ret_count = memory_read_from_buffer(buffer, count, &offset,
+                                               fw->data, fw->size);
  out:
         mutex_unlock(&fw_lock);
         return ret_count;
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c

index c04440cd6a32d5bc5031aed5ec4fe50c4d50dd67..181ebb85f0be582e95206aa144c04ee26ff016bc 100644 (file)
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -6,6 +6,7 @@
  
  #include <linux/hdreg.h>
  #include <linux/blkdev.h>
+#include <linux/completion.h>
  #include <linux/delay.h>
  #include <linux/smp_lock.h>
  #include "aoe.h"
@@ -36,7 +37,7 @@ struct ErrMsg {
  
  static struct ErrMsg emsgs[NMSG];
  static int emsgs_head_idx, emsgs_tail_idx;
-static struct semaphore emsgs_sema;
+static struct completion emsgs_comp;
  static spinlock_t emsgs_lock;
  static int nblocked_emsgs_readers;
  static struct class *aoe_class;
@@ -141,7 +142,7 @@ bail:               spin_unlock_irqrestore(&emsgs_lock, flags);
         spin_unlock_irqrestore(&emsgs_lock, flags);
  
         if (nblocked_emsgs_readers)
-               up(&emsgs_sema);
+               complete(&emsgs_comp);
  }
  
  static ssize_t
@@ -221,7 +222,7 @@ aoechr_read(struct file *filp, char __user *buf, size_t cnt, loff_t *off)
  
                 spin_unlock_irqrestore(&emsgs_lock, flags);
  
-               n = down_interruptible(&emsgs_sema);
+               n = wait_for_completion_interruptible(&emsgs_comp);
  
                 spin_lock_irqsave(&emsgs_lock, flags);
  
@@ -269,7 +270,7 @@ aoechr_init(void)
                 printk(KERN_ERR "aoe: can't register char device\n");
                 return n;
         }
-       sema_init(&emsgs_sema, 0);
+       init_completion(&emsgs_comp);
         spin_lock_init(&emsgs_lock);
         aoe_class = class_create(THIS_MODULE, "aoe");
         if (IS_ERR(aoe_class)) {
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c

index dd7ea203f940dc664a9ac74ef9fa157ac3a36b5b..42251095134fa7746e8748150089c2dd570d21d8 100644 (file)
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -196,6 +196,7 @@ static int virtblk_probe(struct virtio_device *vdev)
         int err;
         u64 cap;
         u32 v;
+       u32 blk_size;
  
         if (index_to_minor(index) >= 1 << MINORBITS)
                 return -ENOSPC;
@@ -290,6 +291,13 @@ static int virtblk_probe(struct virtio_device *vdev)
         if (!err)
                 blk_queue_max_hw_segments(vblk->disk->queue, v);
  
+       /* Host can optionally specify the block size of the device */
+       err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
+                               offsetof(struct virtio_blk_config, blk_size),
+                               &blk_size);
+       if (!err)
+               blk_queue_hardsect_size(vblk->disk->queue, blk_size);
+
         add_disk(vblk->disk);
         return 0;
  
@@ -330,7 +338,7 @@ static struct virtio_device_id id_table[] = {
  
  static unsigned int features[] = {
         VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
-       VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO,
+       VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
  };
  
  static struct virtio_driver virtio_blk = {
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig

index 67b07576f8bfd982149e37b4b760fd1f0e3f241b..d0ac944e16961775d5bb428e851ab0ea0e4d7494 100644 (file)
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -36,6 +36,14 @@ config VT
           If unsure, say Y, or else you won't be able to do much with your new
           shiny Linux system :-)
  
+config CONSOLE_TRANSLATIONS
+       depends on VT
+       default y
+       bool "Enable character translations in console" if EMBEDDED
+       ---help---
+         This enables support for font mapping and Unicode translation
+         on virtual consoles.
+
  config VT_CONSOLE
         bool "Support for console on virtual terminal" if EMBEDDED
         depends on VT
@@ -578,11 +586,14 @@ config HVC_DRIVER
           It will automatically be selected if one of the back-end console drivers
           is selected.
  
+config HVC_IRQ
+       bool
  
  config HVC_CONSOLE
         bool "pSeries Hypervisor Virtual Console support"
         depends on PPC_PSERIES
         select HVC_DRIVER
+       select HVC_IRQ
         help
           pSeries machines when partitioned support a hypervisor virtual
           console. This driver allows each pSeries partition to have a console
@@ -593,6 +604,7 @@ config HVC_ISERIES
         depends on PPC_ISERIES
         default y
         select HVC_DRIVER
+       select HVC_IRQ
         help
           iSeries machines support a hypervisor virtual console.
  
@@ -614,13 +626,18 @@ config HVC_XEN
         bool "Xen Hypervisor Console support"
         depends on XEN
         select HVC_DRIVER
+       select HVC_IRQ
         default y
         help
           Xen virtual console device driver
  
  config VIRTIO_CONSOLE
-       bool
+       tristate "Virtio console"
+       depends on VIRTIO
         select HVC_DRIVER
+       help
+         Virtio console for use with lguest and other hypervisors.
+
  
  config HVCS
         tristate "IBM Hypervisor Virtual Console Server support"
diff --git a/drivers/char/Makefile b/drivers/char/Makefile

index 4b6e736cfa02992513ccb33ef7855d54b52f2177..8a161c30e1dc1670ebd5576ccc7669fdc857aaf7 100644 (file)
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -12,8 +12,8 @@ obj-y  += mem.o random.o tty_io.o n_tty.o tty_ioctl.o tty_ldisc.o
  obj-$(CONFIG_LEGACY_PTYS)      += pty.o
  obj-$(CONFIG_UNIX98_PTYS)      += pty.o
  obj-y                          += misc.o
-obj-$(CONFIG_VT)               += vt_ioctl.o vc_screen.o consolemap.o \
-                                  consolemap_deftbl.o selection.o keyboard.o
+obj-$(CONFIG_VT)               += vt_ioctl.o vc_screen.o selection.o keyboard.o
+obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o
  obj-$(CONFIG_HW_CONSOLE)       += vt.o defkeymap.o
  obj-$(CONFIG_AUDIT)            += tty_audit.o
  obj-$(CONFIG_MAGIC_SYSRQ)      += sysrq.o
@@ -48,6 +48,7 @@ obj-$(CONFIG_HVC_ISERIES)     += hvc_iseries.o
  obj-$(CONFIG_HVC_RTAS)         += hvc_rtas.o
  obj-$(CONFIG_HVC_BEAT)         += hvc_beat.o
  obj-$(CONFIG_HVC_DRIVER)       += hvc_console.o
+obj-$(CONFIG_HVC_IRQ)          += hvc_irq.o
  obj-$(CONFIG_HVC_XEN)          += hvc_xen.o
  obj-$(CONFIG_VIRTIO_CONSOLE)   += virtio_console.o
  obj-$(CONFIG_RAW_DRIVER)       += raw.o
@@ -63,7 +64,6 @@ obj-$(CONFIG_BRIQ_PANEL)      += briq_panel.o
  obj-$(CONFIG_BFIN_OTP)         += bfin-otp.o
  
  obj-$(CONFIG_PRINTER)          += lp.o
-obj-$(CONFIG_TIPAR)            += tipar.o
  
  obj-$(CONFIG_APM_EMULATION)    += apm-emulation.o
  
diff --git a/drivers/char/ds1302.c b/drivers/char/ds1302.c

index fada6ddefbae0bf45138f9bfc5167dab29265945..c5e67a623951bbc89f05cb7f9b5ade1530866fc1 100644 (file)
--- a/drivers/char/ds1302.c
+++ b/drivers/char/ds1302.c
@@ -20,10 +20,11 @@
  #include <linux/miscdevice.h>
  #include <linux/delay.h>
  #include <linux/bcd.h>
+#include <linux/smp_lock.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
  
-#include <asm/uaccess.h>
  #include <asm/system.h>
-#include <asm/io.h>
  #include <asm/rtc.h>
  #if defined(CONFIG_M32R)
  #include <asm/m32r.h>
@@ -153,9 +154,7 @@ static unsigned char days_in_mo[] =
  
  /* ioctl that supports RTC_RD_TIME and RTC_SET_TIME (read and set time/date). */
  
-static int
-rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
-         unsigned long arg)
+static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
  {
         unsigned long flags;
  
@@ -165,7 +164,9 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                         struct rtc_time rtc_tm;
  
                         memset(&rtc_tm, 0, sizeof (struct rtc_time));
+                       lock_kernel();
                         get_rtc_time(&rtc_tm);
+                       unlock_kernel();
                         if (copy_to_user((struct rtc_time*)arg, &rtc_tm, sizeof(struct rtc_time)))
                                 return -EFAULT;
                         return 0;
@@ -217,6 +218,7 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                         BIN_TO_BCD(mon);
                         BIN_TO_BCD(yrs);
  
+                       lock_kernel();
                         local_irq_save(flags);
                         CMOS_WRITE(yrs, RTC_YEAR);
                         CMOS_WRITE(mon, RTC_MONTH);
@@ -225,6 +227,7 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                         CMOS_WRITE(min, RTC_MINUTES);
                         CMOS_WRITE(sec, RTC_SECONDS);
                         local_irq_restore(flags);
+                       unlock_kernel();
  
                         /* Notice that at this point, the RTC is updated but
                          * the kernel is still running with the old time.
@@ -244,8 +247,10 @@ rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                         if(copy_from_user(&tcs_val, (int*)arg, sizeof(int)))
                                 return -EFAULT;
  
+                       lock_kernel();
                         tcs_val = RTC_TCR_PATTERN | (tcs_val & 0x0F);
                         ds1302_writereg(RTC_TRICKLECHARGER, tcs_val);
+                       unlock_kernel();
                         return 0;
                 }
                 default:
@@ -282,7 +287,7 @@ get_rtc_status(char *buf)
  
  static const struct file_operations rtc_fops = {
         .owner          = THIS_MODULE,
-       .ioctl          = rtc_ioctl,
+       .unlocked_ioctl = rtc_ioctl,
  };
  
  /* Probe for the chip by writing something to its RAM and try reading it back. */
diff --git a/drivers/char/dsp56k.c b/drivers/char/dsp56k.c

index 33c466a4888f092c15475601b503a473808a20fc..19b88504e960987fcd63880b3a8823ee2a2da366 100644 (file)
--- a/drivers/char/dsp56k.c
+++ b/drivers/char/dsp56k.c
@@ -36,10 +36,10 @@
  #include <linux/smp_lock.h>
  #include <linux/firmware.h>
  #include <linux/platform_device.h>
+#include <linux/uaccess.h>     /* For put_user and get_user */
  
  #include <asm/atarihw.h>
  #include <asm/traps.h>
-#include <asm/uaccess.h>       /* For put_user and get_user */
  
  #include <asm/dsp56k.h>
  
@@ -303,8 +303,8 @@ static ssize_t dsp56k_write(struct file *file, const char __user *buf, size_t co
         }
  }
  
-static int dsp56k_ioctl(struct inode *inode, struct file *file,
-                       unsigned int cmd, unsigned long arg)
+static long dsp56k_ioctl(struct file *file, unsigned int cmd,
+                                                       unsigned long arg)
  {
         int dev = iminor(inode) & 0x0f;
         void __user *argp = (void __user *)arg;
@@ -331,8 +331,9 @@ static int dsp56k_ioctl(struct inode *inode, struct file *file,
                         if (len > DSP56K_MAX_BINARY_LENGTH) {
                                 return -EINVAL;
                         }
-    
+                       lock_kernel();
                         r = dsp56k_upload(bin, len);
+                       unlock_kernel();
                         if (r < 0) {
                                 return r;
                         }
@@ -342,12 +343,16 @@ static int dsp56k_ioctl(struct inode *inode, struct file *file,
                 case DSP56K_SET_TX_WSIZE:
                         if (arg > 4 || arg < 1)
                                 return -EINVAL;
+                       lock_kernel();
                         dsp56k.tx_wsize = (int) arg;
+                       unlock_kernel();
                         break;
                 case DSP56K_SET_RX_WSIZE:
                         if (arg > 4 || arg < 1)
                                 return -EINVAL;
+                       lock_kernel();
                         dsp56k.rx_wsize = (int) arg;
+                       unlock_kernel();
                         break;
                 case DSP56K_HOST_FLAGS:
                 {
@@ -359,6 +364,7 @@ static int dsp56k_ioctl(struct inode *inode, struct file *file,
                         if(get_user(out, &hf->out) < 0)
                                 return -EFAULT;
  
+                       lock_kernel();
                         if ((dir & 0x1) && (out & 0x1))
                                 dsp56k_host_interface.icr |= DSP56K_ICR_HF0;
                         else if (dir & 0x1)
@@ -373,14 +379,16 @@ static int dsp56k_ioctl(struct inode *inode, struct file *file,
                         if (dsp56k_host_interface.icr & DSP56K_ICR_HF1) status |= 0x2;
                         if (dsp56k_host_interface.isr & DSP56K_ISR_HF2) status |= 0x4;
                         if (dsp56k_host_interface.isr & DSP56K_ISR_HF3) status |= 0x8;
-
+                       unlock_kernel();
                         return put_user(status, &hf->status);
                 }
                 case DSP56K_HOST_CMD:
                         if (arg > 31 || arg < 0)
                                 return -EINVAL;
+                       lock_kernel();
                         dsp56k_host_interface.cvr = (u_char)((arg & DSP56K_CVR_HV_MASK) |
                                                              DSP56K_CVR_HC);
+                       unlock_kernel();
                         break;
                 default:
                         return -EINVAL;
@@ -472,7 +480,7 @@ static const struct file_operations dsp56k_fops = {
         .owner          = THIS_MODULE,
         .read           = dsp56k_read,
         .write          = dsp56k_write,
-       .ioctl          = dsp56k_ioctl,
+       .unlocked_ioctl = dsp56k_ioctl,
         .open           = dsp56k_open,
         .release        = dsp56k_release,
  };
diff --git a/drivers/char/efirtc.c b/drivers/char/efirtc.c

index d57ca3e4e5343311f43acbf8b35021ed16ee3fe4..67fbd7aab5dbfa9b73396808cd6feaa1a7369791 100644 (file)
--- a/drivers/char/efirtc.c
+++ b/drivers/char/efirtc.c
@@ -37,8 +37,9 @@
  #include <linux/rtc.h>
  #include <linux/proc_fs.h>
  #include <linux/efi.h>
+#include <linux/smp_lock.h>
+#include <linux/uaccess.h>
  
-#include <asm/uaccess.h>
  #include <asm/system.h>
  
  #define EFI_RTC_VERSION                "0.4"
@@ -51,8 +52,8 @@
  
  static DEFINE_SPINLOCK(efi_rtc_lock);
  
-static int efi_rtc_ioctl(struct inode *inode, struct file *file,
-                    unsigned int cmd, unsigned long arg);
+static long efi_rtc_ioctl(struct file *file, unsigned int cmd,
+                                                       unsigned long arg);
  
  #define is_leap(year) \
            ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
@@ -146,9 +147,8 @@ convert_from_efi_time(efi_time_t *eft, struct rtc_time *wtime)
         }
  }
  
-static int
-efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
-                    unsigned long arg)
+static long efi_rtc_ioctl(struct file *file, unsigned int cmd,
+                                                       unsigned long arg)
  {
  
         efi_status_t    status;
@@ -175,13 +175,13 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                         return -EINVAL;
  
                 case RTC_RD_TIME:
-
+                       lock_kernel();
                         spin_lock_irqsave(&efi_rtc_lock, flags);
  
                         status = efi.get_time(&eft, &cap);
  
                         spin_unlock_irqrestore(&efi_rtc_lock,flags);
-
+                       unlock_kernel();
                         if (status != EFI_SUCCESS) {
                                 /* should never happen */
                                 printk(KERN_ERR "efitime: can't read time\n");
@@ -203,11 +203,13 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
  
                         convert_to_efi_time(&wtime, &eft);
  
+                       lock_kernel();
                         spin_lock_irqsave(&efi_rtc_lock, flags);
  
                         status = efi.set_time(&eft);
  
                         spin_unlock_irqrestore(&efi_rtc_lock,flags);
+                       unlock_kernel();
  
                         return status == EFI_SUCCESS ? 0 : -EINVAL;
  
@@ -223,6 +225,7 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
  
                         convert_to_efi_time(&wtime, &eft);
  
+                       lock_kernel();
                         spin_lock_irqsave(&efi_rtc_lock, flags);
                         /*
                          * XXX Fixme:
@@ -233,16 +236,19 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                         status = efi.set_wakeup_time((efi_bool_t)enabled, &eft);
  
                         spin_unlock_irqrestore(&efi_rtc_lock,flags);
+                       unlock_kernel();
  
                         return status == EFI_SUCCESS ? 0 : -EINVAL;
  
                 case RTC_WKALM_RD:
  
+                       lock_kernel();
                         spin_lock_irqsave(&efi_rtc_lock, flags);
  
                         status = efi.get_wakeup_time((efi_bool_t *)&enabled, (efi_bool_t *)&pending, &eft);
  
                         spin_unlock_irqrestore(&efi_rtc_lock,flags);
+                       unlock_kernel();
  
                         if (status != EFI_SUCCESS) return -EINVAL;
  
@@ -256,7 +262,7 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                         return copy_to_user(&ewp->time, &wtime,
                                             sizeof(struct rtc_time)) ? -EFAULT : 0;
         }
-       return -EINVAL;
+       return -ENOTTY;
  }
  
  /*
@@ -265,8 +271,7 @@ efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
   *     up things on a close.
   */
  
-static int
-efi_rtc_open(struct inode *inode, struct file *file)
+static int efi_rtc_open(struct inode *inode, struct file *file)
  {
         /*
          * nothing special to do here
@@ -277,8 +282,7 @@ efi_rtc_open(struct inode *inode, struct file *file)
         return 0;
  }
  
-static int
-efi_rtc_close(struct inode *inode, struct file *file)
+static int efi_rtc_close(struct inode *inode, struct file *file)
  {
         return 0;
  }
@@ -289,13 +293,12 @@ efi_rtc_close(struct inode *inode, struct file *file)
  
  static const struct file_operations efi_rtc_fops = {
         .owner          = THIS_MODULE,
-       .ioctl          = efi_rtc_ioctl,
+       .unlocked_ioctl = efi_rtc_ioctl,
         .open           = efi_rtc_open,
         .release        = efi_rtc_close,
  };
  
-static struct miscdevice efi_rtc_dev=
-{
+static struct miscdevice efi_rtc_dev= {
         EFI_RTC_MINOR,
         "efirtc",
         &efi_rtc_fops
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c

index fb0a85a1eb36a71abcb9455048fb502c0b0d52ed..b3f5dbc6d8807c7a51cf352118bf5ea08e35d635 100644 (file)
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -623,6 +623,7 @@ static inline int hpet_tpcheck(struct hpet_task *tp)
         return -ENXIO;
  }
  
+#if 0
  int hpet_unregister(struct hpet_task *tp)
  {
         struct hpet_dev *devp;
@@ -652,6 +653,7 @@ int hpet_unregister(struct hpet_task *tp)
  
         return 0;
  }
+#endif  /*  0  */
  
  static ctl_table hpet_table[] = {
         {
diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c

index 2f9759d625cc520b02c78d4605bbb594c4765f7e..02aac104842d91033077da4488926fccfc7daa3d 100644 (file)
--- a/drivers/char/hvc_console.c
+++ b/drivers/char/hvc_console.c
@@ -27,7 +27,6 @@
  #include <linux/init.h>
  #include <linux/kbd_kern.h>
  #include <linux/kernel.h>
-#include <linux/kref.h>
  #include <linux/kthread.h>
  #include <linux/list.h>
  #include <linux/module.h>
@@ -75,23 +74,6 @@ static int hvc_init(void);
  static int sysrq_pressed;
  #endif
  
-struct hvc_struct {
-       spinlock_t lock;
-       int index;
-       struct tty_struct *tty;
-       unsigned int count;
-       int do_wakeup;
-       char *outbuf;
-       int outbuf_size;
-       int n_outbuf;
-       uint32_t vtermno;
-       struct hv_ops *ops;
-       int irq_requested;
-       int irq;
-       struct list_head next;
-       struct kref kref; /* ref count & hvc_struct lifetime */
-};
-
  /* dynamic list of hvc_struct instances */
  static LIST_HEAD(hvc_structs);
  
@@ -298,27 +280,15 @@ int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops)
  
         return 0;
  }
+EXPORT_SYMBOL_GPL(hvc_instantiate);
  
  /* Wake the sleeping khvcd */
-static void hvc_kick(void)
+void hvc_kick(void)
  {
         hvc_kicked = 1;
         wake_up_process(hvc_task);
  }
-
-static int hvc_poll(struct hvc_struct *hp);
-
-/*
- * NOTE: This API isn't used if the console adapter doesn't support interrupts.
- * In this case the console is poll driven.
- */
-static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance)
-{
-       /* if hvc_poll request a repoll, then kick the hvcd thread */
-       if (hvc_poll(dev_instance))
-               hvc_kick();
-       return IRQ_HANDLED;
-}
+EXPORT_SYMBOL_GPL(hvc_kick);
  
  static void hvc_unthrottle(struct tty_struct *tty)
  {
@@ -333,7 +303,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp)
  {
         struct hvc_struct *hp;
         unsigned long flags;
-       int irq = 0;
         int rc = 0;
  
         /* Auto increments kref reference if found. */
@@ -352,18 +321,15 @@ static int hvc_open(struct tty_struct *tty, struct file * filp)
         tty->low_latency = 1; /* Makes flushes to ldisc synchronous. */
  
         hp->tty = tty;
-       /* Save for request_irq outside of spin_lock. */
-       irq = hp->irq;
-       if (irq)
-               hp->irq_requested = 1;
+
+       if (hp->ops->notifier_add)
+               rc = hp->ops->notifier_add(hp, hp->data);
  
         spin_unlock_irqrestore(&hp->lock, flags);
-       /* check error, fallback to non-irq */
-       if (irq)
-               rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED, "hvc_console", hp);
+
  
         /*
-        * If the request_irq() fails and we return an error.  The tty layer
+        * If the notifier fails we return an error.  The tty layer
          * will call hvc_close() after a failed open but we don't want to clean
          * up there so we'll clean up here and clear out the previously set
          * tty fields and return the kref reference.
@@ -371,7 +337,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp)
         if (rc) {
                 spin_lock_irqsave(&hp->lock, flags);
                 hp->tty = NULL;
-               hp->irq_requested = 0;
                 spin_unlock_irqrestore(&hp->lock, flags);
                 tty->driver_data = NULL;
                 kref_put(&hp->kref, destroy_hvc_struct);
@@ -386,7 +351,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp)
  static void hvc_close(struct tty_struct *tty, struct file * filp)
  {
         struct hvc_struct *hp;
-       int irq = 0;
         unsigned long flags;
  
         if (tty_hung_up_p(filp))
@@ -404,9 +368,8 @@ static void hvc_close(struct tty_struct *tty, struct file * filp)
         spin_lock_irqsave(&hp->lock, flags);
  
         if (--hp->count == 0) {
-               if (hp->irq_requested)
-                       irq = hp->irq;
-               hp->irq_requested = 0;
+               if (hp->ops->notifier_del)
+                       hp->ops->notifier_del(hp, hp->data);
  
                 /* We are done with the tty pointer now. */
                 hp->tty = NULL;
@@ -418,10 +381,6 @@ static void hvc_close(struct tty_struct *tty, struct file * filp)
                  * waking periodically to check chars_in_buffer().
                  */
                 tty_wait_until_sent(tty, HVC_CLOSE_WAIT);
-
-               if (irq)
-                       free_irq(irq, hp);
-
         } else {
                 if (hp->count < 0)
                         printk(KERN_ERR "hvc_close %X: oops, count is %d\n",
@@ -436,7 +395,6 @@ static void hvc_hangup(struct tty_struct *tty)
  {
         struct hvc_struct *hp = tty->driver_data;
         unsigned long flags;
-       int irq = 0;
         int temp_open_count;
  
         if (!hp)
@@ -458,13 +416,12 @@ static void hvc_hangup(struct tty_struct *tty)
         hp->count = 0;
         hp->n_outbuf = 0;
         hp->tty = NULL;
-       if (hp->irq_requested)
-               /* Saved for use outside of spin_lock. */
-               irq = hp->irq;
-       hp->irq_requested = 0;
+
+       if (hp->ops->notifier_del)
+                       hp->ops->notifier_del(hp, hp->data);
+
         spin_unlock_irqrestore(&hp->lock, flags);
-       if (irq)
-               free_irq(irq, hp);
+
         while(temp_open_count) {
                 --temp_open_count;
                 kref_put(&hp->kref, destroy_hvc_struct);
@@ -575,7 +532,7 @@ static u32 timeout = MIN_TIMEOUT;
  #define HVC_POLL_READ  0x00000001
  #define HVC_POLL_WRITE 0x00000002
  
-static int hvc_poll(struct hvc_struct *hp)
+int hvc_poll(struct hvc_struct *hp)
  {
         struct tty_struct *tty;
         int i, n, poll_mask = 0;
@@ -602,10 +559,10 @@ static int hvc_poll(struct hvc_struct *hp)
         if (test_bit(TTY_THROTTLED, &tty->flags))
                 goto throttled;
  
-       /* If we aren't interrupt driven and aren't throttled, we always
+       /* If we aren't notifier driven and aren't throttled, we always
          * request a reschedule
          */
-       if (hp->irq == 0)
+       if (!hp->irq_requested)
                 poll_mask |= HVC_POLL_READ;
  
         /* Read data if any */
@@ -674,6 +631,7 @@ static int hvc_poll(struct hvc_struct *hp)
  
         return poll_mask;
  }
+EXPORT_SYMBOL_GPL(hvc_poll);
  
  /*
   * This kthread is either polling or interrupt driven.  This is determined by
@@ -733,7 +691,7 @@ static const struct tty_operations hvc_ops = {
         .chars_in_buffer = hvc_chars_in_buffer,
  };
  
-struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq,
+struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int data,
                                         struct hv_ops *ops, int outbuf_size)
  {
         struct hvc_struct *hp;
@@ -754,7 +712,7 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq,
         memset(hp, 0x00, sizeof(*hp));
  
         hp->vtermno = vtermno;
-       hp->irq = irq;
+       hp->data = data;
         hp->ops = ops;
         hp->outbuf_size = outbuf_size;
         hp->outbuf = &((char *)hp)[ALIGN(sizeof(*hp), sizeof(long))];
@@ -784,6 +742,7 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq,
  
         return hp;
  }
+EXPORT_SYMBOL_GPL(hvc_alloc);
  
  int __devexit hvc_remove(struct hvc_struct *hp)
  {
diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h

index 42ffb17e15df7d4f038f483cc16da4b1a42baf3f..d9ce10915625db733c5f7aecb3ffd4679c94de08 100644 (file)
--- a/drivers/char/hvc_console.h
+++ b/drivers/char/hvc_console.h
@@ -26,6 +26,7 @@
  
  #ifndef HVC_CONSOLE_H
  #define HVC_CONSOLE_H
+#include <linux/kref.h>
  
  /*
   * This is the max number of console adapters that can/will be found as
@@ -42,24 +43,50 @@
   */
  #define HVC_ALLOC_TTY_ADAPTERS 8
  
+struct hvc_struct {
+       spinlock_t lock;
+       int index;
+       struct tty_struct *tty;
+       unsigned int count;
+       int do_wakeup;
+       char *outbuf;
+       int outbuf_size;
+       int n_outbuf;
+       uint32_t vtermno;
+       struct hv_ops *ops;
+       int irq_requested;
+       int data;
+       struct list_head next;
+       struct kref kref; /* ref count & hvc_struct lifetime */
+};
  
  /* implemented by a low level driver */
  struct hv_ops {
         int (*get_chars)(uint32_t vtermno, char *buf, int count);
         int (*put_chars)(uint32_t vtermno, const char *buf, int count);
-};
  
-struct hvc_struct;
+       /* Callbacks for notification. Called in open and close */
+       int (*notifier_add)(struct hvc_struct *hp, int irq);
+       void (*notifier_del)(struct hvc_struct *hp, int irq);
+};
  
  /* Register a vterm and a slot index for use as a console (console_init) */
  extern int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops);
  
  /* register a vterm for hvc tty operation (module_init or hotplug add) */
-extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int irq,
+extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int data,
                                 struct hv_ops *ops, int outbuf_size);
-/* remove a vterm from hvc tty operation (modele_exit or hotplug remove) */
+/* remove a vterm from hvc tty operation (module_exit or hotplug remove) */
  extern int __devexit hvc_remove(struct hvc_struct *hp);
  
+/* data available */
+int hvc_poll(struct hvc_struct *hp);
+void hvc_kick(void);
+
+/* default notifier for irq based notification */
+extern int notifier_add_irq(struct hvc_struct *hp, int data);
+extern void notifier_del_irq(struct hvc_struct *hp, int data);
+
  
  #if defined(CONFIG_XMON) && defined(CONFIG_SMP)
  #include <asm/xmon.h>
diff --git a/drivers/char/hvc_irq.c b/drivers/char/hvc_irq.c

new file mode 100644 (file)

index 0000000..73a59cd
--- /dev/null
+++ b/drivers/char/hvc_irq.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright IBM Corp. 2001,2008
+ *
+ * This file contains the IRQ specific code for hvc_console
+ *
+ */
+
+#include <linux/interrupt.h>
+
+#include "hvc_console.h"
+
+static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance)
+{
+       /* if hvc_poll request a repoll, then kick the hvcd thread */
+       if (hvc_poll(dev_instance))
+               hvc_kick();
+       return IRQ_HANDLED;
+}
+
+/*
+ * For IRQ based systems these callbacks can be used
+ */
+int notifier_add_irq(struct hvc_struct *hp, int irq)
+{
+       int rc;
+
+       if (!irq) {
+               hp->irq_requested = 0;
+               return 0;
+       }
+       rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED,
+                          "hvc_console", hp);
+       if (!rc)
+               hp->irq_requested = 1;
+       return rc;
+}
+
+void notifier_del_irq(struct hvc_struct *hp, int irq)
+{
+       if (!irq)
+               return;
+       free_irq(irq, hp);
+       hp->irq_requested = 0;
+}
diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c

index a08f8f981c11c8446176e8e588c0d917562dfcfb..b71c610fe5ae0584092b8badbc4c9c93fe52f117 100644 (file)
--- a/drivers/char/hvc_iseries.c
+++ b/drivers/char/hvc_iseries.c
@@ -200,6 +200,8 @@ done:
  static struct hv_ops hvc_get_put_ops = {
         .get_chars = get_chars,
         .put_chars = put_chars,
+       .notifier_add = notifier_add_irq,
+       .notifier_del = notifier_del_irq,
  };
  
  static int __devinit hvc_vio_probe(struct vio_dev *vdev,
diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c

index 79711aa4b41d4ae85de49cf8bddad9a0c50ded18..93f3840c1682e53e9f2e20c2f21330ba4a9a71e5 100644 (file)
--- a/drivers/char/hvc_vio.c
+++ b/drivers/char/hvc_vio.c
@@ -80,6 +80,8 @@ static int filtered_get_chars(uint32_t vtermno, char *buf, int count)
  static struct hv_ops hvc_get_put_ops = {
         .get_chars = filtered_get_chars,
         .put_chars = hvc_put_chars,
+       .notifier_add = notifier_add_irq,
+       .notifier_del = notifier_del_irq,
  };
  
  static int __devinit hvc_vio_probe(struct vio_dev *vdev,
diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c

index db2ae4216279377e7cc092a63ff2e5c0e6db691b..6b70aa66a587883bf56f4ca3163ac7828afc3eae 100644 (file)
--- a/drivers/char/hvc_xen.c
+++ b/drivers/char/hvc_xen.c
@@ -100,6 +100,8 @@ static int read_console(uint32_t vtermno, char *buf, int len)
  static struct hv_ops hvc_ops = {
         .get_chars = read_console,
         .put_chars = write_console,
+       .notifier_add = notifier_add_irq,
+       .notifier_del = notifier_del_irq,
  };
  
  static int __init xen_init(void)
diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c

index 9cb48fcd316c775007e596dc66a6adb0494ddfd9..689f9dcd3b866f1bd6ca9e3a0f707f7b2de9cb39 100644 (file)
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -203,7 +203,7 @@ static int set_serial_info(i2ChanStrPtr, struct serial_struct __user *);
  
  static ssize_t ip2_ipl_read(struct file *, char __user *, size_t, loff_t *);
  static ssize_t ip2_ipl_write(struct file *, const char __user *, size_t, loff_t *);
-static int ip2_ipl_ioctl(struct inode *, struct file *, UINT, ULONG);
+static long ip2_ipl_ioctl(struct file *, UINT, ULONG);
  static int ip2_ipl_open(struct inode *, struct file *);
  
  static int DumpTraceBuffer(char __user *, int);
@@ -236,7 +236,7 @@ static const struct file_operations ip2_ipl = {
         .owner          = THIS_MODULE,
         .read           = ip2_ipl_read,
         .write          = ip2_ipl_write,
-       .ioctl          = ip2_ipl_ioctl,
+       .unlocked_ioctl = ip2_ipl_ioctl,
         .open           = ip2_ipl_open,
  }; 
  
@@ -2845,10 +2845,10 @@ ip2_ipl_write(struct file *pFile, const char __user *pData, size_t count, loff_t
  /*                                                                            */
  /*                                                                            */
  /******************************************************************************/
-static int
-ip2_ipl_ioctl ( struct inode *pInode, struct file *pFile, UINT cmd, ULONG arg )
+static long
+ip2_ipl_ioctl (struct file *pFile, UINT cmd, ULONG arg )
  {
-       unsigned int iplminor = iminor(pInode);
+       unsigned int iplminor = iminor(pFile->f_path.dentry->d_inode);
         int rc = 0;
         void __user *argp = (void __user *)arg;
         ULONG __user *pIndex = argp;
@@ -2859,6 +2859,8 @@ ip2_ipl_ioctl ( struct inode *pInode, struct file *pFile, UINT cmd, ULONG arg )
         printk (KERN_DEBUG "IP2IPL: ioctl cmd %d, arg %ld\n", cmd, arg );
  #endif
  
+       lock_kernel();
+
         switch ( iplminor ) {
         case 0:     // IPL device
                 rc = -EINVAL;
@@ -2919,6 +2921,7 @@ ip2_ipl_ioctl ( struct inode *pInode, struct file *pFile, UINT cmd, ULONG arg )
                 rc = -ENODEV;
                 break;
         }
+       unlock_kernel();
         return rc;
  }
  
diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c

index 50243fcd87e882656069cce5492b92b53469d4f7..4f8d67fed292bfc6a9423a975fb08265dd78f69e 100644 (file)
--- a/drivers/char/mwave/mwavedd.c
+++ b/drivers/char/mwave/mwavedd.c
@@ -86,8 +86,8 @@ module_param(mwave_uart_io, int, 0);
  
  static int mwave_open(struct inode *inode, struct file *file);
  static int mwave_close(struct inode *inode, struct file *file);
-static int mwave_ioctl(struct inode *inode, struct file *filp,
-                       unsigned int iocmd, unsigned long ioarg);
+static long mwave_ioctl(struct file *filp, unsigned int iocmd,
+                                                       unsigned long ioarg);
  
  MWAVE_DEVICE_DATA mwave_s_mdd;
  
@@ -119,16 +119,16 @@ static int mwave_close(struct inode *inode, struct file *file)
         return retval;
  }
  
-static int mwave_ioctl(struct inode *inode, struct file *file,
-                       unsigned int iocmd, unsigned long ioarg)
+static long mwave_ioctl(struct file *file, unsigned int iocmd,
+                                                       unsigned long ioarg)
  {
         unsigned int retval = 0;
         pMWAVE_DEVICE_DATA pDrvData = &mwave_s_mdd;
         void __user *arg = (void __user *)ioarg;
  
-       PRINTK_5(TRACE_MWAVE,
-               "mwavedd::mwave_ioctl, entry inode %p file %p cmd %x arg %x\n",
-                inode,  file, iocmd, (int) ioarg);
+       PRINTK_4(TRACE_MWAVE,
+               "mwavedd::mwave_ioctl, entry file %p cmd %x arg %x\n",
+               file, iocmd, (int) ioarg);
  
         switch (iocmd) {
  
@@ -136,7 +136,9 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
                         PRINTK_1(TRACE_MWAVE,
                                 "mwavedd::mwave_ioctl, IOCTL_MW_RESET"
                                 " calling tp3780I_ResetDSP\n");
+                       lock_kernel();
                         retval = tp3780I_ResetDSP(&pDrvData->rBDData);
+                       unlock_kernel();
                         PRINTK_2(TRACE_MWAVE,
                                 "mwavedd::mwave_ioctl, IOCTL_MW_RESET"
                                 " retval %x from tp3780I_ResetDSP\n",
@@ -147,7 +149,9 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
                         PRINTK_1(TRACE_MWAVE,
                                 "mwavedd::mwave_ioctl, IOCTL_MW_RUN"
                                 " calling tp3780I_StartDSP\n");
+                       lock_kernel();
                         retval = tp3780I_StartDSP(&pDrvData->rBDData);
+                       unlock_kernel();
                         PRINTK_2(TRACE_MWAVE,
                                 "mwavedd::mwave_ioctl, IOCTL_MW_RUN"
                                 " retval %x from tp3780I_StartDSP\n",
@@ -161,8 +165,10 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
                                 "mwavedd::mwave_ioctl,"
                                 " IOCTL_MW_DSP_ABILITIES calling"
                                 " tp3780I_QueryAbilities\n");
+                       lock_kernel();
                         retval = tp3780I_QueryAbilities(&pDrvData->rBDData,
                                         &rAbilities);
+                       unlock_kernel();
                         PRINTK_2(TRACE_MWAVE,
                                 "mwavedd::mwave_ioctl, IOCTL_MW_DSP_ABILITIES"
                                 " retval %x from tp3780I_QueryAbilities\n",
@@ -193,11 +199,13 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
                                 "mwavedd::mwave_ioctl IOCTL_MW_READ_DATA,"
                                 " size %lx, ioarg %lx pusBuffer %p\n",
                                 rReadData.ulDataLength, ioarg, pusBuffer);
+                       lock_kernel();
                         retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData,
                                         iocmd,
                                         pusBuffer,
                                         rReadData.ulDataLength,
                                         rReadData.usDspAddress);
+                       unlock_kernel();
                 }
                         break;
         
@@ -215,10 +223,12 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
                                 " size %lx, ioarg %lx pusBuffer %p\n",
                                 rReadData.ulDataLength / 2, ioarg,
                                 pusBuffer);
+                       lock_kernel();
                         retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData,
                                 iocmd, pusBuffer,
                                 rReadData.ulDataLength / 2,
                                 rReadData.usDspAddress);
+                       unlock_kernel();
                 }
                         break;
         
@@ -236,10 +246,12 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
                                 " size %lx, ioarg %lx pusBuffer %p\n",
                                 rWriteData.ulDataLength, ioarg,
                                 pusBuffer);
+                       lock_kernel();
                         retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData,
                                         iocmd, pusBuffer,
                                         rWriteData.ulDataLength,
                                         rWriteData.usDspAddress);
+                       unlock_kernel();
                 }
                         break;
         
@@ -257,10 +269,12 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
                                 " size %lx, ioarg %lx pusBuffer %p\n",
                                 rWriteData.ulDataLength, ioarg,
                                 pusBuffer);
+                       lock_kernel();
                         retval = tp3780I_ReadWriteDspIStore(&pDrvData->rBDData,
                                         iocmd, pusBuffer,
                                         rWriteData.ulDataLength,
                                         rWriteData.usDspAddress);
+                       unlock_kernel();
                 }
                         break;
         
@@ -281,8 +295,10 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
                                                 ipcnum);
                                 return -EINVAL;
                         }
+                       lock_kernel();
                         pDrvData->IPCs[ipcnum].bIsHere = FALSE;
                         pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
+                       unlock_kernel();
         
                         PRINTK_2(TRACE_MWAVE,
                                 "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC"
@@ -307,6 +323,7 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
                                 return -EINVAL;
                         }
         
+                       lock_kernel();
                         if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
                                 DECLARE_WAITQUEUE(wait, current);
  
@@ -347,6 +364,7 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
                                         " processing\n",
                                         ipcnum);
                         }
+                       unlock_kernel();
                 }
                         break;
         
@@ -365,19 +383,18 @@ static int mwave_ioctl(struct inode *inode, struct file *file,
                                                 ipcnum);
                                 return -EINVAL;
                         }
+                       lock_kernel();
                         if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
                                 pDrvData->IPCs[ipcnum].bIsEnabled = FALSE;
                                 if (pDrvData->IPCs[ipcnum].bIsHere == TRUE) {
                                         wake_up_interruptible(&pDrvData->IPCs[ipcnum].ipc_wait_queue);
                                 }
                         }
+                       unlock_kernel();
                 }
                         break;
         
                 default:
-                       PRINTK_ERROR(KERN_ERR_MWAVE "mwavedd::mwave_ioctl:"
-                                       " Error: Unrecognized iocmd %x\n",
-                                       iocmd);
                         return -ENOTTY;
                         break;
         } /* switch */
@@ -460,7 +477,7 @@ static const struct file_operations mwave_fops = {
         .owner          = THIS_MODULE,
         .read           = mwave_read,
         .write          = mwave_write,
-       .ioctl          = mwave_ioctl,
+       .unlocked_ioctl = mwave_ioctl,
         .open           = mwave_open,
         .release        = mwave_close
  };
diff --git a/drivers/char/mwave/mwavedd.h b/drivers/char/mwave/mwavedd.h

index 8eca61e0a19c46a91f2e3d18328ca6a208677d77..7e0d530e2e07f151e93fa0a0b39fc9514a2b9b83 100644 (file)
--- a/drivers/char/mwave/mwavedd.h
+++ b/drivers/char/mwave/mwavedd.h
@@ -147,4 +147,6 @@ typedef struct _MWAVE_DEVICE_DATA {
  
  } MWAVE_DEVICE_DATA, *pMWAVE_DEVICE_DATA;
  
+extern MWAVE_DEVICE_DATA mwave_s_mdd;
+
  #endif
diff --git a/drivers/char/mwave/tp3780i.c b/drivers/char/mwave/tp3780i.c

index f282976daaacfe8e6da245071757c9d552f0fbad..c68969708068a0f8472b611af2bddb38a3411632 100644 (file)
--- a/drivers/char/mwave/tp3780i.c
+++ b/drivers/char/mwave/tp3780i.c
@@ -57,8 +57,6 @@
  #include "3780i.h"
  #include "mwavepub.h"
  
-extern MWAVE_DEVICE_DATA mwave_s_mdd;
-
  static unsigned short s_ausThinkpadIrqToField[16] =
         { 0xFFFF, 0xFFFF, 0xFFFF, 0x0001, 0x0002, 0x0003, 0xFFFF, 0x0004,
         0xFFFF, 0xFFFF, 0x0005, 0x0006, 0xFFFF, 0xFFFF, 0xFFFF, 0x0007 };
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c

index 4c756bbba94831aafa3895853394409688c896c6..e30575e87648d4efa42b18e444732820281ff71e 100644 (file)
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -16,7 +16,6 @@
   *     Fed through a cleanup, indent and remove of non 2.6 code by Alan Cox
   *     <alan@redhat.com>. The original 1.8 code is available on www.moxa.com.
   *     - Fixed x86_64 cleanness
- *     - Fixed sleep with spinlock held in mxser_send_break
   */
  
  #include <linux/module.h>
@@ -49,18 +48,12 @@
  
  #define        MXSER_VERSION   "2.0.4"         /* 1.12 */
  #define        MXSERMAJOR       174
-#define        MXSERCUMAJOR     175
  
  #define MXSER_BOARDS           4       /* Max. boards */
  #define MXSER_PORTS_PER_BOARD  8       /* Max. ports per board */
  #define MXSER_PORTS            (MXSER_BOARDS * MXSER_PORTS_PER_BOARD)
  #define MXSER_ISR_PASS_LIMIT   100
  
-#define        MXSER_ERR_IOADDR        -1
-#define        MXSER_ERR_IRQ           -2
-#define        MXSER_ERR_IRQ_CONFLIT   -3
-#define        MXSER_ERR_VECTOR        -4
-
  /*CheckIsMoxaMust return value*/
  #define MOXA_OTHER_UART                0x00
  #define MOXA_MUST_MU150_HWID   0x01
@@ -179,14 +172,15 @@ static struct pci_device_id mxser_pcibrds[] = {
  };
  MODULE_DEVICE_TABLE(pci, mxser_pcibrds);
  
-static int ioaddr[MXSER_BOARDS] = { 0, 0, 0, 0 };
+static unsigned long ioaddr[MXSER_BOARDS];
  static int ttymajor = MXSERMAJOR;
  
  /* Variables for insmod */
  
  MODULE_AUTHOR("Casper Yang");
  MODULE_DESCRIPTION("MOXA Smartio/Industio Family Multiport Board Device Driver");
-module_param_array(ioaddr, int, NULL, 0);
+module_param_array(ioaddr, ulong, NULL, 0);
+MODULE_PARM_DESC(ioaddr, "ISA io addresses to look for a moxa board");
  module_param(ttymajor, int, 0);
  MODULE_LICENSE("GPL");
  
@@ -196,7 +190,6 @@ struct mxser_log {
         unsigned long txcnt[MXSER_PORTS];
  };
  
-
  struct mxser_mon {
         unsigned long rxcnt;
         unsigned long txcnt;
@@ -287,19 +280,9 @@ struct mxser_mstatus {
         int dcd;
  };
  
-static struct mxser_mstatus GMStatus[MXSER_PORTS];
-
-static int mxserBoardCAP[MXSER_BOARDS] = {
-       0, 0, 0, 0
-       /*  0x180, 0x280, 0x200, 0x320 */
-};
-
  static struct mxser_board mxser_boards[MXSER_BOARDS];
  static struct tty_driver *mxvar_sdriver;
  static struct mxser_log mxvar_log;
-static int mxvar_diagflag;
-static unsigned char mxser_msr[MXSER_PORTS + 1];
-static struct mxser_mon_ext mon_data_ext;
  static int mxser_set_baud_method[MXSER_PORTS + 1];
  
  static void mxser_enable_must_enchance_mode(unsigned long baseio)
@@ -543,6 +526,7 @@ static void process_txrx_fifo(struct mxser_port *info)
  
  static unsigned char mxser_get_msr(int baseaddr, int mode, int port)
  {
+       static unsigned char mxser_msr[MXSER_PORTS + 1];
         unsigned char status = 0;
  
         status = inb(baseaddr + UART_MSR);
@@ -1319,13 +1303,9 @@ static void mxser_flush_chars(struct tty_struct *tty)
         struct mxser_port *info = tty->driver_data;
         unsigned long flags;
  
-       if (info->xmit_cnt <= 0 ||
-                       tty->stopped ||
-                       !info->port.xmit_buf ||
-                       (tty->hw_stopped &&
-                        (info->type != PORT_16550A) &&
-                        (!info->board->chip_flag)
-                       ))
+       if (info->xmit_cnt <= 0 || tty->stopped || !info->port.xmit_buf ||
+                       (tty->hw_stopped && info->type != PORT_16550A &&
+                        !info->board->chip_flag))
                 return;
  
         spin_lock_irqsave(&info->slock, flags);
@@ -1343,9 +1323,7 @@ static int mxser_write_room(struct tty_struct *tty)
         int ret;
  
         ret = SERIAL_XMIT_SIZE - info->xmit_cnt - 1;
-       if (ret < 0)
-               ret = 0;
-       return ret;
+       return ret < 0 ? 0 : ret;
  }
  
  static int mxser_chars_in_buffer(struct tty_struct *tty)
@@ -1634,6 +1612,8 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp)
  
         switch (cmd) {
         case MOXA_GET_MAJOR:
+               printk(KERN_WARNING "mxser: '%s' uses deprecated ioctl %x, fix "
+                               "your userspace\n", current->comm, cmd);
                 return put_user(ttymajor, (int __user *)argp);
  
         case MOXA_CHKPORTENABLE:
@@ -1651,62 +1631,60 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp)
                         ret = -EFAULT;
                 unlock_kernel();
                 return ret;
-       case MOXA_GETMSTATUS:
+       case MOXA_GETMSTATUS: {
+               struct mxser_mstatus ms, __user *msu = argp;
                 lock_kernel();
                 for (i = 0; i < MXSER_BOARDS; i++)
                         for (j = 0; j < MXSER_PORTS_PER_BOARD; j++) {
                                 port = &mxser_boards[i].ports[j];
+                               memset(&ms, 0, sizeof(ms));
  
-                               GMStatus[i].ri = 0;
-                               if (!port->ioaddr) {
-                                       GMStatus[i].dcd = 0;
-                                       GMStatus[i].dsr = 0;
-                                       GMStatus[i].cts = 0;
-                                       continue;
-                               }
+                               if (!port->ioaddr)
+                                       goto copy;
  
                                 if (!port->port.tty || !port->port.tty->termios)
-                                       GMStatus[i].cflag =
-                                               port->normal_termios.c_cflag;
+                                       ms.cflag = port->normal_termios.c_cflag;
                                 else
-                                       GMStatus[i].cflag =
-                                               port->port.tty->termios->c_cflag;
+                                       ms.cflag = port->port.tty->termios->c_cflag;
  
                                 status = inb(port->ioaddr + UART_MSR);
-                               if (status & 0x80 /*UART_MSR_DCD */ )
-                                       GMStatus[i].dcd = 1;
-                               else
-                                       GMStatus[i].dcd = 0;
-
-                               if (status & 0x20 /*UART_MSR_DSR */ )
-                                       GMStatus[i].dsr = 1;
-                               else
-                                       GMStatus[i].dsr = 0;
-
-
-                               if (status & 0x10 /*UART_MSR_CTS */ )
-                                       GMStatus[i].cts = 1;
-                               else
-                                       GMStatus[i].cts = 0;
+                               if (status & UART_MSR_DCD)
+                                       ms.dcd = 1;
+                               if (status & UART_MSR_DSR)
+                                       ms.dsr = 1;
+                               if (status & UART_MSR_CTS)
+                                       ms.cts = 1;
+                       copy:
+                               if (copy_to_user(msu, &ms, sizeof(ms))) {
+                                       unlock_kernel();
+                                       return -EFAULT;
+                               }
+                               msu++;
                         }
                 unlock_kernel();
-               if (copy_to_user(argp, GMStatus,
-                               sizeof(struct mxser_mstatus) * MXSER_PORTS))
-                       return -EFAULT;
                 return 0;
+       }
         case MOXA_ASPP_MON_EXT: {
-               int p, shiftbit;
-               unsigned long opmode;
-               unsigned cflag, iflag;
+               struct mxser_mon_ext *me; /* it's 2k, stack unfriendly */
+               unsigned int cflag, iflag, p;
+               u8 opmode;
+
+               me = kzalloc(sizeof(*me), GFP_KERNEL);
+               if (!me)
+                       return -ENOMEM;
  
                 lock_kernel();
-               for (i = 0; i < MXSER_BOARDS; i++) {
-                       for (j = 0; j < MXSER_PORTS_PER_BOARD; j++) {
+               for (i = 0, p = 0; i < MXSER_BOARDS; i++) {
+                       for (j = 0; j < MXSER_PORTS_PER_BOARD; j++, p++) {
+                               if (p >= ARRAY_SIZE(me->rx_cnt)) {
+                                       i = MXSER_BOARDS;
+                                       break;
+                               }
                                 port = &mxser_boards[i].ports[j];
                                 if (!port->ioaddr)
                                         continue;
  
-                               status = mxser_get_msr(port->ioaddr, 0, i);
+                               status = mxser_get_msr(port->ioaddr, 0, p);
  
                                 if (status & UART_MSR_TERI)
                                         port->icount.rng++;
@@ -1718,16 +1696,13 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp)
                                         port->icount.cts++;
  
                                 port->mon_data.modem_status = status;
-                               mon_data_ext.rx_cnt[i] = port->mon_data.rxcnt;
-                               mon_data_ext.tx_cnt[i] = port->mon_data.txcnt;
-                               mon_data_ext.up_rxcnt[i] =
-                                       port->mon_data.up_rxcnt;
-                               mon_data_ext.up_txcnt[i] =
-                                       port->mon_data.up_txcnt;
-                               mon_data_ext.modem_status[i] =
+                               me->rx_cnt[p] = port->mon_data.rxcnt;
+                               me->tx_cnt[p] = port->mon_data.txcnt;
+                               me->up_rxcnt[p] = port->mon_data.up_rxcnt;
+                               me->up_txcnt[p] = port->mon_data.up_txcnt;
+                               me->modem_status[p] =
                                         port->mon_data.modem_status;
-                               mon_data_ext.baudrate[i] =
-                                       tty_get_baud_rate(port->port.tty);
+                               me->baudrate[p] = tty_get_baud_rate(port->port.tty);
  
                                 if (!port->port.tty || !port->port.tty->termios) {
                                         cflag = port->normal_termios.c_cflag;
@@ -1737,40 +1712,31 @@ static int mxser_ioctl_special(unsigned int cmd, void __user *argp)
                                         iflag = port->port.tty->termios->c_iflag;
                                 }
  
-                               mon_data_ext.databits[i] = cflag & CSIZE;
-
-                               mon_data_ext.stopbits[i] = cflag & CSTOPB;
-
-                               mon_data_ext.parity[i] =
-                                       cflag & (PARENB | PARODD | CMSPAR);
-
-                               mon_data_ext.flowctrl[i] = 0x00;
+                               me->databits[p] = cflag & CSIZE;
+                               me->stopbits[p] = cflag & CSTOPB;
+                               me->parity[p] = cflag & (PARENB | PARODD |
+                                               CMSPAR);
  
                                 if (cflag & CRTSCTS)
-                                       mon_data_ext.flowctrl[i] |= 0x03;
+                                       me->flowctrl[p] |= 0x03;
  
                                 if (iflag & (IXON | IXOFF))
-                                       mon_data_ext.flowctrl[i] |= 0x0C;
+                                       me->flowctrl[p] |= 0x0C;
  
                                 if (port->type == PORT_16550A)
-                                       mon_data_ext.fifo[i] = 1;
-                               else
-                                       mon_data_ext.fifo[i] = 0;
+                                       me->fifo[p] = 1;
  
-                               p = i % 4;
-                               shiftbit = p * 2;
-                               opmode = inb(port->opmode_ioaddr) >> shiftbit;
+                               opmode = inb(port->opmode_ioaddr) >>
+                                               ((p % 4) * 2);
                                 opmode &= OP_MODE_MASK;
-
-                               mon_data_ext.iftype[i] = opmode;
-
+                               me->iftype[p] = opmode;
                         }
                 }
                 unlock_kernel();
-               if (copy_to_user(argp, &mon_data_ext,
-                                       sizeof(mon_data_ext)))
-                       return -EFAULT;
-               return 0;
+               if (copy_to_user(argp, me, sizeof(*me)))
+                       ret = -EFAULT;
+               kfree(me);
+               return ret;
         }
         default:
                 return -ENOIOCTLCMD;
@@ -1804,7 +1770,6 @@ static int mxser_ioctl(struct tty_struct *tty, struct file *file,
  {
         struct mxser_port *info = tty->driver_data;
         struct async_icount cnow;
-       struct serial_icounter_struct __user *p_cuser;
         unsigned long flags;
         void __user *argp = (void __user *)arg;
         int retval;
@@ -1884,30 +1849,26 @@ static int mxser_ioctl(struct tty_struct *tty, struct file *file,
          * NB: both 1->0 and 0->1 transitions are counted except for
          *     RI where only 0->1 is counted.
          */
-       case TIOCGICOUNT:
+       case TIOCGICOUNT: {
+               struct serial_icounter_struct icnt = { 0 };
                 spin_lock_irqsave(&info->slock, flags);
                 cnow = info->icount;
                 spin_unlock_irqrestore(&info->slock, flags);
-               p_cuser = argp;
-               if (put_user(cnow.frame, &p_cuser->frame))
-                       return -EFAULT;
-               if (put_user(cnow.brk, &p_cuser->brk))
-                       return -EFAULT;
-               if (put_user(cnow.overrun, &p_cuser->overrun))
-                       return -EFAULT;
-               if (put_user(cnow.buf_overrun, &p_cuser->buf_overrun))
-                       return -EFAULT;
-               if (put_user(cnow.parity, &p_cuser->parity))
-                       return -EFAULT;
-               if (put_user(cnow.rx, &p_cuser->rx))
-                       return -EFAULT;
-               if (put_user(cnow.tx, &p_cuser->tx))
-                       return -EFAULT;
-               put_user(cnow.cts, &p_cuser->cts);
-               put_user(cnow.dsr, &p_cuser->dsr);
-               put_user(cnow.rng, &p_cuser->rng);
-               put_user(cnow.dcd, &p_cuser->dcd);
-               return 0;
+
+               icnt.frame = cnow.frame;
+               icnt.brk = cnow.brk;
+               icnt.overrun = cnow.overrun;
+               icnt.buf_overrun = cnow.buf_overrun;
+               icnt.parity = cnow.parity;
+               icnt.rx = cnow.rx;
+               icnt.tx = cnow.tx;
+               icnt.cts = cnow.cts;
+               icnt.dsr = cnow.dsr;
+               icnt.rng = cnow.rng;
+               icnt.dcd = cnow.dcd;
+
+               return copy_to_user(argp, &icnt, sizeof(icnt)) ? -EFAULT : 0;
+       }
         case MOXA_HighSpeedOn:
                 return put_user(info->baud_base != 115200 ? 1 : 0, (int __user *)argp);
         case MOXA_SDS_RSTICOUNTER:
@@ -2503,7 +2464,8 @@ static int __devinit mxser_initbrd(struct mxser_board *brd,
         unsigned int i;
         int retval;
  
-       printk(KERN_INFO "max. baud rate = %d bps.\n", brd->ports[0].max_baud);
+       printk(KERN_INFO "mxser: max. baud rate = %d bps\n",
+                       brd->ports[0].max_baud);
  
         for (i = 0; i < brd->info->nports; i++) {
                 info = &brd->ports[i];
@@ -2586,28 +2548,32 @@ static int __init mxser_get_ISA_conf(int cap, struct mxser_board *brd)
                 irq = regs[9] & 0xF000;
                 irq = irq | (irq >> 4);
                 if (irq != (regs[9] & 0xFF00))
-                       return MXSER_ERR_IRQ_CONFLIT;
+                       goto err_irqconflict;
         } else if (brd->info->nports == 4) {
                 irq = regs[9] & 0xF000;
                 irq = irq | (irq >> 4);
                 irq = irq | (irq >> 8);
                 if (irq != regs[9])
-                       return MXSER_ERR_IRQ_CONFLIT;
+                       goto err_irqconflict;
         } else if (brd->info->nports == 8) {
                 irq = regs[9] & 0xF000;
                 irq = irq | (irq >> 4);
                 irq = irq | (irq >> 8);
                 if ((irq != regs[9]) || (irq != regs[10]))
-                       return MXSER_ERR_IRQ_CONFLIT;
+                       goto err_irqconflict;
         }
  
-       if (!irq)
-               return MXSER_ERR_IRQ;
+       if (!irq) {
+               printk(KERN_ERR "mxser: interrupt number unset\n");
+               return -EIO;
+       }
         brd->irq = ((int)(irq & 0xF000) >> 12);
         for (i = 0; i < 8; i++)
                 brd->ports[i].ioaddr = (int) regs[i + 1] & 0xFFF8;
-       if ((regs[12] & 0x80) == 0)
-               return MXSER_ERR_VECTOR;
+       if ((regs[12] & 0x80) == 0) {
+               printk(KERN_ERR "mxser: invalid interrupt vector\n");
+               return -EIO;
+       }
         brd->vector = (int)regs[11];    /* interrupt vector */
         if (id == 1)
                 brd->vector_mask = 0x00FF;
@@ -2634,13 +2600,26 @@ static int __init mxser_get_ISA_conf(int cap, struct mxser_board *brd)
         else
                 brd->uart_type = PORT_16450;
         if (!request_region(brd->ports[0].ioaddr, 8 * brd->info->nports,
-                       "mxser(IO)"))
-               return MXSER_ERR_IOADDR;
+                       "mxser(IO)")) {
+               printk(KERN_ERR "mxser: can't request ports I/O region: "
+                               "0x%.8lx-0x%.8lx\n",
+                               brd->ports[0].ioaddr, brd->ports[0].ioaddr +
+                               8 * brd->info->nports - 1);
+               return -EIO;
+       }
         if (!request_region(brd->vector, 1, "mxser(vector)")) {
                 release_region(brd->ports[0].ioaddr, 8 * brd->info->nports);
-               return MXSER_ERR_VECTOR;
+               printk(KERN_ERR "mxser: can't request interrupt vector region: "
+                               "0x%.8lx-0x%.8lx\n",
+                               brd->ports[0].ioaddr, brd->ports[0].ioaddr +
+                               8 * brd->info->nports - 1);
+               return -EIO;
         }
         return brd->info->nports;
+
+err_irqconflict:
+       printk(KERN_ERR "mxser: invalid interrupt number\n");
+       return -EIO;
  }
  
  static int __devinit mxser_probe(struct pci_dev *pdev,
@@ -2657,20 +2636,20 @@ static int __devinit mxser_probe(struct pci_dev *pdev,
                         break;
  
         if (i >= MXSER_BOARDS) {
-               printk(KERN_ERR "Too many Smartio/Industio family boards found "
-                       "(maximum %d), board not configured\n", MXSER_BOARDS);
+               dev_err(&pdev->dev, "too many boards found (maximum %d), board "
+                               "not configured\n", MXSER_BOARDS);
                 goto err;
         }
  
         brd = &mxser_boards[i];
         brd->idx = i * MXSER_PORTS_PER_BOARD;
-       printk(KERN_INFO "Found MOXA %s board (BusNo=%d, DevNo=%d)\n",
+       dev_info(&pdev->dev, "found MOXA %s board (BusNo=%d, DevNo=%d)\n",
                 mxser_cards[ent->driver_data].name,
                 pdev->bus->number, PCI_SLOT(pdev->devfn));
  
         retval = pci_enable_device(pdev);
         if (retval) {
-               printk(KERN_ERR "Moxa SmartI/O PCI enable fail !\n");
+               dev_err(&pdev->dev, "PCI enable failed\n");
                 goto err;
         }
  
@@ -2772,11 +2751,8 @@ static struct pci_driver mxser_driver = {
  static int __init mxser_module_init(void)
  {
         struct mxser_board *brd;
-       unsigned long cap;
-       unsigned int i, m, isaloop;
-       int retval, b;
-
-       pr_debug("Loading module mxser ...\n");
+       unsigned int b, i, m;
+       int retval;
  
         mxvar_sdriver = alloc_tty_driver(MXSER_PORTS + 1);
         if (!mxvar_sdriver)
@@ -2806,74 +2782,43 @@ static int __init mxser_module_init(void)
                 goto err_put;
         }
  
-       mxvar_diagflag = 0;
-
-       m = 0;
         /* Start finding ISA boards here */
-       for (isaloop = 0; isaloop < 2; isaloop++)
-               for (b = 0; b < MXSER_BOARDS && m < MXSER_BOARDS; b++) {
-                       if (!isaloop)
-                               cap = mxserBoardCAP[b]; /* predefined */
-                       else
-                               cap = ioaddr[b]; /* module param */
-
-                       if (!cap)
-                               continue;
+       for (m = 0, b = 0; b < MXSER_BOARDS; b++) {
+               if (!ioaddr[b])
+                       continue;
+
+               brd = &mxser_boards[m];
+               retval = mxser_get_ISA_conf(ioaddr[b], brd); /* cap = ioaddr */
+               if (retval <= 0) {
+                       brd->info = NULL;
+                       continue;
+               }
  
-                       brd = &mxser_boards[m];
-                       retval = mxser_get_ISA_conf(cap, brd);
-
-                       if (retval != 0)
-                               printk(KERN_INFO "Found MOXA %s board "
-                                       "(CAP=0x%x)\n",
-                                       brd->info->name, ioaddr[b]);
-
-                       if (retval <= 0) {
-                               if (retval == MXSER_ERR_IRQ)
-                                       printk(KERN_ERR "Invalid interrupt "
-                                               "number, board not "
-                                               "configured\n");
-                               else if (retval == MXSER_ERR_IRQ_CONFLIT)
-                                       printk(KERN_ERR "Invalid interrupt "
-                                               "number, board not "
-                                               "configured\n");
-                               else if (retval == MXSER_ERR_VECTOR)
-                                       printk(KERN_ERR "Invalid interrupt "
-                                               "vector, board not "
-                                               "configured\n");
-                               else if (retval == MXSER_ERR_IOADDR)
-                                       printk(KERN_ERR "Invalid I/O address, "
-                                               "board not configured\n");
-
-                               brd->info = NULL;
-                               continue;
-                       }
+               printk(KERN_INFO "mxser: found MOXA %s board (CAP=0x%lx)\n",
+                               brd->info->name, ioaddr[b]);
  
-                       /* mxser_initbrd will hook ISR. */
-                       if (mxser_initbrd(brd, NULL) < 0) {
-                               brd->info = NULL;
-                               continue;
-                       }
+               /* mxser_initbrd will hook ISR. */
+               if (mxser_initbrd(brd, NULL) < 0) {
+                       brd->info = NULL;
+                       continue;
+               }
  
-                       brd->idx = m * MXSER_PORTS_PER_BOARD;
-                       for (i = 0; i < brd->info->nports; i++)
-                               tty_register_device(mxvar_sdriver, brd->idx + i,
-                                               NULL);
+               brd->idx = m * MXSER_PORTS_PER_BOARD;
+               for (i = 0; i < brd->info->nports; i++)
+                       tty_register_device(mxvar_sdriver, brd->idx + i, NULL);
  
-                       m++;
-               }
+               m++;
+       }
  
         retval = pci_register_driver(&mxser_driver);
         if (retval) {
-               printk(KERN_ERR "Can't register pci driver\n");
+               printk(KERN_ERR "mxser: can't register pci driver\n");
                 if (!m) {
                         retval = -ENODEV;
                         goto err_unr;
                 } /* else: we have some ISA cards under control */
         }
  
-       pr_debug("Done.\n");
-
         return 0;
  err_unr:
         tty_unregister_driver(mxvar_sdriver);
@@ -2886,8 +2831,6 @@ static void __exit mxser_module_exit(void)
  {
         unsigned int i, j;
  
-       pr_debug("Unloading module mxser ...\n");
-
         pci_unregister_driver(&mxser_driver);
  
         for (i = 0; i < MXSER_BOARDS; i++) /* ISA remains */
@@ -2901,8 +2844,6 @@ static void __exit mxser_module_exit(void)
         for (i = 0; i < MXSER_BOARDS; i++)
                 if (mxser_boards[i].info != NULL)
                         mxser_release_res(&mxser_boards[i], NULL, 1);
-
-       pr_debug("Done.\n");
  }
  
  module_init(mxser_module_init);
diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c

index ba012c2bdf7a3b8ceb136f32d0d24cede5037725..f9f72a21129257aa5a82da1944993e7860cd59ee 100644 (file)
--- a/drivers/char/nwflash.c
+++ b/drivers/char/nwflash.c
@@ -122,35 +122,20 @@ static int flash_ioctl(struct inode *inodep, struct file *filep, unsigned int cm
  static ssize_t flash_read(struct file *file, char __user *buf, size_t size,
                           loff_t *ppos)
  {
-       unsigned long p = *ppos;
-       unsigned int count = size;
-       int ret = 0;
+       ssize_t ret;
  
         if (flashdebug)
                 printk(KERN_DEBUG "flash_read: flash_read: offset=0x%lX, "
                        "buffer=%p, count=0x%X.\n", p, buf, count);
+       /*
+        * We now lock against reads and writes. --rmk
+        */
+       if (mutex_lock_interruptible(&nwflash_mutex))
+               return -ERESTARTSYS;
  
-       if (count)
-               ret = -ENXIO;
-
-       if (p < gbFlashSize) {
-               if (count > gbFlashSize - p)
-                       count = gbFlashSize - p;
+       ret = simple_read_from_buffer(buf, size, ppos, FLASH_BASE, gbFlashSize);
+       mutex_unlock(&nwflash_mutex);
  
-               /*
-                * We now lock against reads and writes. --rmk
-                */
-               if (mutex_lock_interruptible(&nwflash_mutex))
-                       return -ERESTARTSYS;
-
-               ret = copy_to_user(buf, (void *)(FLASH_BASE + p), count);
-               if (ret == 0) {
-                       ret = count;
-                       *ppos += count;
-               } else
-                       ret = -EFAULT;
-               mutex_unlock(&nwflash_mutex);
-       }
         return ret;
  }
  
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c

index 7af7a7e6b9c2e570ff53eff4152e5f61d6cf7738..bee39fdfba738de8a11429a1d2c57caa7b99cc26 100644 (file)
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -67,7 +67,7 @@
  #include <linux/major.h>
  #include <linux/ppdev.h>
  #include <linux/smp_lock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
  
  #define PP_VERSION "ppdev: user-space parallel port driver"
  #define CHRDEV "ppdev"
@@ -328,10 +328,9 @@ static enum ieee1284_phase init_phase (int mode)
         return IEEE1284_PH_FWD_IDLE;
  }
  
-static int pp_ioctl(struct inode *inode, struct file *file,
-                   unsigned int cmd, unsigned long arg)
+static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
  {
-       unsigned int minor = iminor(inode);
+       unsigned int minor = iminor(file->f_path.dentry->d_inode);
         struct pp_struct *pp = file->private_data;
         struct parport * port;
         void __user *argp = (void __user *)arg;
@@ -634,6 +633,15 @@ static int pp_ioctl(struct inode *inode, struct file *file,
         return 0;
  }
  
+static long pp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       long ret;
+       lock_kernel();
+       ret = pp_do_ioctl(file, cmd, arg);
+       unlock_kernel();
+       return ret;
+}
+
  static int pp_open (struct inode * inode, struct file * file)
  {
         unsigned int minor = iminor(inode);
@@ -745,7 +753,7 @@ static const struct file_operations pp_fops = {
         .read           = pp_read,
         .write          = pp_write,
         .poll           = pp_poll,
-       .ioctl          = pp_ioctl,
+       .unlocked_ioctl = pp_ioctl,
         .open           = pp_open,
         .release        = pp_release,
  };
diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c

index 0cdfee152916ffb7bab395049ca69fe47baea8ab..a8f68a3f14dd56e4beea33c06407f51d39cc62a1 100644 (file)
--- a/drivers/char/rio/rio_linux.c
+++ b/drivers/char/rio/rio_linux.c
@@ -179,7 +179,7 @@ static int rio_set_real_termios(void *ptr);
  static void rio_hungup(void *ptr);
  static void rio_close(void *ptr);
  static int rio_chars_in_buffer(void *ptr);
-static int rio_fw_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+static long rio_fw_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
  static int rio_init_drivers(void);
  
  static void my_hd(void *addr, int len);
@@ -240,7 +240,7 @@ static struct real_driver rio_real_driver = {
  
  static const struct file_operations rio_fw_fops = {
         .owner = THIS_MODULE,
-       .ioctl = rio_fw_ioctl,
+       .unlocked_ioctl = rio_fw_ioctl,
  };
  
  static struct miscdevice rio_fw_device = {
@@ -560,13 +560,15 @@ static void rio_close(void *ptr)
  
  
  
-static int rio_fw_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+static long rio_fw_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
  {
         int rc = 0;
         func_enter();
  
         /* The "dev" argument isn't used. */
+       lock_kernel();
         rc = riocontrol(p, 0, cmd, arg, capable(CAP_SYS_ADMIN));
+       unlock_kernel();
  
         func_exit();
         return rc;
diff --git a/drivers/char/sx.c b/drivers/char/sx.c

index 2162439bbe487544c589c6f0c054e6d849257a94..c385206f9db540caa925d615be1c1d4d566482f4 100644 (file)
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -286,8 +286,8 @@ static void sx_close(void *ptr);
  static int sx_chars_in_buffer(void *ptr);
  static int sx_init_board(struct sx_board *board);
  static int sx_init_portstructs(int nboards, int nports);
-static int sx_fw_ioctl(struct inode *inode, struct file *filp,
-               unsigned int cmd, unsigned long arg);
+static long sx_fw_ioctl(struct file *filp, unsigned int cmd,
+                                               unsigned long arg);
  static int sx_init_drivers(void);
  
  static struct tty_driver *sx_driver;
@@ -396,7 +396,7 @@ static struct real_driver sx_real_driver = {
  
  static const struct file_operations sx_fw_fops = {
         .owner = THIS_MODULE,
-       .ioctl = sx_fw_ioctl,
+       .unlocked_ioctl = sx_fw_ioctl,
  };
  
  static struct miscdevice sx_fw_device = {
@@ -1686,10 +1686,10 @@ static int do_memtest_w(struct sx_board *board, int min, int max)
  }
  #endif
  
-static int sx_fw_ioctl(struct inode *inode, struct file *filp,
-               unsigned int cmd, unsigned long arg)
+static long sx_fw_ioctl(struct file *filp, unsigned int cmd,
+                                                       unsigned long arg)
  {
-       int rc = 0;
+       long rc = 0;
         int __user *descr = (int __user *)arg;
         int i;
         static struct sx_board *board = NULL;
@@ -1699,13 +1699,10 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
  
         func_enter();
  
-#if 0
-       /* Removed superuser check: Sysops can use the permissions on the device
-          file to restrict access. Recommendation: Root only. (root.root 600) */
-       if (!capable(CAP_SYS_ADMIN)) {
+       if (!capable(CAP_SYS_RAWIO))
                 return -EPERM;
-       }
-#endif
+
+       lock_kernel();
  
         sx_dprintk(SX_DEBUG_FIRMWARE, "IOCTL %x: %lx\n", cmd, arg);
  
@@ -1720,19 +1717,23 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
                 for (i = 0; i < SX_NBOARDS; i++)
                         sx_dprintk(SX_DEBUG_FIRMWARE, "<%x> ", boards[i].flags);
                 sx_dprintk(SX_DEBUG_FIRMWARE, "\n");
+               unlock_kernel();
                 return -EIO;
         }
  
         switch (cmd) {
         case SXIO_SET_BOARD:
                 sx_dprintk(SX_DEBUG_FIRMWARE, "set board to %ld\n", arg);
+               rc = -EIO;
                 if (arg >= SX_NBOARDS)
-                       return -EIO;
+                       break;
                 sx_dprintk(SX_DEBUG_FIRMWARE, "not out of range\n");
                 if (!(boards[arg].flags & SX_BOARD_PRESENT))
-                       return -EIO;
+                       break;
                 sx_dprintk(SX_DEBUG_FIRMWARE, ".. and present!\n");
                 board = &boards[arg];
+               rc = 0;
+               /* FIXME: And this does ... nothing?? */
                 break;
         case SXIO_GET_TYPE:
                 rc = -ENOENT;   /* If we manage to miss one, return error. */
@@ -1746,7 +1747,7 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
                         rc = SX_TYPE_SI;
                 if (IS_EISA_BOARD(board))
                         rc = SX_TYPE_SI;
-               sx_dprintk(SX_DEBUG_FIRMWARE, "returning type= %d\n", rc);
+               sx_dprintk(SX_DEBUG_FIRMWARE, "returning type= %ld\n", rc);
                 break;
         case SXIO_DO_RAMTEST:
                 if (sx_initialized)     /* Already initialized: better not ramtest the board.  */
@@ -1760,19 +1761,26 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
                         rc = do_memtest(board, 0, 0x7ff8);
                         /* if (!rc) rc = do_memtest_w (board, 0, 0x7ff8); */
                 }
-               sx_dprintk(SX_DEBUG_FIRMWARE, "returning memtest result= %d\n",
-                          rc);
+               sx_dprintk(SX_DEBUG_FIRMWARE,
+                               "returning memtest result= %ld\n", rc);
                 break;
         case SXIO_DOWNLOAD:
-               if (sx_initialized)     /* Already initialized */
-                       return -EEXIST;
-               if (!sx_reset(board))
-                       return -EIO;
+               if (sx_initialized) {/* Already initialized */
+                       rc = -EEXIST;
+                       break;
+               }
+               if (!sx_reset(board)) {
+                       rc = -EIO;
+                       break;
+               }
                 sx_dprintk(SX_DEBUG_INIT, "reset the board...\n");
  
                 tmp = kmalloc(SX_CHUNK_SIZE, GFP_USER);
-               if (!tmp)
-                       return -ENOMEM;
+               if (!tmp) {
+                       rc = -ENOMEM;
+                       break;
+               }
+               /* FIXME: check returns */
                 get_user(nbytes, descr++);
                 get_user(offset, descr++);
                 get_user(data, descr++);
@@ -1782,7 +1790,8 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
                                                 (i + SX_CHUNK_SIZE > nbytes) ?
                                                 nbytes - i : SX_CHUNK_SIZE)) {
                                         kfree(tmp);
-                                       return -EFAULT;
+                                       rc = -EFAULT;
+                                       break;
                                 }
                                 memcpy_toio(board->base2 + offset + i, tmp,
                                                 (i + SX_CHUNK_SIZE > nbytes) ?
@@ -1798,13 +1807,17 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
                 rc = sx_nports;
                 break;
         case SXIO_INIT:
-               if (sx_initialized)     /* Already initialized */
-                       return -EEXIST;
+               if (sx_initialized) {   /* Already initialized */
+                       rc = -EEXIST;
+                       break;
+               }
                 /* This is not allowed until all boards are initialized... */
                 for (i = 0; i < SX_NBOARDS; i++) {
                         if ((boards[i].flags & SX_BOARD_PRESENT) &&
-                               !(boards[i].flags & SX_BOARD_INITIALIZED))
-                               return -EIO;
+                               !(boards[i].flags & SX_BOARD_INITIALIZED)) {
+                               rc = -EIO;
+                               break;
+                       }
                 }
                 for (i = 0; i < SX_NBOARDS; i++)
                         if (!(boards[i].flags & SX_BOARD_PRESENT))
@@ -1832,10 +1845,10 @@ static int sx_fw_ioctl(struct inode *inode, struct file *filp,
                 rc = sx_nports;
                 break;
         default:
-               printk(KERN_WARNING "Unknown ioctl on firmware device (%x).\n",
-                               cmd);
+               rc = -ENOTTY;
                 break;
         }
+       unlock_kernel();
         func_exit();
         return rc;
  }
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c

index 6f4d856df987e745a46f2a407a7e2706feb1ae57..e1b46bc7e43cd245794f02610afe6c88d49eeb47 100644 (file)
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -3580,7 +3580,6 @@ void proc_clear_tty(struct task_struct *p)
         p->signal->tty = NULL;
         spin_unlock_irq(&p->sighand->siglock);
  }
-EXPORT_SYMBOL(proc_clear_tty);
  
  /* Called under the sighand lock */
  
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c

index dc17fe3a88bc48f2813ad5c3e5dd27efd00ff7d3..d0f4eb6fdb7fee27eca6cfec84ff8cf25af209e4 100644 (file)
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -46,6 +46,9 @@ static char *in, *inbuf;
  /* The operations for our console. */
  static struct hv_ops virtio_cons;
  
+/* The hvc device */
+static struct hvc_struct *hvc;
+
  /*D:310 The put_chars() callback is pretty straightforward.
   *
   * We turn the characters into a scatter-gather list, add it to the output
@@ -134,6 +137,27 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int))
         return hvc_instantiate(0, 0, &virtio_cons);
  }
  
+/*
+ * We support only one console, so the hvc struct is a global variable.
+ * There is nothing to set up here; we only record the request in the flag.
+ */
+static int notifier_add_vio(struct hvc_struct *hp, int data)
+{
+       hp->irq_requested = 1;
+       return 0;
+}
+
+static void notifier_del_vio(struct hvc_struct *hp, int data)
+{
+       hp->irq_requested = 0;
+}
+
+static void hvc_handle_input(struct virtqueue *vq)
+{
+       if (hvc_poll(hvc))
+               hvc_kick();
+}
+
  /*D:370 Once we're further in boot, we get probed like any other virtio device.
   * At this stage we set up the output virtqueue.
   *
@@ -144,7 +168,6 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int))
  static int __devinit virtcons_probe(struct virtio_device *dev)
  {
         int err;
-       struct hvc_struct *hvc;
  
         vdev = dev;
  
@@ -158,7 +181,7 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
         /* Find the input queue. */
         /* FIXME: This is why we want to wean off hvc: we do nothing
          * when input comes in. */
-       in_vq = vdev->config->find_vq(vdev, 0, NULL);
+       in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input);
         if (IS_ERR(in_vq)) {
                 err = PTR_ERR(in_vq);
                 goto free;
@@ -173,15 +196,18 @@ static int __devinit virtcons_probe(struct virtio_device *dev)
         /* Start using the new console output. */
         virtio_cons.get_chars = get_chars;
         virtio_cons.put_chars = put_chars;
+       virtio_cons.notifier_add = notifier_add_vio;
+       virtio_cons.notifier_del = notifier_del_vio;
  
         /* The first argument of hvc_alloc() is the virtual console number, so
-        * we use zero.  The second argument is the interrupt number; we
-        * currently leave this as zero: it would be better not to use the
-        * hvc mechanism and fix this (FIXME!).
+        * we use zero.  The second argument is the parameter for the
+        * notification mechanism (like irq number). We currently leave this
+        * as zero, virtqueues have implicit notifications.
          *
          * The third argument is a "struct hv_ops" containing the put_chars()
-        * and get_chars() pointers.  The final argument is the output buffer
-        * size: we can do any size, so we put PAGE_SIZE here. */
+        * get_chars(), notifier_add() and notifier_del() pointers.
+        * The final argument is the output buffer size: we can do any size,
+        * so we put PAGE_SIZE here. */
         hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE);
         if (IS_ERR(hvc)) {
                 err = PTR_ERR(hvc);
diff --git a/drivers/char/vt.c b/drivers/char/vt.c

index cb8c90da3934fa60bb08b47460271a2135878b68..82a51f38a5469cd1a56637c6635b9c16f69f5f89 100644 (file)
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -2211,7 +2211,7 @@ rescan_last_byte:
                         c = 0xfffd;
                     tc = c;
                 } else {        /* no utf or alternate charset mode */
-                   tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c];
+                   tc = vc_translate(vc, c);
                 }
  
                 param.c = tc;
diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/drivers/char/xilinx_hwicap/xilinx_hwicap.c

index 51966ccf4ea3f56c4449f88c93906ab81e136111..8bfee5fb7223cd21e5dff7c0f4f62371f66fe963 100644 (file)
--- a/drivers/char/xilinx_hwicap/xilinx_hwicap.c
+++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.c
@@ -87,7 +87,6 @@
  #include <linux/mutex.h>
  #include <linux/smp_lock.h>
  #include <linux/sysctl.h>
-#include <linux/version.h>
  #include <linux/fs.h>
  #include <linux/cdev.h>
  #include <linux/platform_device.h>
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig

index 6e6c3c4aea6b074d53c9824a1060bbfc38e88069..5a11e3cbcae26748ac75ac5e3e4e31a724d1fab8 100644 (file)
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -123,6 +123,13 @@ config EDAC_I5000
           Support for error detection and correction the Intel
           Greekcreek/Blackford chipsets.
  
+config EDAC_I5100
+       tristate "Intel San Clemente MCH"
+       depends on EDAC_MM_EDAC && X86 && PCI
+       help
+         Support for error detection and correction on the Intel
+         San Clemente MCH.
+
  config EDAC_MPC85XX
         tristate "Freescale MPC85xx"
         depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile

index 83807731d4a9782f673e7dd46ad9d6af183d4193..e5e9104b55204a1672f5bc7e8f5547b3079aed7d 100644 (file)
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -19,6 +19,7 @@ endif
  
  obj-$(CONFIG_EDAC_AMD76X)              += amd76x_edac.o
  obj-$(CONFIG_EDAC_I5000)               += i5000_edac.o
+obj-$(CONFIG_EDAC_I5100)               += i5100_edac.o
  obj-$(CONFIG_EDAC_E7XXX)               += e7xxx_edac.o
  obj-$(CONFIG_EDAC_E752X)               += e752x_edac.o
  obj-$(CONFIG_EDAC_I82443BXGX)          += i82443bxgx_edac.o
diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c

index c94a0eb492cb1eb2b01bfa5c8a5b47d7099c79a1..facfdb1fa71c5ece5c6f7d7f14acf17227ffefd1 100644 (file)
--- a/drivers/edac/e752x_edac.c
+++ b/drivers/edac/e752x_edac.c
@@ -28,6 +28,7 @@
  #define E752X_REVISION " Ver: 2.0.2 " __DATE__
  #define EDAC_MOD_STR   "e752x_edac"
  
+static int report_non_memory_errors;
  static int force_function_unhide;
  static int sysbus_parity = -1;
  
@@ -117,7 +118,7 @@ static struct edac_pci_ctl_info *e752x_pci;
  #define E752X_BUF_FERR         0x70    /* Memory buffer first error reg (8b) */
  #define E752X_BUF_NERR         0x72    /* Memory buffer next error reg (8b) */
  #define E752X_BUF_ERRMASK      0x74    /* Memory buffer error mask reg (8b) */
-#define E752X_BUF_SMICMD       0x7A    /* Memory buffer SMI command reg (8b) */
+#define E752X_BUF_SMICMD       0x7A    /* Memory buffer SMI cmd reg (8b) */
  #define E752X_DRAM_FERR                0x80    /* DRAM first error register (16b) */
  #define E752X_DRAM_NERR                0x82    /* DRAM next error register (16b) */
  #define E752X_DRAM_ERRMASK     0x84    /* DRAM error mask register (8b) */
@@ -127,7 +128,7 @@ static struct edac_pci_ctl_info *e752x_pci;
                                         /*     error address register (32b) */
                                         /*
                                          * 31    Reserved
-                                        * 30:2  CE address (64 byte block 34:6)
+                                        * 30:2  CE address (64 byte block 34:6)
                                          * 1     Reserved
                                          * 0     HiLoCS
                                          */
@@ -147,11 +148,11 @@ static struct edac_pci_ctl_info *e752x_pci;
                                          * 1     Reserved
                                          * 0     HiLoCS
                                          */
-#define E752X_DRAM_SCRB_ADD    0xA8    /* DRAM first uncorrectable scrub memory */
+#define E752X_DRAM_SCRB_ADD    0xA8    /* DRAM 1st uncorrectable scrub mem */
                                         /*     error address register (32b) */
                                         /*
                                          * 31    Reserved
-                                        * 30:2  CE address (64 byte block 34:6)
+                                        * 30:2  CE address (64 byte block 34:6)
                                          * 1     Reserved
                                          * 0     HiLoCS
                                          */
@@ -394,9 +395,12 @@ static void do_process_ded_retry(struct mem_ctl_info *mci, u16 error,
         struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info;
  
         error_1b = retry_add;
-       page = error_1b >> (PAGE_SHIFT - 4);    /* convert the addr to 4k page */
-       row = pvt->mc_symmetric ? ((page >> 1) & 3) :   /* chip select are bits 14 & 13 */
+       page = error_1b >> (PAGE_SHIFT - 4);  /* convert the addr to 4k page */
+
+       /* chip select are bits 14 & 13 */
+       row = pvt->mc_symmetric ? ((page >> 1) & 3) :
                 edac_mc_find_csrow_by_page(mci, page);
+
         e752x_mc_printk(mci, KERN_WARNING,
                         "CE page 0x%lx, row %d : Memory read retry\n",
                         (long unsigned int)page, row);
@@ -422,12 +426,21 @@ static inline void process_threshold_ce(struct mem_ctl_info *mci, u16 error,
  }
  
  static char *global_message[11] = {
-       "PCI Express C1", "PCI Express C", "PCI Express B1",
-       "PCI Express B", "PCI Express A1", "PCI Express A",
-       "DMA Controler", "HUB or NS Interface", "System Bus",
-       "DRAM Controler", "Internal Buffer"
+       "PCI Express C1",
+       "PCI Express C",
+       "PCI Express B1",
+       "PCI Express B",
+       "PCI Express A1",
+       "PCI Express A",
+       "DMA Controller",
+       "HUB or NS Interface",
+       "System Bus",
+       "DRAM Controller",  /* index 9, see DRAM_ENTRY */
+       "Internal Buffer"
  };
  
+#define DRAM_ENTRY     9
+
  static char *fatal_message[2] = { "Non-Fatal ", "Fatal " };
  
  static void do_global_error(int fatal, u32 errors)
@@ -435,9 +448,16 @@ static void do_global_error(int fatal, u32 errors)
         int i;
  
         for (i = 0; i < 11; i++) {
-               if (errors & (1 << i))
-                       e752x_printk(KERN_WARNING, "%sError %s\n",
-                               fatal_message[fatal], global_message[i]);
+               if (errors & (1 << i)) {
+                       /* If the error is from DRAM Controller OR
+                        * we are to report ALL errors, then
+                        * report the error
+                        */
+                       if ((i == DRAM_ENTRY) || report_non_memory_errors)
+                               e752x_printk(KERN_WARNING, "%sError %s\n",
+                                       fatal_message[fatal],
+                                       global_message[i]);
+               }
         }
  }
  
@@ -1021,7 +1041,7 @@ static int e752x_get_devs(struct pci_dev *pdev, int dev_idx,
         struct pci_dev *dev;
  
         pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL,
-                                       pvt->dev_info->err_dev, pvt->bridge_ck);
+                               pvt->dev_info->err_dev, pvt->bridge_ck);
  
         if (pvt->bridge_ck == NULL)
                 pvt->bridge_ck = pci_scan_single_device(pdev->bus,
@@ -1034,8 +1054,9 @@ static int e752x_get_devs(struct pci_dev *pdev, int dev_idx,
                 return 1;
         }
  
-       dev = pci_get_device(PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].ctl_dev,
-                       NULL);
+       dev = pci_get_device(PCI_VENDOR_ID_INTEL,
+                               e752x_devs[dev_idx].ctl_dev,
+                               NULL);
  
         if (dev == NULL)
                 goto fail;
@@ -1316,7 +1337,8 @@ MODULE_DESCRIPTION("MC support for Intel e752x/3100 memory controllers");
  
  module_param(force_function_unhide, int, 0444);
  MODULE_PARM_DESC(force_function_unhide, "if BIOS sets Dev0:Fun1 up as hidden:"
-                " 1=force unhide and hope BIOS doesn't fight driver for Dev0:Fun1 access");
+                " 1=force unhide and hope BIOS doesn't fight driver for "
+               "Dev0:Fun1 access");
  
  module_param(edac_op_state, int, 0444);
  MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
@@ -1324,3 +1346,6 @@ MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
  module_param(sysbus_parity, int, 0444);
  MODULE_PARM_DESC(sysbus_parity, "0=disable system bus parity checking,"
                 " 1=enable system bus parity checking, default=auto-detect");
+module_param(report_non_memory_errors, int, 0644);
+MODULE_PARM_DESC(report_non_memory_errors, "0=disable non-memory error "
+               "reporting, 1=enable non-memory error reporting");
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c

index 021d18795145cbc364e676d763ed8369ccea8ee1..ad218fe4942dfcf258273ee22f56776f3a0b23c2 100644 (file)
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -44,6 +44,25 @@ int edac_mc_get_poll_msec(void)
         return edac_mc_poll_msec;
  }
  
+static int edac_set_poll_msec(const char *val, struct kernel_param *kp)
+{
+       long l;
+       int ret;
+
+       if (!val)
+               return -EINVAL;
+
+       ret = strict_strtol(val, 0, &l);
+       if (ret == -EINVAL || ((int)l != l))
+               return -EINVAL;
+       *((int *)kp->arg) = l;
+
+       /* notify edac_mc engine to reset the poll period */
+       edac_mc_reset_delay_period(l);
+
+       return 0;
+}
+
  /* Parameter declarations for above */
  module_param(edac_mc_panic_on_ue, int, 0644);
  MODULE_PARM_DESC(edac_mc_panic_on_ue, "Panic on uncorrected error: 0=off 1=on");
@@ -53,7 +72,8 @@ MODULE_PARM_DESC(edac_mc_log_ue,
  module_param(edac_mc_log_ce, int, 0644);
  MODULE_PARM_DESC(edac_mc_log_ce,
                  "Log correctable error to console: 0=off 1=on");
-module_param(edac_mc_poll_msec, int, 0644);
+module_param_call(edac_mc_poll_msec, edac_set_poll_msec, param_get_int,
+                 &edac_mc_poll_msec, 0644);
  MODULE_PARM_DESC(edac_mc_poll_msec, "Polling period in milliseconds");
  
  /*
@@ -103,16 +123,6 @@ static const char *edac_caps[] = {
  
  
  
-/*
- * /sys/devices/system/edac/mc;
- *     data structures and methods
- */
-static ssize_t memctrl_int_show(void *ptr, char *buffer)
-{
-       int *value = (int *)ptr;
-       return sprintf(buffer, "%u\n", *value);
-}
-
  static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count)
  {
         int *value = (int *)ptr;
@@ -123,23 +133,6 @@ static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count)
         return count;
  }
  
-/*
- * mc poll_msec time value
- */
-static ssize_t poll_msec_int_store(void *ptr, const char *buffer, size_t count)
-{
-       int *value = (int *)ptr;
-
-       if (isdigit(*buffer)) {
-               *value = simple_strtoul(buffer, NULL, 0);
-
-               /* notify edac_mc engine to reset the poll period */
-               edac_mc_reset_delay_period(*value);
-       }
-
-       return count;
-}
-
  
  /* EDAC sysfs CSROW data structures and methods
   */
@@ -185,7 +178,11 @@ static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data,
  static ssize_t channel_dimm_label_show(struct csrow_info *csrow,
                                 char *data, int channel)
  {
-       return snprintf(data, EDAC_MC_LABEL_LEN, "%s",
+       /* if field has not been initialized, there is nothing to send */
+       if (!csrow->channels[channel].label[0])
+               return 0;
+
+       return snprintf(data, EDAC_MC_LABEL_LEN, "%s\n",
                         csrow->channels[channel].label);
  }
  
@@ -649,98 +646,10 @@ static struct kobj_type ktype_mci = {
         .default_attrs = (struct attribute **)mci_attr,
  };
  
-/* show/store, tables, etc for the MC kset */
-
-
-struct memctrl_dev_attribute {
-       struct attribute attr;
-       void *value;
-        ssize_t(*show) (void *, char *);
-        ssize_t(*store) (void *, const char *, size_t);
-};
-
-/* Set of show/store abstract level functions for memory control object */
-static ssize_t memctrl_dev_show(struct kobject *kobj,
-                               struct attribute *attr, char *buffer)
-{
-       struct memctrl_dev_attribute *memctrl_dev;
-       memctrl_dev = (struct memctrl_dev_attribute *)attr;
-
-       if (memctrl_dev->show)
-               return memctrl_dev->show(memctrl_dev->value, buffer);
-
-       return -EIO;
-}
-
-static ssize_t memctrl_dev_store(struct kobject *kobj, struct attribute *attr,
-                                const char *buffer, size_t count)
-{
-       struct memctrl_dev_attribute *memctrl_dev;
-       memctrl_dev = (struct memctrl_dev_attribute *)attr;
-
-       if (memctrl_dev->store)
-               return memctrl_dev->store(memctrl_dev->value, buffer, count);
-
-       return -EIO;
-}
-
-static struct sysfs_ops memctrlfs_ops = {
-       .show = memctrl_dev_show,
-       .store = memctrl_dev_store
-};
-
-#define MEMCTRL_ATTR(_name, _mode, _show, _store)                      \
-static struct memctrl_dev_attribute attr_##_name = {                   \
-       .attr = {.name = __stringify(_name), .mode = _mode },   \
-       .value  = &_name,                                       \
-       .show   = _show,                                        \
-       .store  = _store,                                       \
-};
-
-#define MEMCTRL_STRING_ATTR(_name, _data, _mode, _show, _store)        \
-static struct memctrl_dev_attribute attr_##_name = {                   \
-       .attr = {.name = __stringify(_name), .mode = _mode },   \
-       .value  = _data,                                        \
-       .show   = _show,                                        \
-       .store  = _store,                                       \
-};
-
-/* csrow<id> control files */
-MEMCTRL_ATTR(edac_mc_panic_on_ue,
-       S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
-
-MEMCTRL_ATTR(edac_mc_log_ue,
-       S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
-
-MEMCTRL_ATTR(edac_mc_log_ce,
-       S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
-
-MEMCTRL_ATTR(edac_mc_poll_msec,
-       S_IRUGO | S_IWUSR, memctrl_int_show, poll_msec_int_store);
-
-/* Base Attributes of the memory ECC object */
-static struct memctrl_dev_attribute *memctrl_attr[] = {
-       &attr_edac_mc_panic_on_ue,
-       &attr_edac_mc_log_ue,
-       &attr_edac_mc_log_ce,
-       &attr_edac_mc_poll_msec,
-       NULL,
-};
-
-
-/* the ktype for the mc_kset internal kobj */
-static struct kobj_type ktype_mc_set_attribs = {
-       .sysfs_ops = &memctrlfs_ops,
-       .default_attrs = (struct attribute **)memctrl_attr,
-};
-
  /* EDAC memory controller sysfs kset:
   *     /sys/devices/system/edac/mc
   */
-static struct kset mc_kset = {
-       .kobj = {.ktype = &ktype_mc_set_attribs },
-};
-
+static struct kset *mc_kset;
  
  /*
   * edac_mc_register_sysfs_main_kobj
@@ -771,7 +680,7 @@ int edac_mc_register_sysfs_main_kobj(struct mem_ctl_info *mci)
         }
  
         /* this instance become part of the mc_kset */
-       kobj_mci->kset = &mc_kset;
+       kobj_mci->kset = mc_kset;
  
         /* register the mc<id> kobject to the mc_kset */
         err = kobject_init_and_add(kobj_mci, &ktype_mci, NULL,
@@ -1001,12 +910,9 @@ int edac_sysfs_setup_mc_kset(void)
         }
  
         /* Init the MC's kobject */
-       kobject_set_name(&mc_kset.kobj, "mc");
-       mc_kset.kobj.parent = &edac_class->kset.kobj;
-
-       /* register the mc_kset */
-       err = kset_register(&mc_kset);
-       if (err) {
+       mc_kset = kset_create_and_add("mc", NULL, &edac_class->kset.kobj);
+       if (!mc_kset) {
+               err = -ENOMEM;
                 debugf1("%s() Failed to register '.../edac/mc'\n", __func__);
                 goto fail_out;
         }
@@ -1028,6 +934,6 @@ fail_out:
   */
  void edac_sysfs_teardown_mc_kset(void)
  {
-       kset_unregister(&mc_kset);
+       kset_unregister(mc_kset);
  }
  
diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c

index 2c1fa1bb6df2fe87dbc96b3c511c24e509c43cf4..5c153dccc95e29ad2ac92658a5dec9a97da7d30a 100644 (file)
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -28,7 +28,7 @@ static int edac_pci_poll_msec = 1000; /* one second workq period */
  static atomic_t pci_parity_count = ATOMIC_INIT(0);
  static atomic_t pci_nonparity_count = ATOMIC_INIT(0);
  
-static struct kobject edac_pci_top_main_kobj;
+static struct kobject *edac_pci_top_main_kobj;
  static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0);
  
  /* getter functions for the data variables */
@@ -83,7 +83,7 @@ static void edac_pci_instance_release(struct kobject *kobj)
         pci = to_instance(kobj);
  
         /* decrement reference count on top main kobj */
-       kobject_put(&edac_pci_top_main_kobj);
+       kobject_put(edac_pci_top_main_kobj);
  
         kfree(pci);     /* Free the control struct */
  }
@@ -166,7 +166,7 @@ static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx)
          * track the number of PCI instances we have, and thus nest
          * properly on keeping the module loaded
          */
-       main_kobj = kobject_get(&edac_pci_top_main_kobj);
+       main_kobj = kobject_get(edac_pci_top_main_kobj);
         if (!main_kobj) {
                 err = -ENODEV;
                 goto error_out;
@@ -174,11 +174,11 @@ static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info *pci, int idx)
  
         /* And now register this new kobject under the main kobj */
         err = kobject_init_and_add(&pci->kobj, &ktype_pci_instance,
-                                  &edac_pci_top_main_kobj, "pci%d", idx);
+                                  edac_pci_top_main_kobj, "pci%d", idx);
         if (err != 0) {
                 debugf2("%s() failed to register instance pci%d\n",
                         __func__, idx);
-               kobject_put(&edac_pci_top_main_kobj);
+               kobject_put(edac_pci_top_main_kobj);
                 goto error_out;
         }
  
@@ -316,9 +316,10 @@ static struct edac_pci_dev_attribute *edac_pci_attr[] = {
   */
  static void edac_pci_release_main_kobj(struct kobject *kobj)
  {
-
         debugf0("%s() here to module_put(THIS_MODULE)\n", __func__);
  
+       kfree(kobj);
+
         /* last reference to top EDAC PCI kobject has been removed,
          * NOW release our ref count on the core module
          */
@@ -369,8 +370,16 @@ static int edac_pci_main_kobj_setup(void)
                 goto decrement_count_fail;
         }
  
+       edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+       if (!edac_pci_top_main_kobj) {
+               debugf1("Failed to allocate\n");
+               err = -ENOMEM;
+               goto kzalloc_fail;
+       }
+
         /* Instanstiate the pci object */
-       err = kobject_init_and_add(&edac_pci_top_main_kobj, &ktype_edac_pci_main_kobj,
+       err = kobject_init_and_add(edac_pci_top_main_kobj,
+                                  &ktype_edac_pci_main_kobj,
                                    &edac_class->kset.kobj, "pci");
         if (err) {
                 debugf1("Failed to register '.../edac/pci'\n");
@@ -381,13 +390,16 @@ static int edac_pci_main_kobj_setup(void)
          * for EDAC PCI, then edac_pci_main_kobj_teardown()
          * must be used, for resources to be cleaned up properly
          */
-       kobject_uevent(&edac_pci_top_main_kobj, KOBJ_ADD);
+       kobject_uevent(edac_pci_top_main_kobj, KOBJ_ADD);
         debugf1("Registered '.../edac/pci' kobject\n");
  
         return 0;
  
         /* Error unwind statck */
  kobject_init_and_add_fail:
+       kfree(edac_pci_top_main_kobj);
+
+kzalloc_fail:
         module_put(THIS_MODULE);
  
  decrement_count_fail:
@@ -414,7 +426,7 @@ static void edac_pci_main_kobj_teardown(void)
         if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) {
                 debugf0("%s() called kobject_put on main kobj\n",
                         __func__);
-               kobject_put(&edac_pci_top_main_kobj);
+               kobject_put(edac_pci_top_main_kobj);
         }
  }
  
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c

new file mode 100644 (file)

index 0000000..22db05a
--- /dev/null
+++ b/drivers/edac/i5100_edac.c
@@ -0,0 +1,981 @@
+/*
+ * Intel 5100 Memory Controllers kernel module
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License.
+ *
+ * This module is based on the following document:
+ *
+ * Intel 5100X Chipset Memory Controller Hub (MCH) - Datasheet
+ *      http://download.intel.com/design/chipsets/datashts/318378.pdf
+ *
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/edac.h>
+#include <linux/delay.h>
+#include <linux/mmzone.h>
+
+#include "edac_core.h"
+
+/* register addresses */
+
+/* device 16, func 1 */
+#define I5100_MC               0x40    /* Memory Control Register */
+#define I5100_MS               0x44    /* Memory Status Register */
+#define I5100_SPDDATA          0x48    /* Serial Presence Detect Status Reg */
+#define I5100_SPDCMD           0x4c    /* Serial Presence Detect Command Reg */
+#define I5100_TOLM             0x6c    /* Top of Low Memory */
+#define I5100_MIR0             0x80    /* Memory Interleave Range 0 */
+#define I5100_MIR1             0x84    /* Memory Interleave Range 1 */
+#define I5100_AMIR_0           0x8c    /* Adjusted Memory Interleave Range 0 */
+#define I5100_AMIR_1           0x90    /* Adjusted Memory Interleave Range 1 */
+#define I5100_FERR_NF_MEM      0xa0    /* MC First Non Fatal Errors */
+/*
+ * Each MnERR mask selects bit n, matching the index of the corresponding
+ * message in i5100_err_msg().  M1ERR is therefore bit 1, not bit 0.
+ */
+#define                I5100_FERR_NF_MEM_M16ERR_MASK   (1 << 16)
+#define                I5100_FERR_NF_MEM_M15ERR_MASK   (1 << 15)
+#define                I5100_FERR_NF_MEM_M14ERR_MASK   (1 << 14)
+#define                I5100_FERR_NF_MEM_M12ERR_MASK   (1 << 12)
+#define                I5100_FERR_NF_MEM_M11ERR_MASK   (1 << 11)
+#define                I5100_FERR_NF_MEM_M10ERR_MASK   (1 << 10)
+#define                I5100_FERR_NF_MEM_M6ERR_MASK    (1 << 6)
+#define                I5100_FERR_NF_MEM_M5ERR_MASK    (1 << 5)
+#define                I5100_FERR_NF_MEM_M4ERR_MASK    (1 << 4)
+#define                I5100_FERR_NF_MEM_M1ERR_MASK    (1 << 1)
+#define                I5100_FERR_NF_MEM_ANY_MASK      \
+                       (I5100_FERR_NF_MEM_M16ERR_MASK | \
+                       I5100_FERR_NF_MEM_M15ERR_MASK | \
+                       I5100_FERR_NF_MEM_M14ERR_MASK | \
+                       I5100_FERR_NF_MEM_M12ERR_MASK | \
+                       I5100_FERR_NF_MEM_M11ERR_MASK | \
+                       I5100_FERR_NF_MEM_M10ERR_MASK | \
+                       I5100_FERR_NF_MEM_M6ERR_MASK | \
+                       I5100_FERR_NF_MEM_M5ERR_MASK | \
+                       I5100_FERR_NF_MEM_M4ERR_MASK | \
+                       I5100_FERR_NF_MEM_M1ERR_MASK)
+#define        I5100_NERR_NF_MEM       0xa4    /* MC Next Non-Fatal Errors */
+#define I5100_EMASK_MEM                0xa8    /* MC Error Mask Register */
+
+/* device 21 and 22, func 0 */
+#define I5100_MTR_0    0x154   /* Memory Technology Registers 0-3 */
+#define I5100_DMIR     0x15c   /* DIMM Interleave Range */
+#define        I5100_VALIDLOG  0x18c   /* Valid Log Markers */
+#define        I5100_NRECMEMA  0x190   /* Non-Recoverable Memory Error Log Reg A */
+#define        I5100_NRECMEMB  0x194   /* Non-Recoverable Memory Error Log Reg B */
+#define        I5100_REDMEMA   0x198   /* Recoverable Memory Data Error Log Reg A */
+#define        I5100_REDMEMB   0x19c   /* Recoverable Memory Data Error Log Reg B */
+#define        I5100_RECMEMA   0x1a0   /* Recoverable Memory Error Log Reg A */
+#define        I5100_RECMEMB   0x1a4   /* Recoverable Memory Error Log Reg B */
+#define I5100_MTR_4    0x1b0   /* Memory Technology Registers 4,5 */
+
+/* bit field accessors */
+
+/* MC register value: bit 5 (errdeten) */
+static inline u32 i5100_mc_errdeten(u32 mc)
+{
+       return (mc >> 5) & 0x1;
+}
+
+/* SPDDATA register value: bit 15 (rdo) */
+static inline u16 i5100_spddata_rdo(u16 a)
+{
+       return (a >> 15) & 0x1;
+}
+
+/* SPDDATA register value: bit 13 (sbe) */
+static inline u16 i5100_spddata_sbe(u16 a)
+{
+       return (a >> 13) & 0x1;
+}
+
+/* SPDDATA register value: bit 12 (busy) */
+static inline u16 i5100_spddata_busy(u16 a)
+{
+       return (a >> 12) & 0x1;
+}
+
+/* SPDDATA register value: bits 7:0 (data) */
+static inline u16 i5100_spddata_data(u16 a)
+{
+       return a & 0xff;
+}
+
+/*
+ * Assemble an SPDCMD register value from its fields:
+ * dti -> bits 31:28, ckovrd -> bit 27, sa -> bits 26:24,
+ * ba -> bits 23:16, data -> bits 15:8, cmd -> bit 0.
+ * Each argument is truncated to the width of its field.
+ */
+static inline u32 i5100_spdcmd_create(u32 dti, u32 ckovrd, u32 sa, u32 ba,
+                                     u32 data, u32 cmd)
+{
+       u32 reg = 0;
+
+       reg |= (dti & 0xf) << 28;
+       reg |= (ckovrd & 0x1) << 27;
+       reg |= (sa & 0x7) << 24;
+       reg |= (ba & 0xff) << 16;
+       reg |= (data & 0xff) << 8;
+       reg |= cmd & 0x1;
+
+       return reg;
+}
+
+/* TOLM register value: bits 15:12 (tolm) */
+static inline u16 i5100_tolm_tolm(u16 a)
+{
+       return (a >> 12) & 0xf;
+}
+
+/* MIR register value: bits 15:4 (limit) */
+static inline u16 i5100_mir_limit(u16 a)
+{
+       return (a >> 4) & 0xfff;
+}
+
+/* MIR register value: bit 1 (way1) */
+static inline u16 i5100_mir_way1(u16 a)
+{
+       return (a >> 1) & 0x1;
+}
+
+/* MIR register value: bit 0 (way0) */
+static inline u16 i5100_mir_way0(u16 a)
+{
+       return a & 0x1;
+}
+
+/* FERR_NF_MEM register value: bit 28 (chan_indx) */
+static inline u32 i5100_ferr_nf_mem_chan_indx(u32 a)
+{
+       return (a >> 28) & 0x1;
+}
+
+/* FERR_NF_MEM register value restricted to I5100_FERR_NF_MEM_ANY_MASK */
+static inline u32 i5100_ferr_nf_mem_any(u32 a)
+{
+       const u32 mask = I5100_FERR_NF_MEM_ANY_MASK;
+
+       return a & mask;
+}
+
+/* NERR_NF_MEM register value, filtered with the same mask as FERR_NF_MEM */
+static inline u32 i5100_nerr_nf_mem_any(u32 a)
+{
+       return i5100_ferr_nf_mem_any(a);
+}
+
+/* DMIR register value: bits 26:16 (limit) */
+static inline u32 i5100_dmir_limit(u32 a)
+{
+       return (a >> 16) & 0x7ff;
+}
+
+/* DMIR register value: 2-bit rank field number @i, fields are 4 bits apart */
+static inline u32 i5100_dmir_rank(u32 a, u32 i)
+{
+       const u32 shift = 4 * i;
+
+       return (a >> shift) & 0x3;
+}
+
+/* MTR register value: bit 10 (present) */
+static inline u16 i5100_mtr_present(u16 a)
+{
+       return (a >> 10) & 0x1;
+}
+
+/* MTR register value: bit 9 (ethrottle) */
+static inline u16 i5100_mtr_ethrottle(u16 a)
+{
+       return (a >> 9) & 0x1;
+}
+
+/* MTR register value: bit 8 (width) */
+static inline u16 i5100_mtr_width(u16 a)
+{
+       return (a >> 8) & 0x1;
+}
+
+/* MTR register value: bit 6 (numbank) */
+static inline u16 i5100_mtr_numbank(u16 a)
+{
+       return (a >> 6) & 0x1;
+}
+
+/* MTR register value: bits 3:2 (numrow) */
+static inline u16 i5100_mtr_numrow(u16 a)
+{
+       return (a >> 2) & 0x3;
+}
+
+/* MTR register value: bits 1:0 (numcol) */
+static inline u16 i5100_mtr_numcol(u16 a)
+{
+       return a & 0x3;
+}
+
+
+/* VALIDLOG register value: bit 2 (redmemvalid) */
+static inline u32 i5100_validlog_redmemvalid(u32 a)
+{
+       return (a >> 2) & 0x1;
+}
+
+/* VALIDLOG register value: bit 1 (recmemvalid) */
+static inline u32 i5100_validlog_recmemvalid(u32 a)
+{
+       return (a >> 1) & 0x1;
+}
+
+/* VALIDLOG register value: bit 0 (nrecmemvalid) */
+static inline u32 i5100_validlog_nrecmemvalid(u32 a)
+{
+       return a & 0x1;
+}
+
+/* NRECMEMA register value: bits 19:15 (merr) */
+static inline u32 i5100_nrecmema_merr(u32 a)
+{
+       return (a >> 15) & 0x1f;
+}
+
+/* NRECMEMA register value: bits 14:12 (bank) */
+static inline u32 i5100_nrecmema_bank(u32 a)
+{
+       return (a >> 12) & 0x7;
+}
+
+/* NRECMEMA register value: bits 10:8 (rank) */
+static inline u32 i5100_nrecmema_rank(u32 a)
+{
+       return (a >> 8) & 0x7;
+}
+
+/* NRECMEMA register value: bits 7:0 (dm_buf_id) */
+static inline u32 i5100_nrecmema_dm_buf_id(u32 a)
+{
+       return a & 0xff;
+}
+
+/* NRECMEMB register value: bits 28:16 (cas) */
+static inline u32 i5100_nrecmemb_cas(u32 a)
+{
+       return (a >> 16) & 0x1fff;
+}
+
+/* NRECMEMB register value: bits 15:0 (ras) */
+static inline u32 i5100_nrecmemb_ras(u32 a)
+{
+       return a & 0xffff;
+}
+
+/* REDMEMB register value: bits 17:0 (ecc_locator) */
+static inline u32 i5100_redmemb_ecc_locator(u32 a)
+{
+       return a & 0x3ffff;
+}
+
+static inline u32 i5100_recmema_merr(u32 a) /* same bits as NRECMEMA merr */
+{
+       return i5100_nrecmema_merr(a);
+}
+
+static inline u32 i5100_recmema_bank(u32 a) /* same bits as NRECMEMA bank */
+{
+       return i5100_nrecmema_bank(a);
+}
+
+static inline u32 i5100_recmema_rank(u32 a) /* same bits as NRECMEMA rank */
+{
+       return i5100_nrecmema_rank(a);
+}
+
+static inline u32 i5100_recmema_dm_buf_id(u32 a) /* as NRECMEMA dm_buf_id */
+{
+       return i5100_nrecmema_dm_buf_id(a);
+}
+
+static inline u32 i5100_recmemb_cas(u32 a) /* same bits as NRECMEMB cas */
+{
+       return i5100_nrecmemb_cas(a);
+}
+
+static inline u32 i5100_recmemb_ras(u32 a) /* same bits as NRECMEMB ras */
+{
+       return i5100_nrecmemb_ras(a);
+}
+
+/* some generic limits -- these size the arrays in struct i5100_priv
+ * and bound the loops that fill them
+ */
+#define I5100_MAX_RANKS_PER_CTLR       6
+#define I5100_MAX_CTLRS                        2
+#define I5100_MAX_RANKS_PER_DIMM       4
+#define I5100_DIMM_ADDR_LINES          (6 - 3) /* 64 bits / 8 bits per byte */
+#define I5100_MAX_DIMM_SLOTS_PER_CTLR  4
+#define I5100_MAX_RANK_INTERLEAVE      4
+#define I5100_MAX_DMIRS                        5
+
+struct i5100_priv { /* driver-private state, reached through mci->pvt_info */
+       /* ranks on each dimm -- 0 maps to not present -- obtained via SPD */
+       int dimm_numrank[I5100_MAX_CTLRS][I5100_MAX_DIMM_SLOTS_PER_CTLR];
+
+       /*
+        * mainboard chip select map -- maps i5100 chip selects to
+        * DIMM slot chip selects.  In the case of only 4 ranks per
+        * controller, the mapping is fairly obvious but not unique.
+        * we map -1 -> NC and assume both controllers use the same
+        * map...
+        *
+        */
+       int dimm_csmap[I5100_MAX_DIMM_SLOTS_PER_CTLR][I5100_MAX_RANKS_PER_DIMM];
+
+       /* memory interleave range */
+       struct {
+               u64      limit;         /* MIR limit field << 28 */
+               unsigned way[2];        /* MIR way0 / way1 bits */
+       } mir[I5100_MAX_CTLRS];
+
+       /* adjusted memory interleave range register */
+       unsigned amir[I5100_MAX_CTLRS];
+
+       /* dimm interleave range */
+       struct {
+               unsigned rank[I5100_MAX_RANK_INTERLEAVE];
+               u64      limit;         /* DMIR limit field << 28 */
+       } dmir[I5100_MAX_CTLRS][I5100_MAX_DMIRS];
+
+       /* memory technology registers... */
+       struct {
+               unsigned present;       /* 0 or 1 */
+               unsigned ethrottle;     /* 0 or 1 */
+               unsigned width;         /* 4 or 8 bits  */
+               unsigned numbank;       /* 2 or 3 lines */
+               unsigned numrow;        /* 13 .. 16 lines */
+               unsigned numcol;        /* stored as 10 + a 2-bit field;
+                                        * NOTE(review): earlier comment said
+                                        * 11 .. 12 lines -- confirm range */
+       } mtr[I5100_MAX_CTLRS][I5100_MAX_RANKS_PER_CTLR];
+
+       u64 tolm;               /* top of low memory in bytes */
+       unsigned ranksperctlr;  /* number of ranks per controller */
+
+       struct pci_dev *mc;     /* device 16 func 1 */
+       struct pci_dev *ch0mm;  /* device 21 func 0 */
+       struct pci_dev *ch1mm;  /* device 22 func 0 */
+};
+
+/*
+ * Map a controller/rank pair to a slot number on the mainboard.
+ *
+ * For each DIMM slot of controller @ctlr, the first dimm_numrank entries
+ * of that slot's chip select map are compared against @rank.  The first
+ * match yields slot * 2 + ctlr; -1 is returned when nothing matches.
+ */
+static int i5100_rank_to_slot(const struct mem_ctl_info *mci,
+                             int ctlr, int rank)
+{
+       const struct i5100_priv *priv = mci->pvt_info;
+       int slot;
+       int cs;
+
+       for (slot = 0; slot < I5100_MAX_DIMM_SLOTS_PER_CTLR; slot++) {
+               const int *csmap = priv->dimm_csmap[slot];
+
+               for (cs = 0; cs < priv->dimm_numrank[ctlr][slot]; cs++) {
+                       if (csmap[cs] == rank)
+                               return slot * 2 + ctlr;
+               }
+       }
+
+       return -1;
+}
+
+/*
+ * Return the message for the lowest set bit of @err that has an entry in
+ * the table below (bits 0 .. 21), or "none" when no such bit is set.
+ */
+static const char *i5100_err_msg(unsigned err)
+{
+       static const char *merrs[] = {
+               "unknown", /* 0 */
+               "uncorrectable data ECC on replay", /* 1 */
+               "unknown", /* 2 */
+               "unknown", /* 3 */
+               "aliased uncorrectable demand data ECC", /* 4 */
+               "aliased uncorrectable spare-copy data ECC", /* 5 */
+               "aliased uncorrectable patrol data ECC", /* 6 */
+               "unknown", /* 7 */
+               "unknown", /* 8 */
+               "unknown", /* 9 */
+               "non-aliased uncorrectable demand data ECC", /* 10 */
+               "non-aliased uncorrectable spare-copy data ECC", /* 11 */
+               "non-aliased uncorrectable patrol data ECC", /* 12 */
+               "unknown", /* 13 */
+               "correctable demand data ECC", /* 14 */
+               "correctable spare-copy data ECC", /* 15 */
+               "correctable patrol data ECC", /* 16 */
+               "unknown", /* 17 */
+               "SPD protocol error", /* 18 */
+               "unknown", /* 19 */
+               "spare copy initiated", /* 20 */
+               "spare copy completed", /* 21 */
+       };
+       unsigned bit;
+
+       for (bit = 0; bit < ARRAY_SIZE(merrs); bit++) {
+               if ((err >> bit) & 1)
+                       return merrs[bit];
+       }
+
+       return "none";
+}
+
+/*
+ * Rank (per controller) for a csrow index: the remainder of @csrow
+ * divided by the number of ranks per controller.
+ */
+static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow)
+{
+       const struct i5100_priv *priv = mci->pvt_info;
+       const unsigned ranks = priv->ranksperctlr;
+
+       return csrow % ranks;
+}
+
+/*
+ * Controller for a csrow index: @csrow divided by the number of ranks
+ * per controller.
+ */
+static int i5100_csrow_to_cntlr(const struct mem_ctl_info *mci, int csrow)
+{
+       const struct i5100_priv *priv = mci->pvt_info;
+       const unsigned ranks = priv->ranksperctlr;
+
+       return csrow / ranks;
+}
+
+/*
+ * csrow index for a controller/rank pair: each controller owns a
+ * contiguous run of ranksperctlr csrows.
+ */
+static unsigned i5100_rank_to_csrow(const struct mem_ctl_info *mci,
+                                   int ctlr, int rank)
+{
+       const struct i5100_priv *priv = mci->pvt_info;
+       const unsigned first = ctlr * priv->ranksperctlr;
+
+       return first + rank;
+}
+
+/*
+ * Report a correctable error.
+ *
+ * Prints one KERN_ERR line describing the error location and @msg, then
+ * increments the CE counters of the controller, of the csrow derived
+ * from @ctlr/@rank, and of that csrow's channel 0.
+ */
+static void i5100_handle_ce(struct mem_ctl_info *mci,
+                           int ctlr,
+                           unsigned bank,
+                           unsigned rank,
+                           unsigned long syndrome,
+                           unsigned cas,
+                           unsigned ras,
+                           const char *msg)
+{
+       /* unsigned: matches i5100_rank_to_csrow() and the %u below */
+       const unsigned csrow = i5100_rank_to_csrow(mci, ctlr, rank);
+
+       printk(KERN_ERR
+               "CE ctlr %d, bank %u, rank %u, syndrome 0x%lx, "
+               "cas %u, ras %u, csrow %u, label \"%s\": %s\n",
+               ctlr, bank, rank, syndrome, cas, ras,
+               csrow, mci->csrows[csrow].channels[0].label, msg);
+
+       mci->ce_count++;
+       mci->csrows[csrow].ce_count++;
+       mci->csrows[csrow].channels[0].ce_count++;
+}
+
+/*
+ * Report an uncorrectable error.
+ *
+ * Prints one KERN_ERR line describing the error location and @msg, then
+ * increments the UE counters of the controller and of the csrow derived
+ * from @ctlr/@rank.
+ */
+static void i5100_handle_ue(struct mem_ctl_info *mci,
+                           int ctlr,
+                           unsigned bank,
+                           unsigned rank,
+                           unsigned long syndrome,
+                           unsigned cas,
+                           unsigned ras,
+                           const char *msg)
+{
+       /* unsigned: matches i5100_rank_to_csrow() and the %u below */
+       const unsigned csrow = i5100_rank_to_csrow(mci, ctlr, rank);
+
+       printk(KERN_ERR
+               "UE ctlr %d, bank %u, rank %u, syndrome 0x%lx, "
+               "cas %u, ras %u, csrow %u, label \"%s\": %s\n",
+               ctlr, bank, rank, syndrome, cas, ras,
+               csrow, mci->csrows[csrow].channels[0].label, msg);
+
+       mci->ue_count++;
+       mci->csrows[csrow].ue_count++;
+}
+
+static void i5100_read_log(struct mem_ctl_info *mci, int ctlr,
+                          u32 ferr, u32 nerr)
+{
+       /*
+        * Read the error log registers of one channel device and hand
+        * what they describe to the CE/UE handlers.  @ctlr picks the
+        * device (non-zero -> ch1mm, zero -> ch0mm); @ferr and @nerr are
+        * only used to choose the message text.
+        */
+       struct i5100_priv *priv = mci->pvt_info;
+       struct pci_dev *pdev = (ctlr) ? priv->ch1mm : priv->ch0mm;
+       u32 dw;
+       u32 dw2;
+       unsigned syndrome = 0;
+       unsigned ecc_loc = 0;   /* assigned below, not otherwise used here */
+       unsigned merr;
+       unsigned bank;
+       unsigned rank;
+       unsigned cas;
+       unsigned ras;
+
+       /* each of the three valid bits gates one group of reads below */
+       pci_read_config_dword(pdev, I5100_VALIDLOG, &dw);
+
+       if (i5100_validlog_redmemvalid(dw)) {
+               pci_read_config_dword(pdev, I5100_REDMEMA, &dw2);
+               syndrome = dw2; /* whole REDMEMA value is reported */
+               pci_read_config_dword(pdev, I5100_REDMEMB, &dw2);
+               ecc_loc = i5100_redmemb_ecc_locator(dw2);
+       }
+
+       if (i5100_validlog_recmemvalid(dw)) {
+               const char *msg;
+
+               pci_read_config_dword(pdev, I5100_RECMEMA, &dw2);
+               merr = i5100_recmema_merr(dw2);
+               bank = i5100_recmema_bank(dw2);
+               rank = i5100_recmema_rank(dw2);
+
+               pci_read_config_dword(pdev, I5100_RECMEMB, &dw2);
+               cas = i5100_recmemb_cas(dw2);
+               ras = i5100_recmemb_ras(dw2);
+
+               /* FIXME:  not really sure if this is what merr is...
+                */
+               if (!merr)
+                       msg = i5100_err_msg(ferr);
+               else
+                       msg = i5100_err_msg(nerr);
+
+               i5100_handle_ce(mci, ctlr, bank, rank, syndrome, cas, ras, msg);
+       }
+
+       if (i5100_validlog_nrecmemvalid(dw)) {
+               const char *msg;
+
+               pci_read_config_dword(pdev, I5100_NRECMEMA, &dw2);
+               merr = i5100_nrecmema_merr(dw2);
+               bank = i5100_nrecmema_bank(dw2);
+               rank = i5100_nrecmema_rank(dw2);
+
+               pci_read_config_dword(pdev, I5100_NRECMEMB, &dw2);
+               cas = i5100_nrecmemb_cas(dw2);
+               ras = i5100_nrecmemb_ras(dw2);
+
+               /* FIXME:  not really sure if this is what merr is...
+                */
+               if (!merr)
+                       msg = i5100_err_msg(ferr);
+               else
+                       msg = i5100_err_msg(nerr);
+
+               i5100_handle_ue(mci, ctlr, bank, rank, syndrome, cas, ras, msg);
+       }
+
+       /* write back the value read above; presumably this clears the
+        * valid markers -- TODO confirm against the datasheet
+        */
+       pci_write_config_dword(pdev, I5100_VALIDLOG, dw);
+}
+
+static void i5100_check_error(struct mem_ctl_info *mci)
+{
+       /*
+        * Check FERR_NF_MEM for any of the error bits in
+        * I5100_FERR_NF_MEM_ANY_MASK and, if one is set, write the
+        * error registers back and process the per-channel logs.
+        */
+       struct i5100_priv *priv = mci->pvt_info;
+       u32 dw;
+
+
+       pci_read_config_dword(priv->mc, I5100_FERR_NF_MEM, &dw);
+       if (i5100_ferr_nf_mem_any(dw)) {
+               u32 dw2;
+
+               /* NERR is written back only when non-zero, FERR always;
+                * presumably the write-back clears the latched bits --
+                * TODO confirm against the datasheet
+                */
+               pci_read_config_dword(priv->mc, I5100_NERR_NF_MEM, &dw2);
+               if (dw2)
+                       pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM,
+                                              dw2);
+               pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw);
+
+               /* the channel index comes from bit 28 of the FERR value */
+               i5100_read_log(mci, i5100_ferr_nf_mem_chan_indx(dw),
+                              i5100_ferr_nf_mem_any(dw),
+                              i5100_nerr_nf_mem_any(dw2));
+       }
+}
+
+/*
+ * Iterate over the PCI devices matching @vendor/@device with
+ * pci_get_device() and return the first one whose function number is
+ * @func, or NULL when the iteration ends without a match.
+ */
+static struct pci_dev *pci_get_device_func(unsigned vendor,
+                                          unsigned device,
+                                          unsigned func)
+{
+       struct pci_dev *dev = NULL;
+
+       while ((dev = pci_get_device(vendor, device, dev)) != NULL) {
+               if (PCI_FUNC(dev->devfn) == func)
+                       break;
+       }
+
+       return dev;
+}
+
+/*
+ * Number of pages behind a csrow.
+ *
+ * Returns 0 when the rank's MTR entry is not marked present.  Otherwise
+ * the size is 2^(I5100_DIMM_ADDR_LINES + numcol + numrow + numbank)
+ * bytes, divided by PAGE_SIZE.
+ */
+static unsigned long __devinit i5100_npages(struct mem_ctl_info *mci,
+                                           int csrow)
+{
+       struct i5100_priv *priv = mci->pvt_info;
+       const unsigned rank = i5100_csrow_to_rank(mci, csrow);
+       const unsigned ctlr = i5100_csrow_to_cntlr(mci, csrow);
+       unsigned long long bytes;
+
+       /* dimm present? */
+       if (!priv->mtr[ctlr][rank].present)
+               return 0;
+
+       bytes = 1ULL << (I5100_DIMM_ADDR_LINES +
+                        priv->mtr[ctlr][rank].numcol +
+                        priv->mtr[ctlr][rank].numrow +
+                        priv->mtr[ctlr][rank].numbank);
+
+       return (unsigned long)(bytes / PAGE_SIZE);
+}
+
+/*
+ * Read the memory technology register of every rank on both channel
+ * devices and decode it into priv->mtr[][].
+ */
+static void __devinit i5100_init_mtr(struct mem_ctl_info *mci)
+{
+       struct i5100_priv *priv = mci->pvt_info;
+       struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm };
+       int ctlr;
+       int rank;
+
+       for (ctlr = 0; ctlr < I5100_MAX_CTLRS; ctlr++) {
+               for (rank = 0; rank < I5100_MAX_RANKS_PER_CTLR; rank++) {
+                       unsigned reg;
+                       u16 mtr;
+
+                       /* ranks 0-3 start at MTR_0, ranks 4-5 at MTR_4 */
+                       if (rank < 4)
+                               reg = I5100_MTR_0 + rank * 2;
+                       else
+                               reg = I5100_MTR_4 + (rank - 4) * 2;
+
+                       pci_read_config_word(mms[ctlr], reg, &mtr);
+
+                       priv->mtr[ctlr][rank].present =
+                               i5100_mtr_present(mtr);
+                       priv->mtr[ctlr][rank].ethrottle =
+                               i5100_mtr_ethrottle(mtr);
+                       priv->mtr[ctlr][rank].width =
+                               4 + 4 * i5100_mtr_width(mtr);
+                       priv->mtr[ctlr][rank].numbank =
+                               2 + i5100_mtr_numbank(mtr);
+                       priv->mtr[ctlr][rank].numrow =
+                               13 + i5100_mtr_numrow(mtr);
+                       priv->mtr[ctlr][rank].numcol =
+                               10 + i5100_mtr_numcol(mtr);
+               }
+       }
+}
+
+/*
+ * Read one SPD byte through the SPDCMD/SPDDATA registers.
+ *
+ * Issues a command for byte @addr of the device at slave address
+ * @ch * 4 + @slot, polls the busy bit for at most 100ms, and stores the
+ * data field in *@byte.
+ *
+ * Returns 0 on success.  Returns -1 when the interface is busy on entry,
+ * when the busy bit has not cleared by the deadline, or when the final
+ * status has rdo clear or sbe set.
+ *
+ * FIXME: make this into a real i2c adapter (so that dimm-decode
+ * will work)?
+ */
+static int i5100_read_spd_byte(const struct mem_ctl_info *mci,
+                              u8 ch, u8 slot, u8 addr, u8 *byte)
+{
+       struct i5100_priv *priv = mci->pvt_info;
+       u16 w;
+       unsigned long et;
+
+       pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
+       if (i5100_spddata_busy(w))
+               return -1;
+
+       pci_write_config_dword(priv->mc, I5100_SPDCMD,
+                              i5100_spdcmd_create(0xa, 1, ch * 4 + slot, addr,
+                                                  0, 0));
+
+       /* wait up to 100ms */
+       et = jiffies + HZ / 10;
+       udelay(100);
+       while (1) {
+               pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
+               if (!i5100_spddata_busy(w))
+                       break;
+               /* give up at the deadline instead of spinning forever */
+               if (time_after(jiffies, et))
+                       return -1;
+               udelay(100);
+       }
+
+       if (!i5100_spddata_rdo(w) || i5100_spddata_sbe(w))
+               return -1;
+
+       *byte = i5100_spddata_data(w);
+
+       return 0;
+}
+
+/*
+ * fill dimm chip select map
+ *
+ * Every entry is first set to -1 (not connected); a fixed table is then
+ * filled in for slots 0 .. 3.
+ *
+ * FIXME:
+ *   o only valid for 4 ranks per controller
+ *   o not the only way to map chip selects to dimm slots
+ *   o investigate if there is some way to obtain this map from the bios
+ */
+static void __devinit i5100_init_dimm_csmap(struct mem_ctl_info *mci)
+{
+       struct i5100_priv *priv = mci->pvt_info;
+       int slot;
+       int cs;
+
+       WARN_ON(priv->ranksperctlr != 4);
+
+       /* default NC */
+       for (slot = 0; slot < I5100_MAX_DIMM_SLOTS_PER_CTLR; slot++) {
+               for (cs = 0; cs < I5100_MAX_RANKS_PER_DIMM; cs++)
+                       priv->dimm_csmap[slot][cs] = -1;
+       }
+
+       /* only 2 chip selects per slot... */
+       priv->dimm_csmap[0][0] = 0;
+       priv->dimm_csmap[0][1] = 3;
+
+       priv->dimm_csmap[1][0] = 1;
+       priv->dimm_csmap[1][1] = 2;
+
+       priv->dimm_csmap[2][0] = 2;
+
+       priv->dimm_csmap[3][0] = 3;
+}
+
+/*
+ * Record the number of ranks of every DIMM slot from SPD byte 5 (0 when
+ * the SPD read fails), then fill in the chip select map.  @pdev is not
+ * used.
+ */
+static void __devinit i5100_init_dimm_layout(struct pci_dev *pdev,
+                                            struct mem_ctl_info *mci)
+{
+       struct i5100_priv *priv = mci->pvt_info;
+       int ctlr;
+       int slot;
+
+       for (ctlr = 0; ctlr < I5100_MAX_CTLRS; ctlr++) {
+               for (slot = 0; slot < I5100_MAX_DIMM_SLOTS_PER_CTLR; slot++) {
+                       u8 spd;
+                       int nranks = 0;
+
+                       if (i5100_read_spd_byte(mci, ctlr, slot, 5, &spd) >= 0)
+                               nranks = (spd & 3) + 1;
+
+                       priv->dimm_numrank[ctlr][slot] = nranks;
+               }
+       }
+
+       i5100_init_dimm_csmap(mci);
+}
+
+/*
+ * Read the interleaving configuration into the private data.
+ *
+ * TOLM, MIR0/1 and AMIR_0/1 are read from @pdev; the DMIR registers are
+ * read from the two channel devices.  The TOLM field is scaled by
+ * 256 MiB and the MIR/DMIR limit fields are shifted left by 28 bits.
+ * Finishes by decoding the memory technology registers.
+ */
+static void __devinit i5100_init_interleaving(struct pci_dev *pdev,
+                                             struct mem_ctl_info *mci)
+{
+       u16 w;
+       u32 dw;
+       struct i5100_priv *priv = mci->pvt_info;
+       struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm };
+       int i;
+
+       pci_read_config_word(pdev, I5100_TOLM, &w);
+       priv->tolm = (u64) i5100_tolm_tolm(w) * 256 * 1024 * 1024;
+
+       pci_read_config_word(pdev, I5100_MIR0, &w);
+       priv->mir[0].limit = (u64) i5100_mir_limit(w) << 28;
+       priv->mir[0].way[1] = i5100_mir_way1(w);
+       priv->mir[0].way[0] = i5100_mir_way0(w);
+
+       pci_read_config_word(pdev, I5100_MIR1, &w);
+       priv->mir[1].limit = (u64) i5100_mir_limit(w) << 28;
+       priv->mir[1].way[1] = i5100_mir_way1(w);
+       priv->mir[1].way[0] = i5100_mir_way0(w);
+
+       pci_read_config_word(pdev, I5100_AMIR_0, &w);
+       priv->amir[0] = w;
+       pci_read_config_word(pdev, I5100_AMIR_1, &w);
+       priv->amir[1] = w;
+
+       for (i = 0; i < I5100_MAX_CTLRS; i++) {
+               int j;
+
+               /* bounds come from the dimensions of priv->dmir[][] */
+               for (j = 0; j < I5100_MAX_DMIRS; j++) {
+                       int k;
+
+                       pci_read_config_dword(mms[i], I5100_DMIR + j * 4, &dw);
+
+                       priv->dmir[i][j].limit =
+                               (u64) i5100_dmir_limit(dw) << 28;
+                       for (k = 0; k < I5100_MAX_RANK_INTERLEAVE; k++)
+                               priv->dmir[i][j].rank[k] =
+                                       i5100_dmir_rank(dw, k);
+               }
+       }
+
+       i5100_init_mtr(mci);
+}
+
+static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
+{
+       int i;
+       unsigned long total_pages = 0UL;
+       struct i5100_priv *priv = mci->pvt_info;
+
+       for (i = 0; i < mci->nr_csrows; i++) {
+               const unsigned long npages = i5100_npages(mci, i);
+               const unsigned cntlr = i5100_csrow_to_cntlr(mci, i);
+               const unsigned rank = i5100_csrow_to_rank(mci, i);
+
+               if (!npages)
+                       continue;
+
+               /*
+                * FIXME: these two are totally bogus -- I don't see how to
+                * map them correctly to this structure...
+                */
+               mci->csrows[i].first_page = total_pages;
+               mci->csrows[i].last_page = total_pages + npages - 1;
+               mci->csrows[i].page_mask = 0UL;
+
+               mci->csrows[i].nr_pages = npages;
+               mci->csrows[i].grain = 32;
+               mci->csrows[i].csrow_idx = i;
+               mci->csrows[i].dtype =
+                       (priv->mtr[cntlr][rank].width == 4) ? DEV_X4 : DEV_X8;
+               mci->csrows[i].ue_count = 0;
+               mci->csrows[i].ce_count = 0;
+               mci->csrows[i].mtype = MEM_RDDR2;
+               mci->csrows[i].edac_mode = EDAC_SECDED;
+               mci->csrows[i].mci = mci;
+               mci->csrows[i].nr_channels = 1;
+               mci->csrows[i].channels[0].chan_idx = 0;
+               mci->csrows[i].channels[0].ce_count = 0;
+               mci->csrows[i].channels[0].csrow = mci->csrows + i;
+               snprintf(mci->csrows[i].channels[0].label,
+                        sizeof(mci->csrows[i].channels[0].label),
+                        "DIMM%u", i5100_rank_to_slot(mci, cntlr, rank));
+
+               total_pages += npages;
+       }
+}
+
+static int __devinit i5100_init_one(struct pci_dev *pdev,
+                                   const struct pci_device_id *id)
+{
+       int rc;
+       struct mem_ctl_info *mci;
+       struct i5100_priv *priv;
+       struct pci_dev *ch0mm, *ch1mm;
+       int ret = 0;
+       u32 dw;
+       int ranksperch;
+
+       if (PCI_FUNC(pdev->devfn) != 1)
+               return -ENODEV;
+
+       rc = pci_enable_device(pdev);
+       if (rc < 0) {
+               ret = rc;
+               goto bail;
+       }
+
+       /* ECC enabled? */
+       pci_read_config_dword(pdev, I5100_MC, &dw);
+       if (!i5100_mc_errdeten(dw)) {
+               printk(KERN_INFO "i5100_edac: ECC not enabled.\n");
+               ret = -ENODEV;
+               goto bail_pdev;
+       }
+
+       /* figure out how many ranks, from strapped state of 48GB_Mode input */
+       pci_read_config_dword(pdev, I5100_MS, &dw);
+       ranksperch = !!(dw & (1 << 8)) * 2 + 4;
+
+       if (ranksperch != 4) {
+               /* FIXME: get 6 ranks / controller to work - need hw... */
+               printk(KERN_INFO "i5100_edac: unsupported configuration.\n");
+               ret = -ENODEV;
+               goto bail_pdev;
+       }
+
+       /* enable error reporting... */
+       pci_read_config_dword(pdev, I5100_EMASK_MEM, &dw);
+       dw &= ~I5100_FERR_NF_MEM_ANY_MASK;
+       pci_write_config_dword(pdev, I5100_EMASK_MEM, dw);
+
+       /* device 21, func 0, Channel 0 Memory Map, Error Flag/Mask, etc... */
+       ch0mm = pci_get_device_func(PCI_VENDOR_ID_INTEL,
+                                   PCI_DEVICE_ID_INTEL_5100_21, 0);
+       if (!ch0mm) {
+               ret = -ENODEV;
+               goto bail_pdev;
+       }
+
+       rc = pci_enable_device(ch0mm);
+       if (rc < 0) {
+               ret = rc;
+               goto bail_ch0;
+       }
+
+       /* device 22, func 0, Channel 1 Memory Map, Error Flag/Mask, etc... */
+       ch1mm = pci_get_device_func(PCI_VENDOR_ID_INTEL,
+                                   PCI_DEVICE_ID_INTEL_5100_22, 0);
+       if (!ch1mm) {
+               ret = -ENODEV;
+               goto bail_disable_ch0;
+       }
+
+       rc = pci_enable_device(ch1mm);
+       if (rc < 0) {
+               ret = rc;
+               goto bail_ch1;
+       }
+
+       mci = edac_mc_alloc(sizeof(*priv), ranksperch * 2, 1, 0);
+       if (!mci) {
+               ret = -ENOMEM;
+               goto bail_disable_ch1;
+       }
+
+       mci->dev = &pdev->dev;
+
+       priv = mci->pvt_info;
+       priv->ranksperctlr = ranksperch;
+       priv->mc = pdev;
+       priv->ch0mm = ch0mm;
+       priv->ch1mm = ch1mm;
+
+       i5100_init_dimm_layout(pdev, mci);
+       i5100_init_interleaving(pdev, mci);
+
+       mci->mtype_cap = MEM_FLAG_FB_DDR2;
+       mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+       mci->edac_cap = EDAC_FLAG_SECDED;
+       mci->mod_name = "i5100_edac.c";
+       mci->mod_ver = "not versioned";
+       mci->ctl_name = "i5100";
+       mci->dev_name = pci_name(pdev);
+       mci->ctl_page_to_phys = NULL;
+
+       mci->edac_check = i5100_check_error;
+
+       i5100_init_csrows(mci);
+
+       /* this strange construction seems to be in every driver, dunno why */
+       switch (edac_op_state) {
+       case EDAC_OPSTATE_POLL:
+       case EDAC_OPSTATE_NMI:
+               break;
+       default:
+               edac_op_state = EDAC_OPSTATE_POLL;
+               break;
+       }
+
+       if (edac_mc_add_mc(mci)) {
+               ret = -ENODEV;
+               goto bail_mc;
+       }
+
+       return ret;
+
+bail_mc:
+       edac_mc_free(mci);
+
+bail_disable_ch1:
+       pci_disable_device(ch1mm);
+
+bail_ch1:
+       pci_dev_put(ch1mm);
+
+bail_disable_ch0:
+       pci_disable_device(ch0mm);
+
+bail_ch0:
+       pci_dev_put(ch0mm);
+
+bail_pdev:
+       pci_disable_device(pdev);
+
+bail:
+       return ret;
+}
+
+static void __devexit i5100_remove_one(struct pci_dev *pdev)
+{
+       struct mem_ctl_info *mci;
+       struct i5100_priv *priv;
+
+       mci = edac_mc_del_mc(&pdev->dev);
+
+       if (!mci)
+               return;
+
+       priv = mci->pvt_info;
+       pci_disable_device(pdev);
+       pci_disable_device(priv->ch0mm);
+       pci_disable_device(priv->ch1mm);
+       pci_dev_put(priv->ch0mm);
+       pci_dev_put(priv->ch1mm);
+
+       edac_mc_free(mci);
+}
+
+static const struct pci_device_id i5100_pci_tbl[] __devinitdata = {
+       /* Device 16, Function 0, Channel 0 Memory Map, Error Flag/Mask, ... */
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5100_16) },
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, i5100_pci_tbl);
+
+static struct pci_driver i5100_driver = {
+       .name = KBUILD_BASENAME,
+       .probe = i5100_init_one,
+       .remove = __devexit_p(i5100_remove_one),
+       .id_table = i5100_pci_tbl,
+};
+
+static int __init i5100_init(void)
+{
+       int pci_rc;
+
+       pci_rc = pci_register_driver(&i5100_driver);
+
+       return (pci_rc < 0) ? pci_rc : 0;
+}
+
+static void __exit i5100_exit(void)
+{
+       pci_unregister_driver(&i5100_driver);
+}
+
+module_init(i5100_init);
+module_exit(i5100_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR
+    ("Arthur Jones <ajones@riverbed.com>");
+MODULE_DESCRIPTION("MC Driver for Intel I5100 memory controllers");
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c

index d49361bfe670e09a4af2dfae7ec99d9646851716..2265d9ca1535652018fb9f28f87f3d7ba447231c 100644 (file)
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -195,14 +195,15 @@ static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id)
         return IRQ_HANDLED;
  }
  
-static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev)
+static int __devinit mpc85xx_pci_err_probe(struct of_device *op,
+                                          const struct of_device_id *match)
  {
         struct edac_pci_ctl_info *pci;
         struct mpc85xx_pci_pdata *pdata;
-       struct resource *r;
+       struct resource r;
         int res = 0;
  
-       if (!devres_open_group(&pdev->dev, mpc85xx_pci_err_probe, GFP_KERNEL))
+       if (!devres_open_group(&op->dev, mpc85xx_pci_err_probe, GFP_KERNEL))
                 return -ENOMEM;
  
         pci = edac_pci_alloc_ctl_info(sizeof(*pdata), "mpc85xx_pci_err");
@@ -212,34 +213,37 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev)
         pdata = pci->pvt_info;
         pdata->name = "mpc85xx_pci_err";
         pdata->irq = NO_IRQ;
-       platform_set_drvdata(pdev, pci);
-       pci->dev = &pdev->dev;
+       dev_set_drvdata(&op->dev, pci);
+       pci->dev = &op->dev;
         pci->mod_name = EDAC_MOD_STR;
         pci->ctl_name = pdata->name;
-       pci->dev_name = pdev->dev.bus_id;
+       pci->dev_name = op->dev.bus_id;
  
         if (edac_op_state == EDAC_OPSTATE_POLL)
                 pci->edac_check = mpc85xx_pci_check;
  
         pdata->edac_idx = edac_pci_idx++;
  
-       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!r) {
+       res = of_address_to_resource(op->node, 0, &r);
+       if (res) {
                 printk(KERN_ERR "%s: Unable to get resource for "
                        "PCI err regs\n", __func__);
                 goto err;
         }
  
-       if (!devm_request_mem_region(&pdev->dev, r->start,
-                                    r->end - r->start + 1, pdata->name)) {
+       /* we only need the error registers */
+       r.start += 0xe00;
+
+       if (!devm_request_mem_region(&op->dev, r.start,
+                                       r.end - r.start + 1, pdata->name)) {
                 printk(KERN_ERR "%s: Error while requesting mem region\n",
                        __func__);
                 res = -EBUSY;
                 goto err;
         }
  
-       pdata->pci_vbase = devm_ioremap(&pdev->dev, r->start,
-                                       r->end - r->start + 1);
+       pdata->pci_vbase = devm_ioremap(&op->dev, r.start,
+                                       r.end - r.start + 1);
         if (!pdata->pci_vbase) {
                 printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__);
                 res = -ENOMEM;
@@ -266,14 +270,15 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev)
         }
  
         if (edac_op_state == EDAC_OPSTATE_INT) {
-               pdata->irq = platform_get_irq(pdev, 0);
-               res = devm_request_irq(&pdev->dev, pdata->irq,
+               pdata->irq = irq_of_parse_and_map(op->node, 0);
+               res = devm_request_irq(&op->dev, pdata->irq,
                                        mpc85xx_pci_isr, IRQF_DISABLED,
                                        "[EDAC] PCI err", pci);
                 if (res < 0) {
                         printk(KERN_ERR
                                "%s: Unable to requiest irq %d for "
                                "MPC85xx PCI err\n", __func__, pdata->irq);
+                       irq_dispose_mapping(pdata->irq);
                         res = -ENODEV;
                         goto err2;
                 }
@@ -282,23 +287,23 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev)
                        pdata->irq);
         }
  
-       devres_remove_group(&pdev->dev, mpc85xx_pci_err_probe);
+       devres_remove_group(&op->dev, mpc85xx_pci_err_probe);
         debugf3("%s(): success\n", __func__);
         printk(KERN_INFO EDAC_MOD_STR " PCI err registered\n");
  
         return 0;
  
  err2:
-       edac_pci_del_device(&pdev->dev);
+       edac_pci_del_device(&op->dev);
  err:
         edac_pci_free_ctl_info(pci);
-       devres_release_group(&pdev->dev, mpc85xx_pci_err_probe);
+       devres_release_group(&op->dev, mpc85xx_pci_err_probe);
         return res;
  }
  
-static int mpc85xx_pci_err_remove(struct platform_device *pdev)
+static int mpc85xx_pci_err_remove(struct of_device *op)
  {
-       struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev);
+       struct edac_pci_ctl_info *pci = dev_get_drvdata(&op->dev);
         struct mpc85xx_pci_pdata *pdata = pci->pvt_info;
  
         debugf0("%s()\n", __func__);
@@ -318,12 +323,26 @@ static int mpc85xx_pci_err_remove(struct platform_device *pdev)
         return 0;
  }
  
-static struct platform_driver mpc85xx_pci_err_driver = {
+static struct of_device_id mpc85xx_pci_err_of_match[] = {
+       {
+        .compatible = "fsl,mpc8540-pcix",
+        },
+       {
+        .compatible = "fsl,mpc8540-pci",
+       },
+       {},
+};
+
+static struct of_platform_driver mpc85xx_pci_err_driver = {
+       .owner = THIS_MODULE,
+       .name = "mpc85xx_pci_err",
+       .match_table = mpc85xx_pci_err_of_match,
         .probe = mpc85xx_pci_err_probe,
         .remove = __devexit_p(mpc85xx_pci_err_remove),
         .driver = {
-               .name = "mpc85xx_pci_err",
-       }
+                  .name = "mpc85xx_pci_err",
+                  .owner = THIS_MODULE,
+                  },
  };
  
  #endif                         /* CONFIG_PCI */
@@ -1002,7 +1021,7 @@ static int __init mpc85xx_mc_init(void)
                 printk(KERN_WARNING EDAC_MOD_STR "L2 fails to register\n");
  
  #ifdef CONFIG_PCI
-       res = platform_driver_register(&mpc85xx_pci_err_driver);
+       res = of_register_platform_driver(&mpc85xx_pci_err_driver);
         if (res)
                 printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n");
  #endif
@@ -1025,7 +1044,7 @@ static void __exit mpc85xx_mc_exit(void)
  {
         mtspr(SPRN_HID1, orig_hid1);
  #ifdef CONFIG_PCI
-       platform_driver_unregister(&mpc85xx_pci_err_driver);
+       of_unregister_platform_driver(&mpc85xx_pci_err_driver);
  #endif
         of_unregister_platform_driver(&mpc85xx_l2_err_driver);
         of_unregister_platform_driver(&mpc85xx_mc_err_driver);
diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c

index bf071f140a054c492e16321bdf33bd2c06f6599d..083ce8d0c63d78c009c45588e55ac9c85ac665c4 100644 (file)
--- a/drivers/edac/mv64x60_edac.c
+++ b/drivers/edac/mv64x60_edac.c
@@ -71,6 +71,35 @@ static irqreturn_t mv64x60_pci_isr(int irq, void *dev_id)
         return IRQ_HANDLED;
  }
  
+/*
+ * Bit 0 of MV64x60_PCIx_ERR_MASK does not exist on the 64360 and because of
+ * errata FEr-#11 and FEr-#16 for the 64460, it should be 0 on that chip as
+ * well.  IOW, don't set bit 0.
+ */
+
+/* Erratum FEr PCI-#16: clear bit 0 of PCI SERRn Mask reg. */
+static int __init mv64x60_pci_fixup(struct platform_device *pdev)
+{
+       struct resource *r;
+       void __iomem *pci_serr;
+
+       r = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       if (!r) {
+               printk(KERN_ERR "%s: Unable to get resource for "
+                      "PCI err regs\n", __func__);
+               return -ENOENT;
+       }
+
+       pci_serr = ioremap(r->start, r->end - r->start + 1);
+       if (!pci_serr)
+               return -ENOMEM;
+
+       out_le32(pci_serr, in_le32(pci_serr) & ~0x1);
+       iounmap(pci_serr);
+
+       return 0;
+}
+
  static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
  {
         struct edac_pci_ctl_info *pci;
@@ -128,6 +157,12 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
                 goto err;
         }
  
+       res = mv64x60_pci_fixup(pdev);
+       if (res < 0) {
+               printk(KERN_ERR "%s: PCI fixup failed\n", __func__);
+               goto err;
+       }
+
         out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, 0);
         out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK, 0);
         out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK,
@@ -612,7 +647,7 @@ static void get_total_mem(struct mv64x60_mc_pdata *pdata)
         if (!np)
                 return;
  
-       reg = get_property(np, "reg", NULL);
+       reg = of_get_property(np, "reg", NULL);
  
         pdata->total_mem = reg[1];
  }
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c

index 0b624e927a6fea8c4306949b85d28e0b5b980417..c66817e7717b8f6b42e364d85d687726983afc24 100644 (file)
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -152,20 +152,11 @@ static ssize_t smi_data_read(struct kobject *kobj,
                              struct bin_attribute *bin_attr,
                              char *buf, loff_t pos, size_t count)
  {
-       size_t max_read;
         ssize_t ret;
  
         mutex_lock(&smi_data_lock);
-
-       if (pos >= smi_data_buf_size) {
-               ret = 0;
-               goto out;
-       }
-
-       max_read = smi_data_buf_size - pos;
-       ret = min(max_read, count);
-       memcpy(buf, smi_data_buf + pos, ret);
-out:
+       ret = memory_read_from_buffer(buf, count, &pos, smi_data_buf,
+                                       smi_data_buf_size);
         mutex_unlock(&smi_data_lock);
         return ret;
  }
diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c

index 7430e218cda6d54546a72ada7871216ae4916e1c..13946ebd77d6dbfb904eeec75f647a4ceabdf057 100644 (file)
--- a/drivers/firmware/dell_rbu.c
+++ b/drivers/firmware/dell_rbu.c
@@ -507,11 +507,6 @@ static ssize_t read_packet_data(char *buffer, loff_t pos, size_t count)
  
  static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count)
  {
-       unsigned char *ptemp = NULL;
-       size_t bytes_left = 0;
-       size_t data_length = 0;
-       ssize_t ret_count = 0;
-
         /* check to see if we have something to return */
         if ((rbu_data.image_update_buffer == NULL) ||
                 (rbu_data.bios_image_size == 0)) {
@@ -519,28 +514,11 @@ static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count)
                         "bios_image_size %lu\n",
                         rbu_data.image_update_buffer,
                         rbu_data.bios_image_size);
-               ret_count = -ENOMEM;
-               goto read_rbu_data_exit;
-       }
-
-       if (pos > rbu_data.bios_image_size) {
-               ret_count = 0;
-               goto read_rbu_data_exit;
+               return -ENOMEM;
         }
  
-       bytes_left = rbu_data.bios_image_size - pos;
-       data_length = min(bytes_left, count);
-
-       ptemp = rbu_data.image_update_buffer;
-       memcpy(buffer, (ptemp + pos), data_length);
-
-       if ((pos + count) > rbu_data.bios_image_size)
-               /* this was the last copy */
-               ret_count = bytes_left;
-       else
-               ret_count = count;
-      read_rbu_data_exit:
-       return ret_count;
+       return memory_read_from_buffer(buffer, count, &pos,
+                       rbu_data.image_update_buffer, rbu_data.bios_image_size);
  }
  
  static ssize_t read_rbu_data(struct kobject *kobj,
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig

index fced1909cbba0dd055c2a01cd73fc5303138750b..dbd42d6c93a701b2a0e52ec4a118f36ed538701e 100644 (file)
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -2,15 +2,40 @@
  # GPIO infrastructure and expanders
  #
  
-config HAVE_GPIO_LIB
+config ARCH_WANT_OPTIONAL_GPIOLIB
         bool
+       help
+         Select this config option from the architecture Kconfig if
+         it is possible to use gpiolib on the architecture, but the
+         user should decide whether to actually build it or not.
+         Select this instead of ARCH_REQUIRE_GPIOLIB if your architecture does
+         not depend on GPIOs being available, leaving it to the user to
+         decide whether it is needed or not.
+
+config ARCH_REQUIRE_GPIOLIB
+       bool
+       select GPIOLIB
         help
           Platforms select gpiolib if they use this infrastructure
           for all their GPIOs, usually starting with ones integrated
           into SOC processors.
+         Selecting this from the architecture code will cause the gpiolib
+         code to always get built in.
+
+
+
+menuconfig GPIOLIB
+       bool "GPIO Support"
+       depends on ARCH_WANT_OPTIONAL_GPIOLIB || ARCH_REQUIRE_GPIOLIB
+       select GENERIC_GPIO
+       help
+         This enables GPIO support through the generic GPIO library.
+         You only need to enable this if you also want to enable
+         one or more of the GPIO expansion card drivers below.
  
-menu "GPIO Support"
-       depends on HAVE_GPIO_LIB
+         If unsure, say N.
+
+if GPIOLIB
  
  config DEBUG_GPIO
         bool "Debug GPIO calls"
@@ -23,10 +48,44 @@ config DEBUG_GPIO
           slower.  The diagnostics help catch the type of setup errors
           that are most common when setting up new platforms or boards.
  
+config GPIO_SYSFS
+       bool "/sys/class/gpio/... (sysfs interface)"
+       depends on SYSFS && EXPERIMENTAL
+       help
+         Say Y here to add a sysfs interface for GPIOs.
+
+         This is mostly useful to work around omissions in a system's
+         kernel support.  Those are common in custom and semicustom
+         hardware assembled using standard kernels with a minimum of
+         custom patches.  In those cases, userspace code may import
+         a given GPIO from the kernel, if no kernel driver requested it.
+
+         Kernel drivers may also request that a particular GPIO be
+         exported to userspace; this can be useful when debugging.
+
  # put expanders in the right section, in alphabetical order
  
  comment "I2C GPIO expanders:"
  
+config GPIO_MAX732X
+       tristate "MAX7319, MAX7320-7327 I2C Port Expanders"
+       depends on I2C
+       help
+         Say yes here to support the MAX7319, MAX7320-7327 series of I2C
+         Port Expanders. Each IO port on these chips has a fixed role of
+         Input (designated by 'I'), Push-Pull Output ('O'), or Open-Drain
+         Input and Output (designated by 'P'). The combinations are listed
+         below:
+
+         8 bits:       max7319 (8I), max7320 (8O), max7321 (8P),
+                       max7322 (4I4O), max7323 (4P4O)
+
+         16 bits:      max7324 (8I8O), max7325 (8P8O),
+                       max7326 (4I12O), max7327 (4P12O)
+
+         Board setup code must specify the model to use, and the start
+         number for these GPIOs.
+
  config GPIO_PCA953X
         tristate "PCA953x, PCA955x, and MAX7310 I/O ports"
         depends on I2C
@@ -68,6 +127,24 @@ config GPIO_PCF857X
           This driver provides an in-kernel interface to those GPIOs using
           platform-neutral GPIO calls.
  
+comment "PCI GPIO expanders:"
+
+config GPIO_BT8XX
+       tristate "BT8XX GPIO abuser"
+       depends on PCI && VIDEO_BT848=n
+       help
+         The BT8xx frame grabber chip has 24 GPIO pins that can be abused
+         as a cheap PCI GPIO card.
+
+         This chip can be found on Miro, Hauppauge and STB TV-cards.
+
+         The card needs to be physically altered for using it as a
+         GPIO card. For more information on how to build a GPIO card
+         from a BT8xx TV card, see the documentation file at
+         Documentation/bt8xxgpio.txt
+
+         If unsure, say N.
+
  comment "SPI GPIO expanders:"
  
  config GPIO_MAX7301
@@ -83,4 +160,4 @@ config GPIO_MCP23S08
           SPI driver for Microchip MCP23S08 I/O expander.  This provides
           a GPIO interface supporting inputs and outputs.
  
-endmenu
+endif
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile

index 16e796dc5410fe8659466d54b250b357eee1a6ac..01b4bbde1956e48933d23d60c782ef94f8dff63e 100644 (file)
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -2,9 +2,11 @@
  
  ccflags-$(CONFIG_DEBUG_GPIO)   += -DDEBUG
  
-obj-$(CONFIG_HAVE_GPIO_LIB)    += gpiolib.o
+obj-$(CONFIG_GPIOLIB)          += gpiolib.o
  
  obj-$(CONFIG_GPIO_MAX7301)     += max7301.o
+obj-$(CONFIG_GPIO_MAX732X)     += max732x.o
  obj-$(CONFIG_GPIO_MCP23S08)    += mcp23s08.o
  obj-$(CONFIG_GPIO_PCA953X)     += pca953x.o
  obj-$(CONFIG_GPIO_PCF857X)     += pcf857x.o
+obj-$(CONFIG_GPIO_BT8XX)       += bt8xxgpio.o
diff --git a/drivers/gpio/bt8xxgpio.c b/drivers/gpio/bt8xxgpio.c

new file mode 100644 (file)

index 0000000..7a11682
--- /dev/null
+++ b/drivers/gpio/bt8xxgpio.c
@@ -0,0 +1,348 @@
+/*
+
+    bt8xx GPIO abuser
+
+    Copyright (C) 2008 Michael Buesch <mb@bu3sch.de>
+
+    Please do _only_ contact the people listed _above_ with issues related to this driver.
+    All the other people listed below are not related to this driver. Their names
+    are only here, because this driver is derived from the bt848 driver.
+
+
+    Derived from the bt848 driver:
+
+    Copyright (C) 1996,97,98 Ralph  Metzler
+                          & Marcus Metzler
+    (c) 1999-2002 Gerd Knorr
+
+    some v4l2 code lines are taken from Justin's bttv2 driver which is
+    (c) 2000 Justin Schoeman
+
+    V4L1 removal from:
+    (c) 2005-2006 Nickolay V. Shmyrev
+
+    Fixes to be fully V4L2 compliant by
+    (c) 2006 Mauro Carvalho Chehab
+
+    Cropping and overscan support
+    Copyright (C) 2005, 2006 Michael H. Schimek
+    Sponsored by OPQ Systems AB
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+
+#include <asm/gpio.h>
+
+/* Steal the hardware definitions from the bttv driver. */
+#include "../media/video/bt8xx/bt848.h"
+
+
+#define BT8XXGPIO_NR_GPIOS             24 /* We have 24 GPIO pins */
+
+
+struct bt8xxgpio {
+       spinlock_t lock;
+
+       void __iomem *mmio;
+       struct pci_dev *pdev;
+       struct gpio_chip gpio;
+
+#ifdef CONFIG_PM
+       u32 saved_outen;
+       u32 saved_data;
+#endif
+};
+
+#define bgwrite(dat, adr)      writel((dat), bg->mmio+(adr))
+#define bgread(adr)            readl(bg->mmio+(adr))
+
+
+static int modparam_gpiobase = -1/* dynamic */;
+module_param_named(gpiobase, modparam_gpiobase, int, 0444);
+MODULE_PARM_DESC(gpiobase, "The GPIO number base. -1 means dynamic, which is the default.");
+
+
+static int bt8xxgpio_gpio_direction_input(struct gpio_chip *gpio, unsigned nr)
+{
+       struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
+       unsigned long flags;
+       u32 outen, data;
+
+       spin_lock_irqsave(&bg->lock, flags);
+
+       data = bgread(BT848_GPIO_DATA);
+       data &= ~(1 << nr);
+       bgwrite(data, BT848_GPIO_DATA);
+
+       outen = bgread(BT848_GPIO_OUT_EN);
+       outen &= ~(1 << nr);
+       bgwrite(outen, BT848_GPIO_OUT_EN);
+
+       spin_unlock_irqrestore(&bg->lock, flags);
+
+       return 0;
+}
+
+static int bt8xxgpio_gpio_get(struct gpio_chip *gpio, unsigned nr)
+{
+       struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
+       unsigned long flags;
+       u32 val;
+
+       spin_lock_irqsave(&bg->lock, flags);
+       val = bgread(BT848_GPIO_DATA);
+       spin_unlock_irqrestore(&bg->lock, flags);
+
+       return !!(val & (1 << nr));
+}
+
+static int bt8xxgpio_gpio_direction_output(struct gpio_chip *gpio,
+                                       unsigned nr, int val)
+{
+       struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
+       unsigned long flags;
+       u32 outen, data;
+
+       spin_lock_irqsave(&bg->lock, flags);
+
+       outen = bgread(BT848_GPIO_OUT_EN);
+       outen |= (1 << nr);
+       bgwrite(outen, BT848_GPIO_OUT_EN);
+
+       data = bgread(BT848_GPIO_DATA);
+       if (val)
+               data |= (1 << nr);
+       else
+               data &= ~(1 << nr);
+       bgwrite(data, BT848_GPIO_DATA);
+
+       spin_unlock_irqrestore(&bg->lock, flags);
+
+       return 0;
+}
+
+static void bt8xxgpio_gpio_set(struct gpio_chip *gpio,
+                           unsigned nr, int val)
+{
+       struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
+       unsigned long flags;
+       u32 data;
+
+       spin_lock_irqsave(&bg->lock, flags);
+
+       data = bgread(BT848_GPIO_DATA);
+       if (val)
+               data |= (1 << nr);
+       else
+               data &= ~(1 << nr);
+       bgwrite(data, BT848_GPIO_DATA);
+
+       spin_unlock_irqrestore(&bg->lock, flags);
+}
+
+static void bt8xxgpio_gpio_setup(struct bt8xxgpio *bg)
+{
+       struct gpio_chip *c = &bg->gpio;
+
+       c->label = bg->pdev->dev.bus_id;
+       c->owner = THIS_MODULE;
+       c->direction_input = bt8xxgpio_gpio_direction_input;
+       c->get = bt8xxgpio_gpio_get;
+       c->direction_output = bt8xxgpio_gpio_direction_output;
+       c->set = bt8xxgpio_gpio_set;
+       c->dbg_show = NULL;
+       c->base = modparam_gpiobase;
+       c->ngpio = BT8XXGPIO_NR_GPIOS;
+       c->can_sleep = 0;
+}
+
+static int bt8xxgpio_probe(struct pci_dev *dev,
+                       const struct pci_device_id *pci_id)
+{
+       struct bt8xxgpio *bg;
+       int err;
+
+       bg = kzalloc(sizeof(*bg), GFP_KERNEL);
+       if (!bg)
+               return -ENOMEM;
+
+       bg->pdev = dev;
+       spin_lock_init(&bg->lock);
+
+       err = pci_enable_device(dev);
+       if (err) {
+               printk(KERN_ERR "bt8xxgpio: Can't enable device.\n");
+               goto err_freebg;
+       }
+       if (!request_mem_region(pci_resource_start(dev, 0),
+                               pci_resource_len(dev, 0),
+                               "bt8xxgpio")) {
+               printk(KERN_WARNING "bt8xxgpio: Can't request iomem (0x%llx).\n",
+                      (unsigned long long)pci_resource_start(dev, 0));
+               err = -EBUSY;
+               goto err_disable;
+       }
+       pci_set_master(dev);
+       pci_set_drvdata(dev, bg);
+
+       bg->mmio = ioremap(pci_resource_start(dev, 0), 0x1000);
+       if (!bg->mmio) {
+               printk(KERN_ERR "bt8xxgpio: ioremap() failed\n");
+               err = -EIO;
+               goto err_release_mem;
+       }
+
+       /* Disable interrupts */
+       bgwrite(0, BT848_INT_MASK);
+
+       /* gpio init */
+       bgwrite(0, BT848_GPIO_DMA_CTL);
+       bgwrite(0, BT848_GPIO_REG_INP);
+       bgwrite(0, BT848_GPIO_OUT_EN);
+
+       bt8xxgpio_gpio_setup(bg);
+       err = gpiochip_add(&bg->gpio);
+       if (err) {
+               printk(KERN_ERR "bt8xxgpio: Failed to register GPIOs\n");
+               goto err_release_mem;
+       }
+
+       printk(KERN_INFO "bt8xxgpio: Abusing BT8xx card for GPIOs %d to %d\n",
+              bg->gpio.base, bg->gpio.base + BT8XXGPIO_NR_GPIOS - 1);
+
+       return 0;
+
+err_release_mem:
+       release_mem_region(pci_resource_start(dev, 0),
+                          pci_resource_len(dev, 0));
+       pci_set_drvdata(dev, NULL);
+err_disable:
+       pci_disable_device(dev);
+err_freebg:
+       kfree(bg);
+
+       return err;
+}
+
+static void bt8xxgpio_remove(struct pci_dev *pdev)
+{
+       struct bt8xxgpio *bg = pci_get_drvdata(pdev);
+
+       gpiochip_remove(&bg->gpio);
+
+       bgwrite(0, BT848_INT_MASK);
+       bgwrite(~0x0, BT848_INT_STAT);
+       bgwrite(0x0, BT848_GPIO_OUT_EN);
+
+       iounmap(bg->mmio);
+       release_mem_region(pci_resource_start(pdev, 0),
+                          pci_resource_len(pdev, 0));
+       pci_disable_device(pdev);
+
+       pci_set_drvdata(pdev, NULL);
+       kfree(bg);
+}
+
+#ifdef CONFIG_PM
+static int bt8xxgpio_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+       struct bt8xxgpio *bg = pci_get_drvdata(pdev);
+       unsigned long flags;
+
+       spin_lock_irqsave(&bg->lock, flags);
+
+       bg->saved_outen = bgread(BT848_GPIO_OUT_EN);
+       bg->saved_data = bgread(BT848_GPIO_DATA);
+
+       bgwrite(0, BT848_INT_MASK);
+       bgwrite(~0x0, BT848_INT_STAT);
+       bgwrite(0x0, BT848_GPIO_OUT_EN);
+
+       spin_unlock_irqrestore(&bg->lock, flags);
+
+       pci_save_state(pdev);
+       pci_disable_device(pdev);
+       pci_set_power_state(pdev, pci_choose_state(pdev, state));
+
+       return 0;
+}
+
+static int bt8xxgpio_resume(struct pci_dev *pdev)
+{
+       struct bt8xxgpio *bg = pci_get_drvdata(pdev);
+       unsigned long flags;
+       int err;
+
+       pci_set_power_state(pdev, 0);
+       err = pci_enable_device(pdev);
+       if (err)
+               return err;
+       pci_restore_state(pdev);
+
+       spin_lock_irqsave(&bg->lock, flags);
+
+       bgwrite(0, BT848_INT_MASK);
+       bgwrite(0, BT848_GPIO_DMA_CTL);
+       bgwrite(0, BT848_GPIO_REG_INP);
+       bgwrite(bg->saved_outen, BT848_GPIO_OUT_EN);
+       bgwrite(bg->saved_data & bg->saved_outen,
+               BT848_GPIO_DATA);
+
+       spin_unlock_irqrestore(&bg->lock, flags);
+
+       return 0;
+}
+#else
+#define bt8xxgpio_suspend NULL
+#define bt8xxgpio_resume NULL
+#endif /* CONFIG_PM */
+
+static struct pci_device_id bt8xxgpio_pci_tbl[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT848) },
+       { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT849) },
+       { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT878) },
+       { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT879) },
+       { 0, },
+};
+MODULE_DEVICE_TABLE(pci, bt8xxgpio_pci_tbl);
+
+static struct pci_driver bt8xxgpio_pci_driver = {
+       .name           = "bt8xxgpio",
+       .id_table       = bt8xxgpio_pci_tbl,
+       .probe          = bt8xxgpio_probe,
+       .remove         = bt8xxgpio_remove,
+       .suspend        = bt8xxgpio_suspend,
+       .resume         = bt8xxgpio_resume,
+};
+
+static int bt8xxgpio_init(void)
+{
+       return pci_register_driver(&bt8xxgpio_pci_driver);
+}
+module_init(bt8xxgpio_init)
+
+static void bt8xxgpio_exit(void)
+{
+       pci_unregister_driver(&bt8xxgpio_pci_driver);
+}
+module_exit(bt8xxgpio_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michael Buesch");
+MODULE_DESCRIPTION("Abuse a BT8xx framegrabber card as generic GPIO card");
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c

index beaf6b3a37dcefb0b2c1d8c8f79b7db2713dba45..8d2940517c99221c33e3bf200ac53a82214cebee 100644 (file)
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2,8 +2,11 @@
  #include <linux/module.h>
  #include <linux/irq.h>
  #include <linux/spinlock.h>
-
-#include <asm/gpio.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/gpio.h>
  
  
  /* Optional implementation infrastructure for GPIO interfaces.
@@ -44,6 +47,8 @@ struct gpio_desc {
  #define FLAG_REQUESTED 0
  #define FLAG_IS_OUT    1
  #define FLAG_RESERVED  2
+#define FLAG_EXPORT    3       /* protected by sysfs_lock */
+#define FLAG_SYSFS     4       /* exported via /sys/class/gpio/control */
  
  #ifdef CONFIG_DEBUG_FS
         const char              *label;
@@ -151,6 +156,482 @@ err:
         return ret;
  }
  
+#ifdef CONFIG_GPIO_SYSFS
+
+/* lock protects against unexport_gpio() being called while
+ * sysfs files are active.
+ */
+static DEFINE_MUTEX(sysfs_lock);
+
+/*
+ * /sys/class/gpio/gpioN... only for GPIOs that are exported
+ *   /direction
+ *      * MAY BE OMITTED if kernel won't allow direction changes
+ *      * is read/write as "in" or "out"
+ *      * may also be written as "high" or "low", initializing
+ *        output value as specified ("out" implies "low")
+ *   /value
+ *      * always readable, subject to hardware behavior
+ *      * may be writable, as zero/nonzero
+ *
+ * REVISIT there will likely be an attribute for configuring async
+ * notifications, e.g. to specify polling interval or IRQ trigger type
+ * that would for example trigger a poll() on the "value".
+ */
+
+static ssize_t gpio_direction_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       const struct gpio_desc  *desc = dev_get_drvdata(dev);
+       ssize_t                 status;
+
+       mutex_lock(&sysfs_lock);
+
+       if (!test_bit(FLAG_EXPORT, &desc->flags))
+               status = -EIO;
+       else
+               status = sprintf(buf, "%s\n",
+                       test_bit(FLAG_IS_OUT, &desc->flags)
+                               ? "out" : "in");
+
+       mutex_unlock(&sysfs_lock);
+       return status;
+}
+
+static ssize_t gpio_direction_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t size)
+{
+       const struct gpio_desc  *desc = dev_get_drvdata(dev);
+       unsigned                gpio = desc - gpio_desc;
+       ssize_t                 status;
+
+       mutex_lock(&sysfs_lock);
+
+       if (!test_bit(FLAG_EXPORT, &desc->flags))
+               status = -EIO;
+       else if (sysfs_streq(buf, "high"))
+               status = gpio_direction_output(gpio, 1);
+       else if (sysfs_streq(buf, "out") || sysfs_streq(buf, "low"))
+               status = gpio_direction_output(gpio, 0);
+       else if (sysfs_streq(buf, "in"))
+               status = gpio_direction_input(gpio);
+       else
+               status = -EINVAL;
+
+       mutex_unlock(&sysfs_lock);
+       return status ? : size;
+}
+
+static const DEVICE_ATTR(direction, 0644,
+               gpio_direction_show, gpio_direction_store);
+
+/* Show handler for the "value" attribute.  Writes "0\n" or "1\n" to @buf and
+ * returns the number of bytes written, or -EIO if the GPIO is no longer
+ * exported.  The level is normalized with !! because a chip's get() hook may
+ * return any nonzero value (e.g. a bit mask) for "high".
+ */
+static ssize_t gpio_value_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       const struct gpio_desc  *desc = dev_get_drvdata(dev);
+       unsigned                gpio = desc - gpio_desc;
+       ssize_t                 status;
+
+       mutex_lock(&sysfs_lock);
+
+       if (!test_bit(FLAG_EXPORT, &desc->flags))
+               status = -EIO;
+       else
+               status = sprintf(buf, "%d\n",
+                               !!gpio_get_value_cansleep(gpio));
+
+       mutex_unlock(&sysfs_lock);
+       return status;
+}
+
+static ssize_t gpio_value_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t size)
+{
+       const struct gpio_desc  *desc = dev_get_drvdata(dev);
+       unsigned                gpio = desc - gpio_desc;
+       ssize_t                 status;
+
+       mutex_lock(&sysfs_lock);
+
+       if (!test_bit(FLAG_EXPORT, &desc->flags))
+               status = -EIO;
+       else if (!test_bit(FLAG_IS_OUT, &desc->flags))
+               status = -EPERM;
+       else {
+               long            value;
+
+               status = strict_strtol(buf, 0, &value);
+               if (status == 0) {
+                       gpio_set_value_cansleep(gpio, value != 0);
+                       status = size;
+               }
+       }
+
+       mutex_unlock(&sysfs_lock);
+       return status;
+}
+
+static /*const*/ DEVICE_ATTR(value, 0644,
+               gpio_value_show, gpio_value_store);
+
+static const struct attribute *gpio_attrs[] = {
+       &dev_attr_direction.attr,
+       &dev_attr_value.attr,
+       NULL,
+};
+
+static const struct attribute_group gpio_attr_group = {
+       .attrs = (struct attribute **) gpio_attrs,
+};
+
+/*
+ * /sys/class/gpio/gpiochipN/
+ *   /base ... matching gpio_chip.base (N)
+ *   /label ... matching gpio_chip.label
+ *   /ngpio ... matching gpio_chip.ngpio
+ */
+
+static ssize_t chip_base_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       const struct gpio_chip  *chip = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%d\n", chip->base);
+}
+static DEVICE_ATTR(base, 0444, chip_base_show, NULL);
+
+static ssize_t chip_label_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       const struct gpio_chip  *chip = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%s\n", chip->label ? : "");
+}
+static DEVICE_ATTR(label, 0444, chip_label_show, NULL);
+
+static ssize_t chip_ngpio_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       const struct gpio_chip  *chip = dev_get_drvdata(dev);
+
+       return sprintf(buf, "%u\n", chip->ngpio);
+}
+static DEVICE_ATTR(ngpio, 0444, chip_ngpio_show, NULL);
+
+static const struct attribute *gpiochip_attrs[] = {
+       &dev_attr_base.attr,
+       &dev_attr_label.attr,
+       &dev_attr_ngpio.attr,
+       NULL,
+};
+
+static const struct attribute_group gpiochip_attr_group = {
+       .attrs = (struct attribute **) gpiochip_attrs,
+};
+
+/*
+ * /sys/class/gpio/export ... write-only
+ *     integer N ... number of GPIO to export (full access)
+ * /sys/class/gpio/unexport ... write-only
+ *     integer N ... number of GPIO to unexport
+ */
+/* Store handler for /sys/class/gpio/export: parse a GPIO number from @buf,
+ * request it with the label "sysfs" and export it with direction changes
+ * allowed.  Returns @len on success, else a negative errno.
+ */
+static ssize_t export_store(struct class *class, const char *buf, size_t len)
+{
+       long    gpio;
+       int     status;
+
+       status = strict_strtol(buf, 0, &gpio);
+       if (status < 0)
+               goto done;
+
+       /* gpio_request() and gpio_export() take an unsigned, so an
+        * out-of-range long could be truncated into a valid number there
+        * and then index gpio_desc[] out of bounds below.  Reject it here.
+        */
+       if (gpio < 0 || gpio >= ARCH_NR_GPIOS) {
+               status = -EINVAL;
+               goto done;
+       }
+
+       /* No extra locking here; FLAG_SYSFS just signifies that the
+        * request and export were done on behalf of userspace, so
+        * they may be undone on its behalf too.
+        */
+
+       status = gpio_request(gpio, "sysfs");
+       if (status < 0)
+               goto done;
+
+       status = gpio_export(gpio, true);
+       if (status < 0)
+               gpio_free(gpio);
+       else
+               set_bit(FLAG_SYSFS, &gpio_desc[gpio].flags);
+
+done:
+       if (status)
+               pr_debug("%s: status %d\n", __func__, status);
+       return status ? : len;
+}
+
+/* Store handler for /sys/class/gpio/unexport: parse a GPIO number from @buf
+ * and, if that GPIO was exported through the "export" file (FLAG_SYSFS),
+ * free it (gpio_free() also unexports).  Returns @len on success, else a
+ * negative errno; -EINVAL covers bad numbers and GPIOs not exported by
+ * userspace.
+ */
+static ssize_t unexport_store(struct class *class, const char *buf, size_t len)
+{
+       long    gpio;
+       int     status;
+
+       status = strict_strtol(buf, 0, &gpio);
+       if (status < 0)
+               goto done;
+
+       status = -EINVAL;
+
+       /* reject bogus commands (gpio_unexport ignores them); the range
+        * is checked on the long itself, since gpio_is_valid() takes an
+        * int and a truncated value must not be used to index gpio_desc[]
+        */
+       if (gpio < 0 || gpio >= ARCH_NR_GPIOS)
+               goto done;
+
+       /* No extra locking here; FLAG_SYSFS just signifies that the
+        * request and export were done on behalf of userspace, so
+        * they may be undone on its behalf too.
+        */
+       if (test_and_clear_bit(FLAG_SYSFS, &gpio_desc[gpio].flags)) {
+               status = 0;
+               gpio_free(gpio);
+       }
+done:
+       if (status)
+               pr_debug("%s: status %d\n", __func__, status);
+       return status ? : len;
+}
+
+static struct class_attribute gpio_class_attrs[] = {
+       __ATTR(export, 0200, NULL, export_store),
+       __ATTR(unexport, 0200, NULL, unexport_store),
+       __ATTR_NULL,
+};
+
+static struct class gpio_class = {
+       .name =         "gpio",
+       .owner =        THIS_MODULE,
+
+       .class_attrs =  gpio_class_attrs,
+};
+
+
+/**
+ * gpio_export - export a GPIO through sysfs
+ * @gpio: gpio to make available, already requested
+ * @direction_may_change: true if userspace may change gpio direction
+ * Context: arch_initcall or later
+ *
+ * When drivers want to make a GPIO accessible to userspace after they
+ * have requested it -- perhaps while debugging, or as part of their
+ * public interface -- they may use this routine.  If the GPIO can
+ * change direction (some can't) and the caller allows it, userspace
+ * will see "direction" sysfs attribute which may be used to change
+ * the gpio's direction.  A "value" attribute will always be provided.
+ *
+ * Returns zero on success, else a negative errno: -ENOENT if called
+ * before the gpio class is registered, -EINVAL if the GPIO is invalid,
+ * not requested or already exported, or the error reported while
+ * creating the device or its attributes.
+ */
+int gpio_export(unsigned gpio, bool direction_may_change)
+{
+       unsigned long           flags;
+       struct gpio_desc        *desc;
+       int                     status = -EINVAL;
+
+       /* can't export until sysfs is available ... */
+       if (!gpio_class.p) {
+               pr_debug("%s: called too early!\n", __func__);
+               return -ENOENT;
+       }
+
+       if (!gpio_is_valid(gpio))
+               goto done;
+
+       mutex_lock(&sysfs_lock);
+
+       spin_lock_irqsave(&gpio_lock, flags);
+       desc = &gpio_desc[gpio];
+       if (test_bit(FLAG_REQUESTED, &desc->flags)
+                       && !test_bit(FLAG_EXPORT, &desc->flags)) {
+               status = 0;
+               if (!desc->chip->direction_input
+                               || !desc->chip->direction_output)
+                       direction_may_change = false;
+       }
+       spin_unlock_irqrestore(&gpio_lock, flags);
+
+       if (status == 0) {
+               struct device   *dev;
+
+               dev = device_create(&gpio_class, desc->chip->dev, MKDEV(0, 0),
+                                       desc, "gpio%d", gpio);
+               /* device_create() reports failure as ERR_PTR(), not NULL */
+               if (!IS_ERR(dev)) {
+                       if (direction_may_change)
+                               status = sysfs_create_group(&dev->kobj,
+                                               &gpio_attr_group);
+                       else
+                               status = device_create_file(dev,
+                                               &dev_attr_value);
+                       if (status != 0)
+                               device_unregister(dev);
+               } else
+                       status = PTR_ERR(dev);
+               if (status == 0)
+                       set_bit(FLAG_EXPORT, &desc->flags);
+       }
+
+       mutex_unlock(&sysfs_lock);
+
+done:
+       if (status)
+               pr_debug("%s: gpio%d status %d\n", __func__, gpio, status);
+
+       return status;
+}
+EXPORT_SYMBOL_GPL(gpio_export);
+
+static int match_export(struct device *dev, void *data)
+{
+       return dev_get_drvdata(dev) == data;
+}
+
+/**
+ * gpio_unexport - reverse effect of gpio_export()
+ * @gpio: gpio to make unavailable
+ *
+ * This is implicit on gpio_free().
+ */
+void gpio_unexport(unsigned gpio)
+{
+       struct gpio_desc        *desc;
+       int                     status = -EINVAL;
+
+       if (!gpio_is_valid(gpio))
+               goto done;
+
+       mutex_lock(&sysfs_lock);
+
+       desc = &gpio_desc[gpio];
+       if (test_bit(FLAG_EXPORT, &desc->flags)) {
+               struct device   *dev = NULL;
+
+               dev = class_find_device(&gpio_class, NULL, desc, match_export);
+               if (dev) {
+                       clear_bit(FLAG_EXPORT, &desc->flags);
+                       put_device(dev);
+                       device_unregister(dev);
+                       status = 0;
+               } else
+                       status = -ENODEV;
+       }
+
+       mutex_unlock(&sysfs_lock);
+done:
+       if (status)
+               pr_debug("%s: gpio%d status %d\n", __func__, gpio, status);
+}
+EXPORT_SYMBOL_GPL(gpio_unexport);
+
+/* Create the /sys/class/gpio/gpiochipN node for @chip together with its
+ * base/label/ngpio attributes.  Returns zero on success (or when the class
+ * is not registered yet and the export is deferred), else a negative errno;
+ * on failure the chip's gpio_desc[] entries are detached from it again.
+ */
+static int gpiochip_export(struct gpio_chip *chip)
+{
+       int             status;
+       struct device   *dev;
+
+       /* Many systems register gpio chips for SOC support very early,
+        * before driver model support is available.  In those cases we
+        * export this later, in gpiolib_sysfs_init() ... here we just
+        * verify that _some_ field of gpio_class got initialized.
+        */
+       if (!gpio_class.p)
+               return 0;
+
+       /* use chip->base for the ID; it's already known to be unique */
+       mutex_lock(&sysfs_lock);
+       dev = device_create(&gpio_class, chip->dev, MKDEV(0, 0), chip,
+                               "gpiochip%d", chip->base);
+       /* device_create() reports failure as ERR_PTR(), not NULL */
+       if (!IS_ERR(dev)) {
+               status = sysfs_create_group(&dev->kobj,
+                               &gpiochip_attr_group);
+       } else
+               status = PTR_ERR(dev);
+       chip->exported = (status == 0);
+       mutex_unlock(&sysfs_lock);
+
+       if (status) {
+               unsigned long   flags;
+               unsigned        gpio;
+
+               /* stop at the end of gpio_desc[] in case this chip owns
+                * the highest-numbered GPIOs
+                */
+               spin_lock_irqsave(&gpio_lock, flags);
+               gpio = chip->base;
+               while (gpio_is_valid(gpio) && gpio_desc[gpio].chip == chip)
+                       gpio_desc[gpio++].chip = NULL;
+               spin_unlock_irqrestore(&gpio_lock, flags);
+
+               pr_debug("%s: chip %s status %d\n", __func__,
+                               chip->label, status);
+       }
+
+       return status;
+}
+
+static void gpiochip_unexport(struct gpio_chip *chip)
+{
+       int                     status;
+       struct device           *dev;
+
+       mutex_lock(&sysfs_lock);
+       dev = class_find_device(&gpio_class, NULL, chip, match_export);
+       if (dev) {
+               put_device(dev);
+               device_unregister(dev);
+               chip->exported = 0;
+               status = 0;
+       } else
+               status = -ENODEV;
+       mutex_unlock(&sysfs_lock);
+
+       if (status)
+               pr_debug("%s: chip %s status %d\n", __func__,
+                               chip->label, status);
+}
+
+static int __init gpiolib_sysfs_init(void)
+{
+       int             status;
+       unsigned long   flags;
+       unsigned        gpio;
+
+       status = class_register(&gpio_class);
+       if (status < 0)
+               return status;
+
+       /* Scan and register the gpio_chips which registered very
+        * early (e.g. before the class_register above was called).
+        *
+        * We run before arch_initcall() so chip->dev nodes can have
+        * registered, and so arch_initcall() can always gpio_export().
+        */
+       spin_lock_irqsave(&gpio_lock, flags);
+       for (gpio = 0; gpio < ARCH_NR_GPIOS; gpio++) {
+               struct gpio_chip        *chip;
+
+               chip = gpio_desc[gpio].chip;
+               if (!chip || chip->exported)
+                       continue;
+
+               spin_unlock_irqrestore(&gpio_lock, flags);
+               status = gpiochip_export(chip);
+               spin_lock_irqsave(&gpio_lock, flags);
+       }
+       spin_unlock_irqrestore(&gpio_lock, flags);
+
+
+       return status;
+}
+postcore_initcall(gpiolib_sysfs_init);
+
+#else
+static inline int gpiochip_export(struct gpio_chip *chip)
+{
+       return 0;
+}
+
+static inline void gpiochip_unexport(struct gpio_chip *chip)
+{
+}
+
+#endif /* CONFIG_GPIO_SYSFS */
+
  /**
   * gpiochip_add() - register a gpio_chip
   * @chip: the chip to register, with chip->base initialized
@@ -160,6 +641,11 @@ err:
   * because the chip->base is invalid or already associated with a
   * different chip.  Otherwise it returns zero as a success code.
   *
+ * When gpiochip_add() is called very early during boot, so that GPIOs
+ * can be freely used, the chip->dev device must be registered before
+ * the gpio framework's arch_initcall().  Otherwise sysfs initialization
+ * for GPIOs will fail rudely.
+ *
   * If chip->base is negative, this requests dynamic assignment of
   * a range of valid GPIOs.
   */
@@ -182,7 +668,7 @@ int gpiochip_add(struct gpio_chip *chip)
                 base = gpiochip_find_base(chip->ngpio);
                 if (base < 0) {
                         status = base;
-                       goto fail_unlock;
+                       goto unlock;
                 }
                 chip->base = base;
         }
@@ -197,12 +683,23 @@ int gpiochip_add(struct gpio_chip *chip)
         if (status == 0) {
                 for (id = base; id < base + chip->ngpio; id++) {
                         gpio_desc[id].chip = chip;
-                       gpio_desc[id].flags = 0;
+
+                       /* REVISIT:  most hardware initializes GPIOs as
+                        * inputs (often with pullups enabled) so power
+                        * usage is minimized.  Linux code should set the
+                        * gpio direction first thing; but until it does,
+                        * we may expose the wrong direction in sysfs.
+                        */
+                       gpio_desc[id].flags = !chip->direction_input
+                               ? (1 << FLAG_IS_OUT)
+                               : 0;
                 }
         }
  
-fail_unlock:
+unlock:
         spin_unlock_irqrestore(&gpio_lock, flags);
+       if (status == 0)
+               status = gpiochip_export(chip);
  fail:
         /* failures here can mean systems won't boot... */
         if (status)
@@ -239,6 +736,10 @@ int gpiochip_remove(struct gpio_chip *chip)
         }
  
         spin_unlock_irqrestore(&gpio_lock, flags);
+
+       if (status == 0)
+               gpiochip_unexport(chip);
+
         return status;
  }
  EXPORT_SYMBOL_GPL(gpiochip_remove);
@@ -296,6 +797,8 @@ void gpio_free(unsigned gpio)
                 return;
         }
  
+       gpio_unexport(gpio);
+
         spin_lock_irqsave(&gpio_lock, flags);
  
         desc = &gpio_desc[gpio];
@@ -534,10 +1037,6 @@ EXPORT_SYMBOL_GPL(gpio_set_value_cansleep);
  
  #ifdef CONFIG_DEBUG_FS
  
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-
  static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip)
  {
         unsigned                i;
@@ -614,17 +1113,28 @@ static int gpiolib_show(struct seq_file *s, void *unused)
         /* REVISIT this isn't locked against gpio_chip removal ... */
  
         for (gpio = 0; gpio_is_valid(gpio); gpio++) {
+               struct device *dev;
+
                 if (chip == gpio_desc[gpio].chip)
                         continue;
                 chip = gpio_desc[gpio].chip;
                 if (!chip)
                         continue;
  
-               seq_printf(s, "%sGPIOs %d-%d, %s%s:\n",
+               seq_printf(s, "%sGPIOs %d-%d",
                                 started ? "\n" : "",
-                               chip->base, chip->base + chip->ngpio - 1,
-                               chip->label ? : "generic",
-                               chip->can_sleep ? ", can sleep" : "");
+                               chip->base, chip->base + chip->ngpio - 1);
+               dev = chip->dev;
+               if (dev)
+                       seq_printf(s, ", %s/%s",
+                               dev->bus ? dev->bus->name : "no-bus",
+                               dev->bus_id);
+               if (chip->label)
+                       seq_printf(s, ", %s", chip->label);
+               if (chip->can_sleep)
+                       seq_printf(s, ", can sleep");
+               seq_printf(s, ":\n");
+
                 started = 1;
                 if (chip->dbg_show)
                         chip->dbg_show(s, chip);
diff --git a/drivers/gpio/max732x.c b/drivers/gpio/max732x.c

new file mode 100644 (file)

index 0000000..b51c813
--- /dev/null
+++ b/drivers/gpio/max732x.c
@@ -0,0 +1,385 @@
+/*
+ *  max732x.c - I2C Port Expander with 8/16 I/O
+ *
+ *  Copyright (C) 2007 Marvell International Ltd.
+ *  Copyright (C) 2008 Jack Ren <jack.ren@marvell.com>
+ *  Copyright (C) 2008 Eric Miao <eric.miao@marvell.com>
+ *
+ *  Derived from drivers/gpio/pca953x.c
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/gpio.h>
+
+#include <linux/i2c.h>
+#include <linux/i2c/max732x.h>
+
+
+/*
+ * Each port of MAX732x (including MAX7319) falls into one of the
+ * following three types:
+ *
+ *   - Push Pull Output
+ *   - Input
+ *   - Open Drain I/O
+ *
+ * designated by 'O', 'I' and 'P' individually according to MAXIM's
+ * datasheets.
+ *
+ * There are two groups of I/O ports, each group usually includes
+ * up to 8 I/O ports, and is accessed by a specific I2C address:
+ *
+ *   - Group A : by I2C address 0b'110xxxx
+ *   - Group B : by I2C address 0b'101xxxx
+ *
+ * where 'xxxx' is decided by the connections of pin AD2/AD0.  The
+ * address used also affects the initial state of output signals.
+ *
+ * Within each group of ports, there are five known combinations of
+ * I/O ports: 4I4O, 4P4O, 8I, 8P, 8O, see the definitions below for
+ * the detailed organization of these ports.
+ *
+ * GPIO numbers run from 'gpio_base + 0' to 'gpio_base + 7/15',
+ * and GPIOs from GROUP_A are numbered before those from GROUP_B
+ * (if there are two groups).
+ *
+ * NOTE: MAX7328/MAX7329 are drop-in replacements for PCF8574/a, so
+ * they are not supported by this driver.
+ */
+
+#define PORT_NONE      0x0     /* '/' No Port */
+#define PORT_OUTPUT    0x1     /* 'O' Push-Pull, Output Only */
+#define PORT_INPUT     0x2     /* 'I' Input Only */
+#define PORT_OPENDRAIN 0x3     /* 'P' Open-Drain, I/O */
+
+#define IO_4I4O                0x5AA5  /* O7 O6 I5 I4 I3 I2 O1 O0 */
+#define IO_4P4O                0x5FF5  /* O7 O6 P5 P4 P3 P2 O1 O0 */
+#define IO_8I          0xAAAA  /* I7 I6 I5 I4 I3 I2 I1 I0 */
+#define IO_8P          0xFFFF  /* P7 P6 P5 P4 P3 P2 P1 P0 */
+#define IO_8O          0x5555  /* O7 O6 O5 O4 O3 O2 O1 O0 */
+
+#define GROUP_A(x)     ((x) & 0xffff)  /* I2C Addr: 0b'110xxxx */
+#define GROUP_B(x)     ((x) << 16)     /* I2C Addr: 0b'101xxxx */
+
+static const struct i2c_device_id max732x_id[] = {
+       { "max7319", GROUP_A(IO_8I) },
+       { "max7320", GROUP_B(IO_8O) },
+       { "max7321", GROUP_A(IO_8P) },
+       { "max7322", GROUP_A(IO_4I4O) },
+       { "max7323", GROUP_A(IO_4P4O) },
+       { "max7324", GROUP_A(IO_8I) | GROUP_B(IO_8O) },
+       { "max7325", GROUP_A(IO_8P) | GROUP_B(IO_8O) },
+       { "max7326", GROUP_A(IO_4I4O) | GROUP_B(IO_8O) },
+       { "max7327", GROUP_A(IO_4P4O) | GROUP_B(IO_8O) },
+       { },
+};
+MODULE_DEVICE_TABLE(i2c, max732x_id);
+
+struct max732x_chip {
+       struct gpio_chip gpio_chip;
+
+       struct i2c_client *client;      /* "main" client */
+       struct i2c_client *client_dummy;
+       struct i2c_client *client_group_a;
+       struct i2c_client *client_group_b;
+
+       unsigned int    mask_group_a;
+       unsigned int    dir_input;
+       unsigned int    dir_output;
+
+       struct mutex    lock;
+       uint8_t         reg_out[2];
+};
+
+static int max732x_write(struct max732x_chip *chip, int group_a, uint8_t val)
+{
+       struct i2c_client *client;
+       int ret;
+
+       client = group_a ? chip->client_group_a : chip->client_group_b;
+       ret = i2c_smbus_write_byte(client, val);
+       if (ret < 0) {
+               dev_err(&client->dev, "failed writing\n");
+               return ret;
+       }
+
+       return 0;
+}
+
+static int max732x_read(struct max732x_chip *chip, int group_a, uint8_t *val)
+{
+       struct i2c_client *client;
+       int ret;
+
+       client = group_a ? chip->client_group_a : chip->client_group_b;
+       ret = i2c_smbus_read_byte(client);
+       if (ret < 0) {
+               dev_err(&client->dev, "failed reading\n");
+               return ret;
+       }
+
+       *val = (uint8_t)ret;
+       return 0;
+}
+
+static inline int is_group_a(struct max732x_chip *chip, unsigned off)
+{
+       return (1u << off) & chip->mask_group_a;
+}
+
+/* gpio_chip.get hook: read the port group that holds @off and return the
+ * level of that pin as 0 or 1.  A failed I2C read is reported as 0.
+ */
+static int max732x_gpio_get_value(struct gpio_chip *gc, unsigned off)
+{
+       struct max732x_chip *chip;
+       uint8_t reg_val;
+       int ret;
+
+       chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+       ret = max732x_read(chip, is_group_a(chip, off), &reg_val);
+       if (ret < 0)
+               return 0;
+
+       /* each group is one byte wide; normalize the masked bit to 0/1 */
+       return !!(reg_val & (1u << (off & 0x7)));
+}
+
+static void max732x_gpio_set_value(struct gpio_chip *gc, unsigned off, int val)
+{
+       struct max732x_chip *chip;
+       uint8_t reg_out, mask = 1u << (off & 0x7);
+       int ret;
+
+       chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+       mutex_lock(&chip->lock);
+
+       reg_out = (off > 7) ? chip->reg_out[1] : chip->reg_out[0];
+       reg_out = (val) ? reg_out | mask : reg_out & ~mask;
+
+       ret = max732x_write(chip, is_group_a(chip, off), reg_out);
+       if (ret < 0)
+               goto out;
+
+       /* update the shadow register then */
+       if (off > 7)
+               chip->reg_out[1] = reg_out;
+       else
+               chip->reg_out[0] = reg_out;
+out:
+       mutex_unlock(&chip->lock);
+}
+
+static int max732x_gpio_direction_input(struct gpio_chip *gc, unsigned off)
+{
+       struct max732x_chip *chip;
+       unsigned int mask = 1u << off;
+
+       chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+       if ((mask & chip->dir_input) == 0) {
+               dev_dbg(&chip->client->dev, "%s port %d is output only\n",
+                       chip->client->name, off);
+               return -EACCES;
+       }
+
+       return 0;
+}
+
+static int max732x_gpio_direction_output(struct gpio_chip *gc,
+               unsigned off, int val)
+{
+       struct max732x_chip *chip;
+       unsigned int mask = 1u << off;
+
+       chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+       if ((mask & chip->dir_output) == 0) {
+               dev_dbg(&chip->client->dev, "%s port %d is input only\n",
+                       chip->client->name, off);
+               return -EACCES;
+       }
+
+       max732x_gpio_set_value(gc, off, val);
+       return 0;
+}
+
+static int __devinit max732x_setup_gpio(struct max732x_chip *chip,
+                                       const struct i2c_device_id *id,
+                                       unsigned gpio_start)
+{
+       struct gpio_chip *gc = &chip->gpio_chip;
+       uint32_t id_data = id->driver_data;
+       int i, port = 0;
+
+       for (i = 0; i < 16; i++, id_data >>= 2) {
+               unsigned int mask = 1 << port;
+
+               switch (id_data & 0x3) {
+               case PORT_OUTPUT:
+                       chip->dir_output |= mask;
+                       break;
+               case PORT_INPUT:
+                       chip->dir_input |= mask;
+                       break;
+               case PORT_OPENDRAIN:
+                       chip->dir_output |= mask;
+                       chip->dir_input |= mask;
+                       break;
+               default:
+                       continue;
+               }
+
+               if (i < 8)
+                       chip->mask_group_a |= mask;
+               port++;
+       }
+
+       if (chip->dir_input)
+               gc->direction_input = max732x_gpio_direction_input;
+       if (chip->dir_output) {
+               gc->direction_output = max732x_gpio_direction_output;
+               gc->set = max732x_gpio_set_value;
+       }
+       gc->get = max732x_gpio_get_value;
+       gc->can_sleep = 1;
+
+       gc->base = gpio_start;
+       gc->ngpio = port;
+       gc->label = chip->client->name;
+       gc->owner = THIS_MODULE;
+
+       return port;
+}
+
+/* Bind to a MAX732x: allocate the chip state, set up the I2C client(s) used
+ * to reach port group A and/or B, prime the output shadow registers from the
+ * hardware and register the gpio_chip.  Returns zero on success, else a
+ * negative errno (-ENODEV without platform data or when the second client
+ * cannot be created, -ENOMEM, -EINVAL for an address outside both groups,
+ * or the gpiochip_add() error).
+ */
+static int __devinit max732x_probe(struct i2c_client *client,
+                                  const struct i2c_device_id *id)
+{
+       struct max732x_platform_data *pdata;
+       struct max732x_chip *chip;
+       struct i2c_client *c;
+       unsigned int ports;
+       uint16_t addr_a, addr_b;
+       int ret, nr_port, need_a, need_b;
+
+       pdata = client->dev.platform_data;
+       if (pdata == NULL)
+               return -ENODEV;
+
+       chip = kzalloc(sizeof(struct max732x_chip), GFP_KERNEL);
+       if (chip == NULL)
+               return -ENOMEM;
+       chip->client = client;
+
+       nr_port = max732x_setup_gpio(chip, id, pdata->gpio_base);
+
+       /* which port groups does this chip variant actually populate? */
+       ports = chip->dir_input | chip->dir_output;
+       need_a = (ports & chip->mask_group_a) != 0;
+       need_b = (ports & ~chip->mask_group_a) != 0;
+
+       addr_a = (client->addr & 0x0f) | 0x60;
+       addr_b = (client->addr & 0x0f) | 0x50;
+
+       /* The group answering at client->addr uses the "main" client.  A
+        * dummy client is created only when the other group has ports too,
+        * so single-group (8 port) chips no longer claim a second address.
+        */
+       switch (client->addr & 0x70) {
+       case 0x60:
+               chip->client_group_a = client;
+               if (need_b) {
+                       c = i2c_new_dummy(client->adapter, addr_b);
+                       chip->client_group_b = chip->client_dummy = c;
+               }
+               break;
+       case 0x50:
+               chip->client_group_b = client;
+               if (need_a) {
+                       c = i2c_new_dummy(client->adapter, addr_a);
+                       chip->client_group_a = chip->client_dummy = c;
+               }
+               break;
+       default:
+               dev_err(&client->dev, "invalid I2C address specified %02x\n",
+                               client->addr);
+               ret = -EINVAL;
+               goto out_failed;
+       }
+
+       /* i2c_new_dummy() returns NULL on failure; don't use it later */
+       if ((need_a && !chip->client_group_a) ||
+           (need_b && !chip->client_group_b)) {
+               dev_err(&client->dev, "failed to allocate I2C device\n");
+               ret = -ENODEV;
+               goto out_failed;
+       }
+
+       mutex_init(&chip->lock);
+
+       /* port 8 only exists on chips with more than 8 ports */
+       max732x_read(chip, is_group_a(chip, 0), &chip->reg_out[0]);
+       if (nr_port > 8)
+               max732x_read(chip, is_group_a(chip, 8), &chip->reg_out[1]);
+
+       ret = gpiochip_add(&chip->gpio_chip);
+       if (ret)
+               goto out_failed;
+
+       if (pdata->setup) {
+               ret = pdata->setup(client, chip->gpio_chip.base,
+                               chip->gpio_chip.ngpio, pdata->context);
+               if (ret < 0)
+                       dev_warn(&client->dev, "setup failed, %d\n", ret);
+       }
+
+       i2c_set_clientdata(client, chip);
+       return 0;
+
+out_failed:
+       /* don't leak the dummy client created above */
+       if (chip->client_dummy)
+               i2c_unregister_device(chip->client_dummy);
+       kfree(chip);
+       return ret;
+}
+
+static int __devexit max732x_remove(struct i2c_client *client)
+{
+       struct max732x_platform_data *pdata = client->dev.platform_data;
+       struct max732x_chip *chip = i2c_get_clientdata(client);
+       int ret;
+
+       if (pdata->teardown) {
+               ret = pdata->teardown(client, chip->gpio_chip.base,
+                               chip->gpio_chip.ngpio, pdata->context);
+               if (ret < 0) {
+                       dev_err(&client->dev, "%s failed, %d\n",
+                                       "teardown", ret);
+                       return ret;
+               }
+       }
+
+       ret = gpiochip_remove(&chip->gpio_chip);
+       if (ret) {
+               dev_err(&client->dev, "%s failed, %d\n",
+                               "gpiochip_remove()", ret);
+               return ret;
+       }
+
+       /* unregister any dummy i2c_client */
+       if (chip->client_dummy)
+               i2c_unregister_device(chip->client_dummy);
+
+       kfree(chip);
+       return 0;
+}
+
+static struct i2c_driver max732x_driver = {
+       .driver = {
+               .name   = "max732x",
+               .owner  = THIS_MODULE,
+       },
+       .probe          = max732x_probe,
+       .remove         = __devexit_p(max732x_remove),
+       .id_table       = max732x_id,
+};
+
+static int __init max732x_init(void)
+{
+       return i2c_add_driver(&max732x_driver);
+}
+module_init(max732x_init);
+
+static void __exit max732x_exit(void)
+{
+       i2c_del_driver(&max732x_driver);
+}
+module_exit(max732x_exit);
+
+MODULE_AUTHOR("Eric Miao <eric.miao@marvell.com>");
+MODULE_DESCRIPTION("GPIO expander driver for MAX732X");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpio/mcp23s08.c b/drivers/gpio/mcp23s08.c

index 7f92fdd5f0e2888b2c719e67873a5a7f0b435267..8a1b405fefda040f0001a5799299eefbe55f5768 100644 (file)
--- a/drivers/gpio/mcp23s08.c
+++ b/drivers/gpio/mcp23s08.c
@@ -40,15 +40,26 @@ struct mcp23s08 {
         struct spi_device       *spi;
         u8                      addr;
  
+       u8                      cache[11];
         /* lock protects the cached values */
         struct mutex            lock;
-       u8                      cache[11];
  
         struct gpio_chip        chip;
  
         struct work_struct      work;
  };
  
+/* A given spi_device can represent up to four mcp23s08 chips
+ * sharing the same chipselect but using different addresses
+ * (e.g. chips #0 and #3 might be populated, but not #1 or #2).
+ * Driver data holds all the per-chip data.
+ */
+struct mcp23s08_driver_data {
+       /* total GPIO count over all chips set up by probe (8 per chip) */
+       unsigned                ngpio;
+       /* indexed by chip address 0..3; NULL for addresses not present */
+       struct mcp23s08         *mcp[4];
+       /* storage for the present chips, allocated along with this struct */
+       struct mcp23s08         chip[];
+};
+
  static int mcp23s08_read(struct mcp23s08 *mcp, unsigned reg)
  {
         u8      tx[2], rx[1];
@@ -208,25 +219,18 @@ done:
  
  /*----------------------------------------------------------------------*/
  
-static int mcp23s08_probe(struct spi_device *spi)
+static int mcp23s08_probe_one(struct spi_device *spi, unsigned addr,
+               unsigned base, unsigned pullups)
  {
-       struct mcp23s08                 *mcp;
-       struct mcp23s08_platform_data   *pdata;
+       struct mcp23s08_driver_data     *data = spi_get_drvdata(spi);
+       struct mcp23s08                 *mcp = data->mcp[addr];
         int                             status;
         int                             do_update = 0;
  
-       pdata = spi->dev.platform_data;
-       if (!pdata || pdata->slave > 3 || !pdata->base)
-               return -ENODEV;
-
-       mcp = kzalloc(sizeof *mcp, GFP_KERNEL);
-       if (!mcp)
-               return -ENOMEM;
-
         mutex_init(&mcp->lock);
  
         mcp->spi = spi;
-       mcp->addr = 0x40 | (pdata->slave << 1);
+       mcp->addr = 0x40 | (addr << 1);
  
         mcp->chip.label = "mcp23s08",
  
@@ -236,26 +240,28 @@ static int mcp23s08_probe(struct spi_device *spi)
         mcp->chip.set = mcp23s08_set;
         mcp->chip.dbg_show = mcp23s08_dbg_show;
  
-       mcp->chip.base = pdata->base;
+       mcp->chip.base = base;
         mcp->chip.ngpio = 8;
         mcp->chip.can_sleep = 1;
+       mcp->chip.dev = &spi->dev;
         mcp->chip.owner = THIS_MODULE;
  
-       spi_set_drvdata(spi, mcp);
-
-       /* verify MCP_IOCON.SEQOP = 0, so sequential reads work */
+       /* verify MCP_IOCON.SEQOP = 0, so sequential reads work,
+        * and MCP_IOCON.HAEN = 1, so we work with all chips.
+        */
         status = mcp23s08_read(mcp, MCP_IOCON);
         if (status < 0)
                 goto fail;
-       if (status & IOCON_SEQOP) {
+       if ((status & IOCON_SEQOP) || !(status & IOCON_HAEN)) {
                 status &= ~IOCON_SEQOP;
+               status |= IOCON_HAEN;
                 status = mcp23s08_write(mcp, MCP_IOCON, (u8) status);
                 if (status < 0)
                         goto fail;
         }
  
         /* configure ~100K pullups */
-       status = mcp23s08_write(mcp, MCP_GPPU, pdata->pullups);
+       status = mcp23s08_write(mcp, MCP_GPPU, pullups);
         if (status < 0)
                 goto fail;
  
@@ -282,11 +288,58 @@ static int mcp23s08_probe(struct spi_device *spi)
                 tx[1] = MCP_IPOL;
                 memcpy(&tx[2], &mcp->cache[MCP_IPOL], sizeof(tx) - 2);
                 status = spi_write_then_read(mcp->spi, tx, sizeof tx, NULL, 0);
-
-               /* FIXME check status... */
+               if (status < 0)
+                       goto fail;
         }
  
         status = gpiochip_add(&mcp->chip);
+fail:
+       if (status < 0)
+               dev_dbg(&spi->dev, "can't setup chip %d, --> %d\n",
+                               addr, status);
+       return status;
+}
+
+static int mcp23s08_probe(struct spi_device *spi)
+{
+       struct mcp23s08_platform_data   *pdata;
+       unsigned                        addr;
+       unsigned                        chips = 0;
+       struct mcp23s08_driver_data     *data;
+       int                             status;
+       unsigned                        base;
+
+       pdata = spi->dev.platform_data;
+       if (!pdata || !gpio_is_valid(pdata->base))
+               return -ENODEV;
+
+       for (addr = 0; addr < 4; addr++) {
+               if (!pdata->chip[addr].is_present)
+                       continue;
+               chips++;
+       }
+       if (!chips)
+               return -ENODEV;
+
+       data = kzalloc(sizeof *data + chips * sizeof(struct mcp23s08),
+                       GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+       spi_set_drvdata(spi, data);
+
+       base = pdata->base;
+       for (addr = 0; addr < 4; addr++) {
+               if (!pdata->chip[addr].is_present)
+                       continue;
+               chips--;
+               data->mcp[addr] = &data->chip[chips];
+               status = mcp23s08_probe_one(spi, addr, base,
+                               pdata->chip[addr].pullups);
+               if (status < 0)
+                       goto fail;
+               base += 8;
+       }
+       data->ngpio = base - pdata->base;
  
         /* NOTE:  these chips have a relatively sane IRQ framework, with
          * per-signal masking and level/edge triggering.  It's not yet
@@ -294,8 +347,9 @@ static int mcp23s08_probe(struct spi_device *spi)
          */
  
         if (pdata->setup) {
-               status = pdata->setup(spi, mcp->chip.base,
-                               mcp->chip.ngpio, pdata->context);
+               status = pdata->setup(spi,
+                               pdata->base, data->ngpio,
+                               pdata->context);
                 if (status < 0)
                         dev_dbg(&spi->dev, "setup --> %d\n", status);
         }
@@ -303,19 +357,29 @@ static int mcp23s08_probe(struct spi_device *spi)
         return 0;
  
  fail:
-       kfree(mcp);
+       for (addr = 0; addr < 4; addr++) {
+               int tmp;
+
+               if (!data->mcp[addr])
+                       continue;
+               tmp = gpiochip_remove(&data->mcp[addr]->chip);
+               if (tmp < 0)
+                       dev_err(&spi->dev, "%s --> %d\n", "remove", tmp);
+       }
+       kfree(data);
         return status;
  }
  
  static int mcp23s08_remove(struct spi_device *spi)
  {
-       struct mcp23s08                 *mcp = spi_get_drvdata(spi);
+       struct mcp23s08_driver_data     *data = spi_get_drvdata(spi);
         struct mcp23s08_platform_data   *pdata = spi->dev.platform_data;
+       unsigned                        addr;
         int                             status = 0;
  
         if (pdata->teardown) {
                 status = pdata->teardown(spi,
-                               mcp->chip.base, mcp->chip.ngpio,
+                               pdata->base, data->ngpio,
                                 pdata->context);
                 if (status < 0) {
                         dev_err(&spi->dev, "%s --> %d\n", "teardown", status);
@@ -323,11 +387,20 @@ static int mcp23s08_remove(struct spi_device *spi)
                 }
         }
  
-       status = gpiochip_remove(&mcp->chip);
+       for (addr = 0; addr < 4; addr++) {
+               int tmp;
+
+               if (!data->mcp[addr])
+                       continue;
+
+               tmp = gpiochip_remove(&data->mcp[addr]->chip);
+               if (tmp < 0) {
+                       dev_err(&spi->dev, "%s --> %d\n", "remove", tmp);
+                       status = tmp;
+               }
+       }
         if (status == 0)
-               kfree(mcp);
-       else
-               dev_err(&spi->dev, "%s --> %d\n", "remove", status);
+               kfree(data);
         return status;
  }
  
@@ -355,4 +428,3 @@ static void __exit mcp23s08_exit(void)
  module_exit(mcp23s08_exit);
  
  MODULE_LICENSE("GPL");
-
diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c

index a380730b61abe49800af8e655eaf5735d0524ad0..cc8468692ae0967b295e196c5073fb102c4894c7 100644 (file)
--- a/drivers/gpio/pca953x.c
+++ b/drivers/gpio/pca953x.c
@@ -188,6 +188,7 @@ static void pca953x_setup_gpio(struct pca953x_chip *chip, int gpios)
         gc->base = chip->gpio_start;
         gc->ngpio = gpios;
         gc->label = chip->client->name;
+       gc->dev = &chip->client->dev;
         gc->owner = THIS_MODULE;
  }
  
diff --git a/drivers/gpio/pcf857x.c b/drivers/gpio/pcf857x.c

index d25d356c4f200c9a197a5ac84373f7441e415e3a..fc9c6ae739ee74f8f77b8836bd5e7894001e5d9d 100644 (file)
--- a/drivers/gpio/pcf857x.c
+++ b/drivers/gpio/pcf857x.c
@@ -200,6 +200,7 @@ static int pcf857x_probe(struct i2c_client *client,
  
         gpio->chip.base = pdata->gpio_base;
         gpio->chip.can_sleep = 1;
+       gpio->chip.dev = &client->dev;
         gpio->chip.owner = THIS_MODULE;
  
         /* NOTE:  the OnSemi jlc1562b is also largely compatible with
diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig

index 50e0a465374193d6f93d4ceb3e74367c0a0b2ec7..a95cb9465d656949bed4429279a75c35fb547b23 100644 (file)
--- a/drivers/i2c/chips/Kconfig
+++ b/drivers/i2c/chips/Kconfig
@@ -126,7 +126,7 @@ config ISP1301_OMAP
  
  config TPS65010
         tristate "TPS6501x Power Management chips"
-       depends on HAVE_GPIO_LIB
+       depends on GPIOLIB
         default y if MACH_OMAP_H2 || MACH_OMAP_H3 || MACH_OMAP_OSK
         help
           If you say yes here you get support for the TPS6501x series of
diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c

index 85949685191bb3218e75cb5547aab3750b7e291d..cf02e8fceb42a7a1ef4ee91c42d0fb8cfdd14ecb 100644 (file)
--- a/drivers/i2c/chips/tps65010.c
+++ b/drivers/i2c/chips/tps65010.c
@@ -636,6 +636,8 @@ static int tps65010_probe(struct i2c_client *client,
                 tps->outmask = board->outmask;
  
                 tps->chip.label = client->name;
+               tps->chip.dev = &client->dev;
+               tps->chip.owner = THIS_MODULE;
  
                 tps->chip.set = tps65010_gpio_set;
                 tps->chip.direction_output = tps65010_output;
diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c

index aad664d5259fd279a17b2d176aaa91e2db267d2c..0d395979b2d19c2a35ddb31e6d8c915c363fcfd9 100644 (file)
--- a/drivers/input/serio/hp_sdc.c
+++ b/drivers/input/serio/hp_sdc.c
@@ -70,7 +70,6 @@
  #include <linux/semaphore.h>
  #include <linux/slab.h>
  #include <linux/hil.h>
-#include <linux/semaphore.h>
  #include <asm/io.h>
  #include <asm/system.h>
  
diff --git a/drivers/isdn/hisax/st5481.h b/drivers/isdn/hisax/st5481.h

index 2044e7173ab46a9d81f5f1b646c361383c8245a1..cff7a6354334234ca9f2030229bf5e49b08de432 100644 (file)
--- a/drivers/isdn/hisax/st5481.h
+++ b/drivers/isdn/hisax/st5481.h
@@ -220,7 +220,7 @@ enum {
  #define ERR(format, arg...) \
  printk(KERN_ERR "%s:%s: " format "\n" , __FILE__,  __func__ , ## arg)
  
-#define WARN(format, arg...) \
+#define WARNING(format, arg...) \
  printk(KERN_WARNING "%s:%s: " format "\n" , __FILE__,  __func__ , ## arg)
  
  #define INFO(format, arg...) \
@@ -412,7 +412,7 @@ struct st5481_adapter {
  ({ \
         int status; \
         if ((status = usb_submit_urb(urb, mem_flags)) < 0) { \
-               WARN("usb_submit_urb failed,status=%d", status); \
+               WARNING("usb_submit_urb failed,status=%d", status); \
         } \
          status; \
  })
diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c

index fa64115cd7c7570267a2adec29c19392d9dcf448..0074b600a0efa2d98b86c8ef16f97b0ebfdd4404 100644 (file)
--- a/drivers/isdn/hisax/st5481_b.c
+++ b/drivers/isdn/hisax/st5481_b.c
@@ -180,7 +180,7 @@ static void usb_b_out_complete(struct urb *urb)
                                 DBG(4,"urb killed status %d", urb->status);
                                 return; // Give up
                         default: 
-                               WARN("urb status %d",urb->status);
+                               WARNING("urb status %d",urb->status);
                                 if (b_out->busy == 0) {
                                         st5481_usb_pipe_reset(adapter, (bcs->channel+1)*2 | USB_DIR_OUT, NULL, NULL);
                                 }
@@ -372,6 +372,6 @@ void st5481_b_l2l1(struct hisax_if *ifc, int pr, void *arg)
                 B_L1L2(bcs, PH_DEACTIVATE | INDICATION, NULL);
                 break;
         default:
-               WARN("pr %#x\n", pr);
+               WARNING("pr %#x\n", pr);
         }
  }
diff --git a/drivers/isdn/hisax/st5481_d.c b/drivers/isdn/hisax/st5481_d.c

index b8c4855cc8894c700169184e3b04ae5500dcf86c..077991c1cd050d9f93c0228155877d8377b95b88 100644 (file)
--- a/drivers/isdn/hisax/st5481_d.c
+++ b/drivers/isdn/hisax/st5481_d.c
@@ -389,7 +389,7 @@ static void usb_d_out_complete(struct urb *urb)
                                 DBG(1,"urb killed status %d", urb->status);
                                 break;
                         default: 
-                               WARN("urb status %d",urb->status);
+                               WARNING("urb status %d",urb->status);
                                 if (d_out->busy == 0) {
                                         st5481_usb_pipe_reset(adapter, EP_D_OUT | USB_DIR_OUT, fifo_reseted, adapter);
                                 }
@@ -420,7 +420,7 @@ static void dout_start_xmit(struct FsmInst *fsm, int event, void *arg)
         isdnhdlc_out_init(&d_out->hdlc_state, 1, 0);
  
         if (test_and_set_bit(buf_nr, &d_out->busy)) {
-               WARN("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy);
+               WARNING("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy);
                 return;
         }
         urb = d_out->urb[buf_nr];
@@ -601,7 +601,7 @@ void st5481_d_l2l1(struct hisax_if *hisax_d_if, int pr, void *arg)
                 FsmEvent(&adapter->d_out.fsm, EV_DOUT_START_XMIT, NULL);
                 break;
         default:
-               WARN("pr %#x\n", pr);
+               WARNING("pr %#x\n", pr);
                 break;
         }
  }
diff --git a/drivers/isdn/hisax/st5481_usb.c b/drivers/isdn/hisax/st5481_usb.c

index 427a8b0520f5aee14684503f208e5faa6f841d1b..ec3c0e507669804236ce5952fb91a19d9e721947 100644 (file)
--- a/drivers/isdn/hisax/st5481_usb.c
+++ b/drivers/isdn/hisax/st5481_usb.c
@@ -66,7 +66,7 @@ static void usb_ctrl_msg(struct st5481_adapter *adapter,
         struct ctrl_msg *ctrl_msg;
         
         if ((w_index = fifo_add(&ctrl->msg_fifo.f)) < 0) {
-               WARN("control msg FIFO full");
+               WARNING("control msg FIFO full");
                 return;
         }
         ctrl_msg = &ctrl->msg_fifo.data[w_index]; 
@@ -139,7 +139,7 @@ static void usb_ctrl_complete(struct urb *urb)
                                 DBG(1,"urb killed status %d", urb->status);
                                 return; // Give up
                         default: 
-                               WARN("urb status %d",urb->status);
+                               WARNING("urb status %d",urb->status);
                                 break;
                 }
         }
@@ -198,7 +198,7 @@ static void usb_int_complete(struct urb *urb)
                         DBG(2, "urb shutting down with status: %d", urb->status);
                         return;
                 default:
-                       WARN("nonzero urb status received: %d", urb->status);
+                       WARNING("nonzero urb status received: %d", urb->status);
                         goto exit;
         }
  
@@ -235,7 +235,7 @@ static void usb_int_complete(struct urb *urb)
  exit:
         status = usb_submit_urb (urb, GFP_ATOMIC);
         if (status)
-               WARN("usb_submit_urb failed with result %d", status);
+               WARNING("usb_submit_urb failed with result %d", status);
  }
  
  /* ======================================================================
@@ -257,7 +257,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter)
         DBG(2,"");
         
         if ((status = usb_reset_configuration (dev)) < 0) {
-               WARN("reset_configuration failed,status=%d",status);
+               WARNING("reset_configuration failed,status=%d",status);
                 return status;
         }
  
@@ -269,7 +269,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter)
  
         // Check if the config is sane
         if ( altsetting->desc.bNumEndpoints != 7 ) {
-               WARN("expecting 7 got %d endpoints!", altsetting->desc.bNumEndpoints);
+               WARNING("expecting 7 got %d endpoints!", altsetting->desc.bNumEndpoints);
                 return -EINVAL;
         }
  
@@ -279,7 +279,7 @@ int st5481_setup_usb(struct st5481_adapter *adapter)
  
         // Use alternative setting 3 on interface 0 to have 2B+D
         if ((status = usb_set_interface (dev, 0, 3)) < 0) {
-               WARN("usb_set_interface failed,status=%d",status);
+               WARNING("usb_set_interface failed,status=%d",status);
                 return status;
         }
  
@@ -497,7 +497,7 @@ static void usb_in_complete(struct urb *urb)
                                 DBG(1,"urb killed status %d", urb->status);
                                 return; // Give up
                         default: 
-                               WARN("urb status %d",urb->status);
+                               WARNING("urb status %d",urb->status);
                                 break;
                 }
         }
@@ -523,7 +523,7 @@ static void usb_in_complete(struct urb *urb)
                         DBG(4,"count=%d",status);
                         DBG_PACKET(0x400, in->rcvbuf, status);
                         if (!(skb = dev_alloc_skb(status))) {
-                               WARN("receive out of memory\n");
+                               WARNING("receive out of memory\n");
                                 break;
                         }
                         memcpy(skb_put(skb, status), in->rcvbuf, status);
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c

index 1a8de57289eb97e5877b9016855cfe9e99adaac1..37344aaee22f3ab44910a18576489466c0b8c30d 100644 (file)
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -98,16 +98,20 @@ static u32 lg_get_features(struct virtio_device *vdev)
         return features;
  }
  
-static void lg_set_features(struct virtio_device *vdev, u32 features)
+static void lg_finalize_features(struct virtio_device *vdev)
  {
-       unsigned int i;
+       unsigned int i, bits;
         struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
         /* Second half of bitmap is features we accept. */
         u8 *out_features = lg_features(desc) + desc->feature_len;
  
+       /* Give virtio_ring a chance to accept features. */
+       vring_transport_features(vdev);
+
         memset(out_features, 0, desc->feature_len);
-       for (i = 0; i < min(desc->feature_len * 8, 32); i++) {
-               if (features & (1 << i))
+       bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
+       for (i = 0; i < bits; i++) {
+               if (test_bit(i, vdev->features))
                         out_features[i / 8] |= (1 << (i % 8));
         }
  }
@@ -297,7 +301,7 @@ static void lg_del_vq(struct virtqueue *vq)
  /* The ops structure which hooks everything together. */
  static struct virtio_config_ops lguest_config_ops = {
         .get_features = lg_get_features,
-       .set_features = lg_set_features,
+       .finalize_features = lg_finalize_features,
         .get = lg_get,
         .set = lg_set,
         .get_status = lg_get_status,
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig

index 9f93c29fed355074cb443184da54343102c42625..1f57a99fd968b9c4802d8669b5ff5aba5c8b5500 100644 (file)
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -19,6 +19,14 @@ config MFD_SM501
           interface. The device may be connected by PCI or local bus with
           varying functions enabled.
  
+config MFD_SM501_GPIO
+       bool "Export GPIO via GPIO layer"
+       depends on MFD_SM501 && GPIOLIB
+       ---help---
+         This option uses the gpio library layer to export the 64 GPIO
+         lines on the SM501. The platform data is used to supply the
+         base number for the first GPIO line to register.
+
  config MFD_ASIC3
         bool "Support for Compaq ASIC3"
         depends on GENERIC_HARDIRQS && HAVE_GPIO_LIB && ARM
@@ -28,7 +36,7 @@ config MFD_ASIC3
  
  config HTC_EGPIO
         bool "HTC EGPIO support"
-       depends on GENERIC_HARDIRQS && HAVE_GPIO_LIB && ARM
+       depends on GENERIC_HARDIRQS && GPIOLIB && ARM
         help
             This driver supports the CPLD egpio chip present on
             several HTC phones.  It provides basic support for input
@@ -44,7 +52,7 @@ config HTC_PASIC3
  
  config MFD_TC6393XB
         bool "Support Toshiba TC6393XB"
-       depends on HAVE_GPIO_LIB
+       depends on GPIOLIB
         select MFD_CORE
         help
           Support for Toshiba Mobile IO Controller TC6393XB
diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c

index 8872cc0775190c296ab63bb67160b03f99cb3723..6be43172dc656911cd5c9f41f8717426e2be9e54 100644 (file)
--- a/drivers/mfd/htc-egpio.c
+++ b/drivers/mfd/htc-egpio.c
@@ -318,6 +318,8 @@ static int __init egpio_probe(struct platform_device *pdev)
                 ei->chip[i].dev = &(pdev->dev);
                 chip = &(ei->chip[i].chip);
                 chip->label           = "htc-egpio";
+               chip->dev             = &pdev->dev;
+               chip->owner           = THIS_MODULE;
                 chip->get             = egpio_get;
                 chip->set             = egpio_set;
                 chip->direction_input = egpio_direction_input;
diff --git a/drivers/mfd/htc-pasic3.c b/drivers/mfd/htc-pasic3.c

index 633cbba072f024f013a17f42cc8b3181c63ef8a4..91b294dcc13388ebdec67de1e5b54aaee30eb6e6 100644 (file)
--- a/drivers/mfd/htc-pasic3.c
+++ b/drivers/mfd/htc-pasic3.c
@@ -238,6 +238,8 @@ static int pasic3_remove(struct platform_device *pdev)
         return 0;
  }
  
+MODULE_ALIAS("platform:pasic3");
+
  static struct platform_driver pasic3_driver = {
         .driver         = {
                 .name   = "pasic3",
diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c

index 1eab7cffceaa10180b79df6cd404f5146f81a189..b5272b5ce3fae295b7edc527463e137d6e5a1a29 100644 (file)
--- a/drivers/mfd/mcp-sa11x0.c
+++ b/drivers/mfd/mcp-sa11x0.c
@@ -242,6 +242,8 @@ static int mcp_sa11x0_resume(struct platform_device *dev)
  /*
   * The driver for the SA11x0 MCP port.
   */
+MODULE_ALIAS("platform:sa11x0-mcp");
+
  static struct platform_driver mcp_sa11x0_driver = {
         .probe          = mcp_sa11x0_probe,
         .remove         = mcp_sa11x0_remove,
diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c

index d7d88ce053a661fd4aa364c373509953340224e3..0454be4266c1cf7371dd359b9839c09fcb126f7d 100644 (file)
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -36,7 +36,7 @@ static int mfd_add_device(struct platform_device *parent,
         if (ret)
                 goto fail_device;
  
-       memzero(res, sizeof(res));
+       memset(res, 0, sizeof(res));
         for (r = 0; r < cell->num_resources; r++) {
                 res[r].name = cell->resources[r].name;
                 res[r].flags = cell->resources[r].flags;
diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c

index 2fe64734d8afd2d6e893e40224ee56ff6b935706..7aebad4c06ff939114bdff7f5f58874ee197f96d 100644 (file)
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -19,6 +19,7 @@
  #include <linux/device.h>
  #include <linux/platform_device.h>
  #include <linux/pci.h>
+#include <linux/i2c-gpio.h>
  
  #include <linux/sm501.h>
  #include <linux/sm501-regs.h>
@@ -31,10 +32,37 @@ struct sm501_device {
         struct platform_device          pdev;
  };
  
+struct sm501_gpio;
+
+#ifdef CONFIG_MFD_SM501_GPIO
+#include <linux/gpio.h>
+
+/*
+ * One 32-line GPIO bank of the SM501, as registered with gpiolib.
+ * The gpio_chip is embedded so the callbacks can recover this wrapper
+ * with container_of().
+ */
+struct sm501_gpio_chip {
+       struct gpio_chip        gpio;
+       struct sm501_gpio       *ourgpio;       /* to get back to parent. */
+       /* mapped GPIO block plus this bank's data-register offset */
+       void __iomem            *regbase;
+};
+
+/*
+ * GPIO state embedded in sm501_devdata: the two banks plus the mapping
+ * and lock they share.
+ */
+struct sm501_gpio {
+       struct sm501_gpio_chip  low;
+       struct sm501_gpio_chip  high;
+       /* held around the register read-modify-write sequences */
+       spinlock_t              lock;
+
+       /* set once both banks have been added; checked on removal */
+       unsigned int             registered : 1;
+       /* ioremap()ed GPIO register block */
+       void __iomem            *regs;
+       /* region claimed with request_mem_region() for that block */
+       struct resource         *regs_res;
+};
+#else
+struct sm501_gpio {
+       /* no gpio support, empty definition for sm501_devdata. */
+};
+#endif
+
  struct sm501_devdata {
         spinlock_t                       reg_lock;
         struct mutex                     clock_lock;
         struct list_head                 devices;
+       struct sm501_gpio                gpio;
  
         struct device                   *dev;
         struct resource                 *io_res;
@@ -42,6 +70,7 @@ struct sm501_devdata {
         struct resource                 *regs_claim;
         struct sm501_platdata           *platdata;
  
+
         unsigned int                     in_suspend;
         unsigned long                    pm_misc;
  
@@ -52,6 +81,7 @@ struct sm501_devdata {
         unsigned int                     rev;
  };
  
+
  #define MHZ (1000 * 1000)
  
  #ifdef DEBUG
@@ -276,58 +306,6 @@ unsigned long sm501_modify_reg(struct device *dev,
  
  EXPORT_SYMBOL_GPL(sm501_modify_reg);
  
-unsigned long sm501_gpio_get(struct device *dev,
-                            unsigned long gpio)
-{
-       struct sm501_devdata *sm = dev_get_drvdata(dev);
-       unsigned long result;
-       unsigned long reg;
-
-       reg = (gpio > 32) ? SM501_GPIO_DATA_HIGH : SM501_GPIO_DATA_LOW;
-       result = readl(sm->regs + reg);
-
-       result >>= (gpio & 31);
-       return result & 1UL;
-}
-
-EXPORT_SYMBOL_GPL(sm501_gpio_get);
-
-void sm501_gpio_set(struct device *dev,
-                   unsigned long gpio,
-                   unsigned int to,
-                   unsigned int dir)
-{
-       struct sm501_devdata *sm = dev_get_drvdata(dev);
-
-       unsigned long bit = 1 << (gpio & 31);
-       unsigned long base;
-       unsigned long save;
-       unsigned long val;
-
-       base = (gpio > 32) ? SM501_GPIO_DATA_HIGH : SM501_GPIO_DATA_LOW;
-       base += SM501_GPIO;
-
-       spin_lock_irqsave(&sm->reg_lock, save);
-
-       val = readl(sm->regs + base) & ~bit;
-       if (to)
-               val |= bit;
-       writel(val, sm->regs + base);
-
-       val = readl(sm->regs + SM501_GPIO_DDR_LOW) & ~bit;
-       if (dir)
-               val |= bit;
-
-       writel(val, sm->regs + SM501_GPIO_DDR_LOW);
-       sm501_sync_regs(sm);
-
-       spin_unlock_irqrestore(&sm->reg_lock, save);
-
-}
-
-EXPORT_SYMBOL_GPL(sm501_gpio_set);
-
-
  /* sm501_unit_power
   *
   * alters the power active gate to set specific units on or off
@@ -906,6 +884,313 @@ static int sm501_register_display(struct sm501_devdata *sm,
         return sm501_register_device(sm, pdev);
  }
  
+#ifdef CONFIG_MFD_SM501_GPIO
+
+/* Map a gpio_chip back to the sm501_gpio_chip that embeds it. */
+static inline struct sm501_gpio_chip *to_sm501_gpio(struct gpio_chip *gc)
+{
+       return container_of(gc, struct sm501_gpio_chip, gpio);
+}
+
+/* Map the embedded sm501_gpio back to its owning sm501_devdata. */
+static inline struct sm501_devdata *sm501_gpio_to_dev(struct sm501_gpio *gpio)
+{
+       return container_of(gpio, struct sm501_devdata, gpio);
+}
+
+/*
+ * gpio_chip .get hook: read this bank's data register and return the
+ * state (0 or 1) of the line at @offset.
+ */
+static int sm501_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+       void __iomem *bank = to_sm501_gpio(chip)->regbase;
+       unsigned long level = readl(bank + SM501_GPIO_DATA_LOW);
+
+       return (level >> offset) & 1UL;
+}
+
+/*
+ * gpio_chip .set hook: drive the line at @offset of this bank to @value.
+ *
+ * The bank's data register is read, the line's bit cleared or set, and
+ * the result written back, all while holding the sm501_gpio lock.
+ */
+static void sm501_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+       struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+       struct sm501_gpio *smgpio = smchip->ourgpio;
+       /* 1UL: shifting a plain int by 31 is undefined behaviour */
+       unsigned long bit = 1UL << offset;
+       void __iomem *regs = smchip->regbase;
+       unsigned long save;
+       unsigned long val;
+
+       dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d)\n",
+               __func__, chip, offset);
+
+       spin_lock_irqsave(&smgpio->lock, save);
+
+       val = readl(regs + SM501_GPIO_DATA_LOW) & ~bit;
+       if (value)
+               val |= bit;
+       /* write back to the same register that was just read */
+       writel(val, regs + SM501_GPIO_DATA_LOW);
+
+       sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+       spin_unlock_irqrestore(&smgpio->lock, save);
+}
+
+/*
+ * gpio_chip .direction_input hook: clear the line's bit in this bank's
+ * direction register, under the sm501_gpio lock.
+ *
+ * Always returns 0.
+ */
+static int sm501_gpio_input(struct gpio_chip *chip, unsigned offset)
+{
+       struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+       struct sm501_gpio *smgpio = smchip->ourgpio;
+       void __iomem *regs = smchip->regbase;
+       /* 1UL: shifting a plain int by 31 is undefined behaviour */
+       unsigned long bit = 1UL << offset;
+       unsigned long save;
+       unsigned long ddr;
+
+       /* debug-level trace, matching the set and output hooks */
+       dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d)\n",
+               __func__, chip, offset);
+
+       spin_lock_irqsave(&smgpio->lock, save);
+
+       ddr = readl(regs + SM501_GPIO_DDR_LOW);
+       writel(ddr & ~bit, regs + SM501_GPIO_DDR_LOW);
+
+       sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+       spin_unlock_irqrestore(&smgpio->lock, save);
+
+       return 0;
+}
+
+/*
+ * gpio_chip .direction_output hook: make the line at @offset an output
+ * driving @value.
+ *
+ * Under the sm501_gpio lock the data register is written with the new
+ * level, the line's bit is set in the direction register, and the data
+ * register is written once more after the direction change.
+ * NOTE(review): the double data write presumably ensures the requested
+ * level is present both before and after the pin starts driving --
+ * confirm against the SM501 datasheet.
+ *
+ * Always returns 0.
+ */
+static int sm501_gpio_output(struct gpio_chip *chip,
+                            unsigned offset, int value)
+{
+       struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+       struct sm501_gpio *smgpio = smchip->ourgpio;
+       /* 1UL: shifting a plain int by 31 is undefined behaviour */
+       unsigned long bit = 1UL << offset;
+       void __iomem *regs = smchip->regbase;
+       unsigned long save;
+       unsigned long val;
+       unsigned long ddr;
+
+       dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d,%d)\n",
+               __func__, chip, offset, value);
+
+       spin_lock_irqsave(&smgpio->lock, save);
+
+       val = readl(regs + SM501_GPIO_DATA_LOW);
+       if (value)
+               val |= bit;
+       else
+               val &= ~bit;
+       /* same register as the read above and the final write below */
+       writel(val, regs + SM501_GPIO_DATA_LOW);
+
+       ddr = readl(regs + SM501_GPIO_DDR_LOW);
+       writel(ddr | bit, regs + SM501_GPIO_DDR_LOW);
+
+       sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+       writel(val, regs + SM501_GPIO_DATA_LOW);
+
+       sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+       spin_unlock_irqrestore(&smgpio->lock, save);
+
+       return 0;
+}
+
+/*
+ * Settings common to both banks: 32 lines each and the sm501 callbacks.
+ * sm501_gpio_register_chip() copies this and then fills in the per-bank
+ * label and base.
+ */
+static struct gpio_chip gpio_chip_template = {
+       .ngpio                  = 32,
+       .direction_input        = sm501_gpio_input,
+       .direction_output       = sm501_gpio_output,
+       .set                    = sm501_gpio_set,
+       .get                    = sm501_gpio_get,
+};
+
+/*
+ * Initialise one bank (@chip is either &gpio->low or &gpio->high) from
+ * gpio_chip_template and add it to gpiolib.
+ *
+ * The low bank uses the platform data's gpio_base as-is; the high bank
+ * uses gpio_base + 32 when gpio_base is positive.
+ * NOTE(review): a gpio_base of 0 is passed unchanged for both banks --
+ * confirm that is intended.
+ *
+ * Returns the result of gpiochip_add().
+ */
+static int __devinit sm501_gpio_register_chip(struct sm501_devdata *sm,
+                                             struct sm501_gpio *gpio,
+                                             struct sm501_gpio_chip *chip)
+{
+       struct gpio_chip *gc = &chip->gpio;
+       int first = sm->platdata->gpio_base;
+
+       /* start from the shared template, then add the per-bank details */
+       *gc = gpio_chip_template;
+       chip->ourgpio = gpio;
+
+       if (chip == &gpio->high) {
+               gc->label = "SM501-HIGH";
+               chip->regbase = gpio->regs + SM501_GPIO_DATA_HIGH;
+               if (first > 0)
+                       first += 32;
+       } else {
+               gc->label = "SM501-LOW";
+               chip->regbase = gpio->regs + SM501_GPIO_DATA_LOW;
+       }
+       gc->base = first;
+
+       return gpiochip_add(gc);
+}
+
+/*
+ * Claim and map the 0x20-byte GPIO register block at io_res->start +
+ * SM501_GPIO, then register the low and high banks with gpiolib.
+ *
+ * Returns 0 on success (and sets gpio->registered), -ENXIO if the region
+ * cannot be claimed or mapped, or the error from registering a bank.
+ */
+static int sm501_register_gpio(struct sm501_devdata *sm)
+{
+       struct sm501_gpio *gpio = &sm->gpio;
+       resource_size_t iobase = sm->io_res->start + SM501_GPIO;
+       int ret;
+       int tmp;
+
+       dev_dbg(sm->dev, "registering gpio block %08llx\n",
+               (unsigned long long)iobase);
+
+       spin_lock_init(&gpio->lock);
+
+       gpio->regs_res = request_mem_region(iobase, 0x20, "sm501-gpio");
+       if (gpio->regs_res == NULL) {
+               dev_err(sm->dev, "gpio: failed to request region\n");
+               return -ENXIO;
+       }
+
+       gpio->regs = ioremap(iobase, 0x20);
+       if (gpio->regs == NULL) {
+               dev_err(sm->dev, "gpio: failed to remap registers\n");
+               ret = -ENXIO;
+               goto err_claimed;
+       }
+
+       /* Register both our chips. */
+
+       ret = sm501_gpio_register_chip(sm, gpio, &gpio->low);
+       if (ret) {
+               dev_err(sm->dev, "failed to add low chip\n");
+               goto err_mapped;
+       }
+
+       ret = sm501_gpio_register_chip(sm, gpio, &gpio->high);
+       if (ret) {
+               dev_err(sm->dev, "failed to add high chip\n");
+               goto err_low_chip;
+       }
+
+       gpio->registered = 1;
+
+       return 0;
+
+       /* unwind in reverse order of setup; each label falls through */
+ err_low_chip:
+       tmp = gpiochip_remove(&gpio->low.gpio);
+       if (tmp) {
+               /*
+                * The low chip is still registered, so the mapping and
+                * region are left in place rather than released.
+                */
+               dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n");
+               return ret;
+       }
+
+ err_mapped:
+       iounmap(gpio->regs);
+
+ err_claimed:
+       release_resource(gpio->regs_res);
+       kfree(gpio->regs_res);
+
+       return ret;
+}
+
+/* sm501_gpio_remove
+ *
+ * Undo sm501_register_gpio(): remove both GPIO chips (logging any
+ * failure), unmap the registers and release the memory region. Does
+ * nothing when the GPIO block was never registered.
+ */
+static void sm501_gpio_remove(struct sm501_devdata *sm)
+{
+       struct sm501_gpio *gpio = &sm->gpio;
+
+       if (!gpio->registered)
+               return;
+
+       if (gpiochip_remove(&gpio->low.gpio))
+               dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n");
+
+       if (gpiochip_remove(&gpio->high.gpio))
+               dev_err(sm->dev, "cannot remove high chip, cannot tidy up\n");
+
+       iounmap(gpio->regs);
+       release_resource(gpio->regs_res);
+       kfree(gpio->regs_res);
+}
+
+/* Translate a chip-relative pin (0-31 low bank, 32 and up high bank)
+ * into the GPIO number of the bank the pin belongs to.
+ */
+static inline int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
+{
+       struct gpio_chip *gc;
+
+       if (pin < 32)
+               gc = &sm->gpio.low.gpio;
+       else
+               gc = &sm->gpio.high.gpio;
+
+       return (pin % 32) + gc->base;
+}
+
+/* Report the 'registered' flag that sm501_register_gpio() sets on success. */
+static inline int sm501_gpio_isregistered(struct sm501_devdata *sm)
+{
+       struct sm501_gpio *gpio = &sm->gpio;
+
+       return gpio->registered;
+}
+#else
+static inline int sm501_register_gpio(struct sm501_devdata *sm)
+{
+       return 0;       /* stub for the #else branch: nothing to register */
+}
+
+static inline void sm501_gpio_remove(struct sm501_devdata *sm)
+{       /* stub for the #else branch: nothing to tear down */
+}
+
+static inline int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
+{
+       return -1;      /* stub for the #else branch: no GPIO number to report */
+}
+
+static inline int sm501_gpio_isregistered(struct sm501_devdata *sm)
+{
+       return 0;       /* stub for the #else branch: never registered */
+}
+#endif
+
+/* sm501_register_gpio_i2c_instance
+ *
+ * Create and register one "i2c-gpio" sub-device described by @iic.
+ * Returns -ENOMEM when the sub-device cannot be created, otherwise the
+ * result of sm501_register_device().
+ */
+static int sm501_register_gpio_i2c_instance(struct sm501_devdata *sm,
+                                           struct sm501_platdata_gpio_i2c *iic)
+{
+       struct platform_device *i2c_dev;
+       struct i2c_gpio_platform_data *i2c_pd;
+
+       i2c_dev = sm501_create_subdev(sm, "i2c-gpio", 0, sizeof(*i2c_pd));
+       if (i2c_dev == NULL)
+               return -ENOMEM;
+
+       i2c_pd = i2c_dev->dev.platform_data;
+
+       /* The pins in @iic are relative to this SM501, so that one set of
+        * platform data can serve several chips; convert them to GPIO
+        * numbers here.
+        */
+
+       i2c_pd->sda_pin = sm501_gpio_pin2nr(sm, iic->pin_sda);
+       i2c_pd->scl_pin = sm501_gpio_pin2nr(sm, iic->pin_scl);
+       i2c_pd->timeout = iic->timeout;
+       i2c_pd->udelay = iic->udelay;
+
+       /* The i2c-gpio driver derives the bus number from the platform
+        * device id, and the i2c core cannot cope with the pin numbers we
+        * use, so the id is taken from the platform data instead.
+        */
+
+       i2c_dev->id = iic->bus_num;
+
+       dev_info(sm->dev, "registering i2c-%d: sda=%d (%d), scl=%d (%d)\n",
+                iic->bus_num,
+                i2c_pd->sda_pin, iic->pin_sda, i2c_pd->scl_pin, iic->pin_scl);
+
+       return sm501_register_device(sm, i2c_dev);
+}
+
+/* sm501_register_gpio_i2c
+ *
+ * Register every gpio-i2c bus listed in the platform data, stopping at
+ * the first failure. Returns 0 or the negative error of the failing
+ * instance.
+ */
+static int sm501_register_gpio_i2c(struct sm501_devdata *sm,
+                                  struct sm501_platdata *pdata)
+{
+       int index = 0;
+
+       while (index < pdata->gpio_i2c_nr) {
+               int ret;
+
+               ret = sm501_register_gpio_i2c_instance(sm,
+                                               &pdata->gpio_i2c[index]);
+               if (ret < 0)
+                       return ret;
+
+               index++;
+       }
+
+       return 0;
+}
+
  /* sm501_dbg_regs
   *
   * Debug attribute to attach to parent device to show core registers
@@ -1013,6 +1298,7 @@ static unsigned int sm501_mem_local[] = {
  static int sm501_init_dev(struct sm501_devdata *sm)
  {
         struct sm501_initdata *idata;
+       struct sm501_platdata *pdata;
         resource_size_t mem_avail;
         unsigned long dramctrl;
         unsigned long devid;
@@ -1051,7 +1337,9 @@ static int sm501_init_dev(struct sm501_devdata *sm)
  
         /* check to see if we have some device initialisation */
  
-       idata = sm->platdata ? sm->platdata->init : NULL;
+       pdata = sm->platdata;
+       idata = pdata ? pdata->init : NULL;
+
         if (idata) {
                 sm501_init_regs(sm, idata);
  
@@ -1059,6 +1347,15 @@ static int sm501_init_dev(struct sm501_devdata *sm)
                         sm501_register_usbhost(sm, &mem_avail);
                 if (idata->devices & (SM501_USE_UART0 | SM501_USE_UART1))
                         sm501_register_uart(sm, idata->devices);
+               if (idata->devices & SM501_USE_GPIO)
+                       sm501_register_gpio(sm);
+       }
+
+       /* platform data is optional (see the idata lookup in this
+        * function), so it must be checked before it is dereferenced */
+       if (pdata && pdata->gpio_i2c != NULL && pdata->gpio_i2c_nr > 0) {
+               if (!sm501_gpio_isregistered(sm))
+                       dev_err(sm->dev, "no gpio available for i2c gpio.\n");
+               else
+                       sm501_register_gpio_i2c(sm, pdata);
         }
  
         ret = sm501_check_clocks(sm);
@@ -1138,8 +1435,31 @@ static int sm501_plat_probe(struct platform_device *dev)
  }
  
  #ifdef CONFIG_PM
+
  /* power management support */
  
+/* sm501_set_power
+ *
+ * Ask the platform code to switch the chip's power to @on. Nothing is
+ * done without platform data, when get_power() already reports @on, or
+ * when no set_power() hook is provided. After a switch the function
+ * waits via sm501_mdelay(sm, 10).
+ */
+static void sm501_set_power(struct sm501_devdata *sm, int on)
+{
+       struct sm501_platdata *pd = sm->platdata;
+
+       if (!pd)
+               return;
+
+       if (pd->get_power && pd->get_power(sm->dev) == on) {
+               dev_dbg(sm->dev, "is already %d\n", on);
+               return;
+       }
+
+       if (!pd->set_power)
+               return;
+
+       dev_dbg(sm->dev, "setting power to %d\n", on);
+
+       pd->set_power(sm->dev, on);
+       sm501_mdelay(sm, 10);
+}
+
  static int sm501_plat_suspend(struct platform_device *pdev, pm_message_t state)
  {
         struct sm501_devdata *sm = platform_get_drvdata(pdev);
@@ -1148,6 +1468,12 @@ static int sm501_plat_suspend(struct platform_device *pdev, pm_message_t state)
         sm->pm_misc = readl(sm->regs + SM501_MISC_CONTROL);
  
         sm501_dump_regs(sm);
+
+       if (sm->platdata) {
+               if (sm->platdata->flags & SM501_FLAG_SUSPEND_OFF)
+                       sm501_set_power(sm, 0);
+       }
+
         return 0;
  }
  
@@ -1155,6 +1481,8 @@ static int sm501_plat_resume(struct platform_device *pdev)
  {
         struct sm501_devdata *sm = platform_get_drvdata(pdev);
  
+       sm501_set_power(sm, 1);
+
         sm501_dump_regs(sm);
         sm501_dump_gate(sm);
         sm501_dump_clk(sm);
@@ -1229,6 +1557,7 @@ static struct sm501_platdata_fb sm501_fb_pdata = {
  static struct sm501_platdata sm501_pci_platdata = {
         .init           = &sm501_pci_initdata,
         .fb             = &sm501_fb_pdata,
+       .gpio_base      = -1,
  };
  
  static int sm501_pci_probe(struct pci_dev *dev,
@@ -1335,6 +1664,8 @@ static void sm501_dev_remove(struct sm501_devdata *sm)
                 sm501_remove_sub(sm, smdev);
  
         device_remove_file(sm->dev, &dev_attr_dbg_regs);
+
+       sm501_gpio_remove(sm);
  }
  
  static void sm501_pci_remove(struct pci_dev *dev)
@@ -1378,6 +1709,8 @@ static struct pci_driver sm501_pci_drv = {
         .remove         = sm501_pci_remove,
  };
  
+MODULE_ALIAS("platform:sm501");
+
  static struct platform_driver sm501_plat_drv = {
         .driver         = {
                 .name   = "sm501",
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig

index d5bc288b1b0d19b1e72d6d15ef5ff00306eef31a..321eb913463504ed9e7699bdbc17f4b0fa6d297a 100644 (file)
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -77,11 +77,13 @@ config IBM_ASM
           for your IBM server.
  
  config PHANTOM
-       tristate "Sensable PHANToM"
+       tristate "Sensable PHANToM (PCI)"
         depends on PCI
         help
           Say Y here if you want to build a driver for Sensable PHANToM device.
  
+         This driver is only for PCI PHANToMs.
+
           If you choose to build module, its name will be phantom. If unsure,
           say N here.
  
@@ -212,6 +214,18 @@ config TC1100_WMI
           This is a driver for the WMI extensions (wireless and bluetooth power
           control) of the HP Compaq TC1100 tablet.
  
+config HP_WMI
+       tristate "HP WMI extras"
+       depends on ACPI_WMI
+       depends on INPUT
+       depends on RFKILL
+       help
+         Say Y here if you want to support WMI-based hotkeys on HP laptops and
+        to read data from WMI such as docking or ambient light sensor state.
+
+         To compile this driver as a module, choose M here: the module will
+         be called hp-wmi.
+
  config MSI_LAPTOP
          tristate "MSI Laptop Extras"
          depends on X86
@@ -424,6 +438,7 @@ config SGI_XP
  
  config HP_ILO
         tristate "Channel interface driver for HP iLO/iLO2 processor"
+       depends on PCI
         default n
         help
           The channel interface driver allows applications to communicate
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile

index 688fe76135e0eca608a8936edc324d37f91e4dea..f5e273420c090859847018861654d046235aa4de 100644 (file)
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ACER_WMI)                += acer-wmi.o
  obj-$(CONFIG_ATMEL_PWM)                += atmel_pwm.o
  obj-$(CONFIG_ATMEL_SSC)                += atmel-ssc.o
  obj-$(CONFIG_ATMEL_TCLIB)      += atmel_tclib.o
+obj-$(CONFIG_HP_WMI)           += hp-wmi.o
  obj-$(CONFIG_TC1100_WMI)       += tc1100-wmi.o
  obj-$(CONFIG_LKDTM)            += lkdtm.o
  obj-$(CONFIG_TIFM_CORE)        += tifm_core.o
diff --git a/drivers/misc/hp-wmi.c b/drivers/misc/hp-wmi.c

new file mode 100644 (file)

index 0000000..1dbcbcb
--- /dev/null
+++ b/drivers/misc/hp-wmi.c
@@ -0,0 +1,494 @@
+/*
+ * HP WMI hotkeys
+ *
+ * Copyright (C) 2008 Red Hat <mjg@redhat.com>
+ *
+ * Portions based on wistron_btns.c:
+ * Copyright (C) 2005 Miloslav Trmac <mitr@volny.cz>
+ * Copyright (C) 2005 Bernhard Rosenkraenzer <bero@arklinux.org>
+ * Copyright (C) 2005 Dmitry Torokhov <dtor@mail.ru>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/input.h>
+#include <acpi/acpi_drivers.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+#include <linux/rfkill.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+MODULE_AUTHOR("Matthew Garrett <mjg59@srcf.ucam.org>");
+MODULE_DESCRIPTION("HP laptop WMI hotkeys driver");
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS("wmi:95F24279-4D7B-4334-9387-ACCDC67EF61C");
+MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4");
+
+#define HPWMI_EVENT_GUID "95F24279-4D7B-4334-9387-ACCDC67EF61C"
+#define HPWMI_BIOS_GUID "5FB7F034-2C63-45e9-BE91-3D44E2C707E4"
+
+#define HPWMI_DISPLAY_QUERY 0x1
+#define HPWMI_HDDTEMP_QUERY 0x2
+#define HPWMI_ALS_QUERY 0x3
+#define HPWMI_DOCK_QUERY 0x4
+#define HPWMI_WIRELESS_QUERY 0x5
+
+static int __init hp_wmi_bios_setup(struct platform_device *device);
+static int __exit hp_wmi_bios_remove(struct platform_device *device);
+
+struct bios_args {
+       u32 signature;          /* hp_wmi_perform_query() sends 0x55434553 */
+       u32 command;            /* 0x2 for a write, 0x1 for a read */
+       u32 commandtype;        /* one of the HPWMI_*_QUERY numbers */
+       u32 datasize;           /* 0x4 for a write, 0 for a read */
+       u32 data;               /* value passed by the caller */
+};
+
+struct bios_return {
+       u32 sigpass;
+       u32 return_code;        /* non-zero is handed back negated by hp_wmi_perform_query() */
+       u32 value;              /* handed back to the caller when return_code is 0 */
+};
+
+struct key_entry {
+       char type;              /* See KE_* below */
+       u8 code;
+       u16 keycode;
+};
+
+enum { KE_KEY, KE_SW, KE_END };
+
+static struct key_entry hp_wmi_keymap[] = {
+       {KE_SW, 0x01, SW_DOCK},
+       {KE_KEY, 0x02, KEY_BRIGHTNESSUP},
+       {KE_KEY, 0x03, KEY_BRIGHTNESSDOWN},
+       {KE_KEY, 0x04, KEY_HELP},
+       {KE_END, 0}
+};
+
+static struct input_dev *hp_wmi_input_dev;
+static struct platform_device *hp_wmi_platform_dev;
+
+static struct rfkill *wifi_rfkill;
+static struct rfkill *bluetooth_rfkill;
+static struct rfkill *wwan_rfkill;
+
+static struct platform_driver hp_wmi_driver = {
+       .driver = {
+                  .name = "hp-wmi",
+                  .owner = THIS_MODULE,
+       },
+       .probe = hp_wmi_bios_setup,
+       .remove = hp_wmi_bios_remove,
+};
+
+/* hp_wmi_perform_query
+ *
+ * Issue one read (@write == 0) or write (@write != 0) of @query through
+ * the HP BIOS WMI method; @value is the data word placed in the request.
+ *
+ * Returns -EINVAL when the firmware gave no usable reply, the negated
+ * BIOS return code when that code is non-zero, and otherwise the value
+ * word of the reply.
+ */
+static int hp_wmi_perform_query(int query, int write, int value)
+{
+       struct bios_return bios_return;
+       acpi_status status;
+       union acpi_object *obj;
+       struct bios_args args = {
+               .signature = 0x55434553,
+               .command = write ? 0x2 : 0x1,
+               .commandtype = query,
+               .datasize = write ? 0x4 : 0,
+               .data = value,
+       };
+       struct acpi_buffer input = { sizeof(struct bios_args), &args };
+       struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+
+       status = wmi_evaluate_method(HPWMI_BIOS_GUID, 0, 0x3, &input, &output);
+
+       obj = output.pointer;
+
+       /* Reject anything that is not a buffer large enough to hold a
+        * struct bios_return, so the copy below cannot read past the
+        * reply. The output buffer was allocated on our behalf
+        * (ACPI_ALLOCATE_BUFFER) and has to be freed on every path.
+        */
+       if (!obj || obj->type != ACPI_TYPE_BUFFER ||
+           obj->buffer.length < sizeof(struct bios_return)) {
+               kfree(obj);
+               return -EINVAL;
+       }
+
+       bios_return = *((struct bios_return *)obj->buffer.pointer);
+       kfree(obj);
+
+       if (bios_return.return_code > 0)
+               return bios_return.return_code * -1;
+       else
+               return bios_return.value;
+}
+
+/* Read the display query; the result of the query is passed through. */
+static int hp_wmi_display_state(void)
+{
+       const int state = hp_wmi_perform_query(HPWMI_DISPLAY_QUERY, 0, 0);
+
+       return state;
+}
+
+/* Read the hddtemp query; the result of the query is passed through. */
+static int hp_wmi_hddtemp_state(void)
+{
+       const int state = hp_wmi_perform_query(HPWMI_HDDTEMP_QUERY, 0, 0);
+
+       return state;
+}
+
+/* Read the ALS query; the result of the query is passed through. */
+static int hp_wmi_als_state(void)
+{
+       const int state = hp_wmi_perform_query(HPWMI_ALS_QUERY, 0, 0);
+
+       return state;
+}
+
+/* Read the dock query; the result of the query is passed through. */
+static int hp_wmi_dock_state(void)
+{
+       const int state = hp_wmi_perform_query(HPWMI_DOCK_QUERY, 0, 0);
+
+       return state;
+}
+
+/* rfkill toggle_radio hook for wifi: writes 0x101 for a non-zero @state
+ * and 0x100 otherwise to the wireless query.
+ */
+static int hp_wmi_wifi_set(void *data, enum rfkill_state state)
+{
+       const int request = state ? 0x101 : 0x100;
+
+       return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, request);
+}
+
+/* rfkill toggle_radio hook for bluetooth: writes 0x202 for a non-zero
+ * @state and 0x200 otherwise to the wireless query.
+ */
+static int hp_wmi_bluetooth_set(void *data, enum rfkill_state state)
+{
+       const int request = state ? 0x202 : 0x200;
+
+       return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, request);
+}
+
+/* rfkill toggle_radio hook for wwan: writes 0x404 for a non-zero @state
+ * and 0x400 otherwise to the wireless query.
+ */
+static int hp_wmi_wwan_set(void *data, enum rfkill_state state)
+{
+       const int request = state ? 0x404 : 0x400;
+
+       return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, request);
+}
+
+/* Returns 1 when bit 0x100 of the wireless query result is set, else 0. */
+static int hp_wmi_wifi_state(void)
+{
+       const int mask = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
+
+       return (mask & 0x100) ? 1 : 0;
+}
+
+/* Returns 1 when bit 0x10000 of the wireless query result is set, else 0. */
+static int hp_wmi_bluetooth_state(void)
+{
+       const int mask = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
+
+       return (mask & 0x10000) ? 1 : 0;
+}
+
+/* Returns 1 when bit 0x1000000 of the wireless query result is set, else 0. */
+static int hp_wmi_wwan_state(void)
+{
+       const int mask = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
+
+       return (mask & 0x1000000) ? 1 : 0;
+}
+
+/* sysfs show hook: prints the display state, or fails with -EINVAL when
+ * the query result is negative.
+ */
+static ssize_t show_display(struct device *dev, struct device_attribute *attr,
+                           char *buf)
+{
+       const int state = hp_wmi_display_state();
+
+       if (state >= 0)
+               return sprintf(buf, "%d\n", state);
+
+       return -EINVAL;
+}
+
+/* sysfs show hook: prints the hddtemp state, or fails with -EINVAL when
+ * the query result is negative.
+ */
+static ssize_t show_hddtemp(struct device *dev, struct device_attribute *attr,
+                           char *buf)
+{
+       const int state = hp_wmi_hddtemp_state();
+
+       if (state >= 0)
+               return sprintf(buf, "%d\n", state);
+
+       return -EINVAL;
+}
+
+/* sysfs show hook: prints the ALS state, or fails with -EINVAL when the
+ * query result is negative.
+ */
+static ssize_t show_als(struct device *dev, struct device_attribute *attr,
+                       char *buf)
+{
+       const int state = hp_wmi_als_state();
+
+       if (state >= 0)
+               return sprintf(buf, "%d\n", state);
+
+       return -EINVAL;
+}
+
+/* sysfs show hook: prints the dock state, or fails with -EINVAL when the
+ * query result is negative.
+ */
+static ssize_t show_dock(struct device *dev, struct device_attribute *attr,
+                        char *buf)
+{
+       const int state = hp_wmi_dock_state();
+
+       if (state >= 0)
+               return sprintf(buf, "%d\n", state);
+
+       return -EINVAL;
+}
+
+/* sysfs store hook: parses @buf as a decimal number and writes it to the
+ * ALS query. The outcome of the write is not examined; the whole input
+ * is always reported as consumed.
+ */
+static ssize_t set_als(struct device *dev, struct device_attribute *attr,
+                      const char *buf, size_t count)
+{
+       const u32 level = simple_strtoul(buf, NULL, 10);
+
+       hp_wmi_perform_query(HPWMI_ALS_QUERY, 1, level);
+
+       return count;
+}
+
+static DEVICE_ATTR(display, S_IRUGO, show_display, NULL);
+static DEVICE_ATTR(hddtemp, S_IRUGO, show_hddtemp, NULL);
+static DEVICE_ATTR(als, S_IRUGO | S_IWUSR, show_als, set_als);
+static DEVICE_ATTR(dock, S_IRUGO, show_dock, NULL);
+
+/* Find the keymap entry whose event code equals @code; NULL if the
+ * KE_END terminator is reached first.
+ */
+static struct key_entry *hp_wmi_get_entry_by_scancode(int code)
+{
+       struct key_entry *entry = hp_wmi_keymap;
+
+       while (entry->type != KE_END) {
+               if (entry->code == code)
+                       return entry;
+               entry++;
+       }
+
+       return NULL;
+}
+
+/* Find the first KE_KEY keymap entry currently mapped to @keycode; NULL
+ * if there is none.
+ */
+static struct key_entry *hp_wmi_get_entry_by_keycode(int keycode)
+{
+       struct key_entry *entry = hp_wmi_keymap;
+
+       while (entry->type != KE_END) {
+               if (entry->type == KE_KEY && entry->keycode == keycode)
+                       return entry;
+               entry++;
+       }
+
+       return NULL;
+}
+
+/* Input getkeycode hook: stores the keycode mapped to @scancode in
+ * *@keycode. Returns -EINVAL when @scancode has no KE_KEY entry.
+ */
+static int hp_wmi_getkeycode(struct input_dev *dev, int scancode, int *keycode)
+{
+       struct key_entry *entry = hp_wmi_get_entry_by_scancode(scancode);
+
+       if (!entry || entry->type != KE_KEY)
+               return -EINVAL;
+
+       *keycode = entry->keycode;
+
+       return 0;
+}
+
+/* Input setkeycode hook: remaps @scancode to @keycode and keeps the
+ * device's key bitmap in step. Returns -EINVAL for an out-of-range
+ * @keycode or when @scancode has no KE_KEY entry.
+ */
+static int hp_wmi_setkeycode(struct input_dev *dev, int scancode, int keycode)
+{
+       struct key_entry *entry;
+       int previous;
+
+       if (keycode < 0 || keycode > KEY_MAX)
+               return -EINVAL;
+
+       entry = hp_wmi_get_entry_by_scancode(scancode);
+       if (!entry || entry->type != KE_KEY)
+               return -EINVAL;
+
+       previous = entry->keycode;
+       entry->keycode = keycode;
+       set_bit(keycode, dev->keybit);
+
+       /* drop the old keycode only if no other entry still produces it */
+       if (!hp_wmi_get_entry_by_keycode(previous))
+               clear_bit(previous, dev->keybit);
+
+       return 0;
+}
+
+/* hp_wmi_notify
+ *
+ * WMI event handler. Fetches the event data, takes the first byte of an
+ * 8 byte buffer as the event code and either reports the matching key
+ * or switch from the keymap, or, for event 0x5, refreshes the state of
+ * the rfkill switches that exist. Anything else is only logged.
+ */
+void hp_wmi_notify(u32 value, void *context)
+{
+       struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL };
+       struct key_entry *key;
+       union acpi_object *obj;
+       int eventcode;
+
+       wmi_get_event_data(value, &response);
+
+       obj = (union acpi_object *)response.pointer;
+
+       if (!obj || obj->type != ACPI_TYPE_BUFFER ||
+           obj->buffer.length != 8) {
+               printk(KERN_INFO "HP WMI: Unknown response received\n");
+               kfree(obj);
+               return;
+       }
+
+       eventcode = *((u8 *) obj->buffer.pointer);
+
+       /* The event buffer was allocated for us (ACPI_ALLOCATE_BUFFER)
+        * and is not needed once the code has been extracted.
+        */
+       kfree(obj);
+
+       key = hp_wmi_get_entry_by_scancode(eventcode);
+       if (key) {
+               switch (key->type) {
+               case KE_KEY:
+                       /* report a full press/release cycle */
+                       input_report_key(hp_wmi_input_dev,
+                                        key->keycode, 1);
+                       input_sync(hp_wmi_input_dev);
+                       input_report_key(hp_wmi_input_dev,
+                                        key->keycode, 0);
+                       input_sync(hp_wmi_input_dev);
+                       break;
+               case KE_SW:
+                       input_report_switch(hp_wmi_input_dev,
+                                           key->keycode,
+                                           hp_wmi_dock_state());
+                       input_sync(hp_wmi_input_dev);
+                       break;
+               }
+       } else if (eventcode == 0x5) {
+               if (wifi_rfkill)
+                       wifi_rfkill->state = hp_wmi_wifi_state();
+               if (bluetooth_rfkill)
+                       bluetooth_rfkill->state =
+                           hp_wmi_bluetooth_state();
+               if (wwan_rfkill)
+                       wwan_rfkill->state = hp_wmi_wwan_state();
+       } else
+               printk(KERN_INFO "HP WMI: Unknown key pressed - %x\n",
+                      eventcode);
+}
+
+/* hp_wmi_input_setup
+ *
+ * Allocate the hotkey input device, advertise every key and switch of
+ * the keymap and register the device.
+ *
+ * Returns 0 on success, -ENOMEM when the device cannot be allocated, or
+ * the error from input_register_device(). On failure hp_wmi_input_dev
+ * is left NULL rather than pointing at a freed device.
+ */
+static int __init hp_wmi_input_setup(void)
+{
+       struct key_entry *key;
+       int err;
+
+       hp_wmi_input_dev = input_allocate_device();
+       if (!hp_wmi_input_dev)
+               return -ENOMEM;
+
+       hp_wmi_input_dev->name = "HP WMI hotkeys";
+       hp_wmi_input_dev->phys = "wmi/input0";
+       hp_wmi_input_dev->id.bustype = BUS_HOST;
+       hp_wmi_input_dev->getkeycode = hp_wmi_getkeycode;
+       hp_wmi_input_dev->setkeycode = hp_wmi_setkeycode;
+
+       for (key = hp_wmi_keymap; key->type != KE_END; key++) {
+               switch (key->type) {
+               case KE_KEY:
+                       set_bit(EV_KEY, hp_wmi_input_dev->evbit);
+                       set_bit(key->keycode, hp_wmi_input_dev->keybit);
+                       break;
+               case KE_SW:
+                       set_bit(EV_SW, hp_wmi_input_dev->evbit);
+                       set_bit(key->keycode, hp_wmi_input_dev->swbit);
+                       break;
+               }
+       }
+
+       err = input_register_device(hp_wmi_input_dev);
+
+       if (err) {
+               input_free_device(hp_wmi_input_dev);
+               hp_wmi_input_dev = NULL;
+               return err;
+       }
+
+       return 0;
+}
+
+/* Remove the display, hddtemp, als and dock attribute files from @device. */
+static void cleanup_sysfs(struct platform_device *device)
+{
+       struct device_attribute *attrs[] = {
+               &dev_attr_display,
+               &dev_attr_hddtemp,
+               &dev_attr_als,
+               &dev_attr_dock,
+       };
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(attrs); i++)
+               device_remove_file(&device->dev, attrs[i]);
+}
+
+/* hp_wmi_bios_setup
+ *
+ * Platform driver probe: create the sysfs attribute files, then
+ * allocate, initialise and register the wifi, bluetooth and wwan rfkill
+ * switches.
+ *
+ * Returns 0 on success. If an attribute file cannot be created its
+ * error is returned; if an rfkill switch cannot be allocated -ENOMEM is
+ * returned. In both cases the attribute files are removed again and any
+ * switch already allocated is freed and its pointer reset to NULL.
+ */
+static int __init hp_wmi_bios_setup(struct platform_device *device)
+{
+       int err;
+
+       err = device_create_file(&device->dev, &dev_attr_display);
+       if (err)
+               goto add_sysfs_error;
+       err = device_create_file(&device->dev, &dev_attr_hddtemp);
+       if (err)
+               goto add_sysfs_error;
+       err = device_create_file(&device->dev, &dev_attr_als);
+       if (err)
+               goto add_sysfs_error;
+       err = device_create_file(&device->dev, &dev_attr_dock);
+       if (err)
+               goto add_sysfs_error;
+
+       wifi_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WLAN);
+       if (!wifi_rfkill) {
+               err = -ENOMEM;
+               goto add_sysfs_error;
+       }
+       wifi_rfkill->name = "hp-wifi";
+       wifi_rfkill->state = hp_wmi_wifi_state();
+       wifi_rfkill->toggle_radio = hp_wmi_wifi_set;
+       wifi_rfkill->user_claim_unsupported = 1;
+
+       bluetooth_rfkill = rfkill_allocate(&device->dev,
+                                          RFKILL_TYPE_BLUETOOTH);
+       if (!bluetooth_rfkill) {
+               err = -ENOMEM;
+               goto free_wifi;
+       }
+       bluetooth_rfkill->name = "hp-bluetooth";
+       bluetooth_rfkill->state = hp_wmi_bluetooth_state();
+       bluetooth_rfkill->toggle_radio = hp_wmi_bluetooth_set;
+       bluetooth_rfkill->user_claim_unsupported = 1;
+
+       wwan_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WIMAX);
+       if (!wwan_rfkill) {
+               err = -ENOMEM;
+               goto free_bluetooth;
+       }
+       wwan_rfkill->name = "hp-wwan";
+       wwan_rfkill->state = hp_wmi_wwan_state();
+       wwan_rfkill->toggle_radio = hp_wmi_wwan_set;
+       wwan_rfkill->user_claim_unsupported = 1;
+
+       /* registration results are not examined, as before */
+       rfkill_register(wifi_rfkill);
+       rfkill_register(bluetooth_rfkill);
+       rfkill_register(wwan_rfkill);
+
+       return 0;
+free_bluetooth:
+       rfkill_free(bluetooth_rfkill);
+       bluetooth_rfkill = NULL;
+free_wifi:
+       rfkill_free(wifi_rfkill);
+       wifi_rfkill = NULL;
+add_sysfs_error:
+       cleanup_sysfs(device);
+       return err;
+}
+
+/* Platform driver remove: drop the sysfs attribute files and unregister
+ * the wifi, bluetooth and wwan rfkill switches, in that order.
+ */
+static int __exit hp_wmi_bios_remove(struct platform_device *device)
+{
+       struct rfkill *switches[] = {
+               wifi_rfkill,
+               bluetooth_rfkill,
+               wwan_rfkill,
+       };
+       unsigned int i;
+
+       cleanup_sysfs(device);
+
+       for (i = 0; i < ARRAY_SIZE(switches); i++)
+               rfkill_unregister(switches[i]);
+
+       return 0;
+}
+
+/* hp_wmi_init
+ *
+ * Module init. When the event GUID is present, install the notify
+ * handler and set up the hotkey input device. When the BIOS GUID is
+ * present, register the platform driver and add the "hp-wmi" platform
+ * device. Failures are tolerated: the function always returns 0 and
+ * simply leaves the failing part disabled.
+ */
+static int __init hp_wmi_init(void)
+{
+       int err;
+
+       if (wmi_has_guid(HPWMI_EVENT_GUID)) {
+               err = wmi_install_notify_handler(HPWMI_EVENT_GUID,
+                                                hp_wmi_notify, NULL);
+               /* Without an input device the handler would report
+                * events to a device that does not exist, so take it
+                * out again if the input setup fails.
+                */
+               if (!err && hp_wmi_input_setup())
+                       wmi_remove_notify_handler(HPWMI_EVENT_GUID);
+       }
+
+       if (wmi_has_guid(HPWMI_BIOS_GUID)) {
+               err = platform_driver_register(&hp_wmi_driver);
+               if (err)
+                       return 0;
+               hp_wmi_platform_dev = platform_device_alloc("hp-wmi", -1);
+               if (!hp_wmi_platform_dev) {
+                       platform_driver_unregister(&hp_wmi_driver);
+                       return 0;
+               }
+               err = platform_device_add(hp_wmi_platform_dev);
+               if (err) {
+                       /* drop the allocation and clear the pointer so
+                        * module exit does not touch a device that was
+                        * never added
+                        */
+                       platform_device_put(hp_wmi_platform_dev);
+                       hp_wmi_platform_dev = NULL;
+                       platform_driver_unregister(&hp_wmi_driver);
+                       return 0;
+               }
+       }
+
+       return 0;
+}
+
+/* hp_wmi_exit
+ *
+ * Module exit: remove the notify handler and the input device (if one
+ * was set up), then unregister the platform device and driver (if the
+ * device was created).
+ */
+static void __exit hp_wmi_exit(void)
+{
+       if (wmi_has_guid(HPWMI_EVENT_GUID)) {
+               wmi_remove_notify_handler(HPWMI_EVENT_GUID);
+               /* the input device is only set up when the notify
+                * handler could be installed at init time
+                */
+               if (hp_wmi_input_dev)
+                       input_unregister_device(hp_wmi_input_dev);
+       }
+       if (hp_wmi_platform_dev) {
+               /* unregister = del + put, so the device allocated with
+                * platform_device_alloc() is released as well
+                */
+               platform_device_unregister(hp_wmi_platform_dev);
+               platform_driver_unregister(&hp_wmi_driver);
+       }
+}
+
+module_init(hp_wmi_init);
+module_exit(hp_wmi_exit);
diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c

index 4ce3bdc2f959b4081a27a30dc7f8486ee9611057..daf585689ce33a0e77a07bd3991e2165f8c04acb 100644 (file)
--- a/drivers/misc/phantom.c
+++ b/drivers/misc/phantom.c
@@ -563,6 +563,6 @@ module_init(phantom_init);
  module_exit(phantom_exit);
  
  MODULE_AUTHOR("Jiri Slaby <jirislaby@gmail.com>");
-MODULE_DESCRIPTION("Sensable Phantom driver");
+MODULE_DESCRIPTION("Sensable Phantom driver (PCI devices)");
  MODULE_LICENSE("GPL");
  MODULE_VERSION(PHANTOM_VERSION);
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c

index 961416ac06167350bf689c631a85b0ece7f067ec..c7630a2283109a39d8289cf114f748bd9770cd8f 100644 (file)
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -51,14 +51,13 @@
   * @name: MTD device name or number string
   * @vid_hdr_offs: VID header offset
   */
-struct mtd_dev_param
-{
+struct mtd_dev_param {
         char name[MTD_PARAM_LEN_MAX];
         int vid_hdr_offs;
  };
  
  /* Numbers of elements set in the @mtd_dev_param array */
-static int mtd_devs = 0;
+static int mtd_devs;
  
  /* MTD devices specification parameters */
  static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES];
@@ -160,8 +159,7 @@ void ubi_put_device(struct ubi_device *ubi)
  }
  
  /**
- * ubi_get_by_major - get UBI device description object by character device
- *                    major number.
+ * ubi_get_by_major - get UBI device by character device major number.
   * @major: major number
   *
   * This function is similar to 'ubi_get_device()', but it searches the device
@@ -354,16 +352,35 @@ static void kill_volumes(struct ubi_device *ubi)
                         ubi_free_volume(ubi, ubi->volumes[i]);
  }
  
+/**
+ * free_user_volumes - free all user volumes.
+ * @ubi: UBI device description object
+ *
+ * Volumes are normally freed by the release function of their device
+ * objects. On error paths taken before those device objects have been
+ * initialized, the volumes have to be freed by hand, which is what this
+ * function does for every populated slot below @ubi->vtbl_slots.
+ */
+static void free_user_volumes(struct ubi_device *ubi)
+{
+       int slot;
+
+       for (slot = 0; slot < ubi->vtbl_slots; slot++) {
+               if (!ubi->volumes[slot])
+                       continue;
+
+               kfree(ubi->volumes[slot]->eba_tbl);
+               kfree(ubi->volumes[slot]);
+       }
+}
+
  /**
   * uif_init - initialize user interfaces for an UBI device.
   * @ubi: UBI device description object
   *
   * This function returns zero in case of success and a negative error code in
- * case of failure.
+ * case of failure. Note, this function destroys all volumes if it fails.
   */
  static int uif_init(struct ubi_device *ubi)
  {
-       int i, err;
+       /* user volumes must be freed on failure unless kill_volumes() ran */
+       int i, err, do_free = 1;
         dev_t dev;
  
         sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num);
@@ -384,7 +401,7 @@ static int uif_init(struct ubi_device *ubi)
  
         ubi_assert(MINOR(dev) == 0);
         cdev_init(&ubi->cdev, &ubi_cdev_operations);
-       dbg_msg("%s major is %u", ubi->ubi_name, MAJOR(dev));
+       dbg_gen("%s major is %u", ubi->ubi_name, MAJOR(dev));
         ubi->cdev.owner = THIS_MODULE;
  
         err = cdev_add(&ubi->cdev, dev, 1);
@@ -410,10 +427,13 @@ static int uif_init(struct ubi_device *ubi)
  
  out_volumes:
         kill_volumes(ubi);
+       do_free = 0;
  out_sysfs:
         ubi_sysfs_close(ubi);
         cdev_del(&ubi->cdev);
  out_unreg:
+       if (do_free)
+               free_user_volumes(ubi);
         unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1);
         ubi_err("cannot initialize UBI %s, error %d", ubi->ubi_name, err);
         return err;
@@ -422,6 +442,10 @@ out_unreg:
  /**
   * uif_close - close user interfaces for an UBI device.
   * @ubi: UBI device description object
+ *
+ * Note, since this function un-registers UBI volume device objects (@vol->dev),
+ * the memory allocated for the volumes is freed as well (in the release
+ * function).
   */
  static void uif_close(struct ubi_device *ubi)
  {
@@ -431,6 +455,21 @@ static void uif_close(struct ubi_device *ubi)
         unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1);
  }
  
+/**
+ * free_internal_volumes - free internal volumes.
+ * @ubi: UBI device description object
+ *
+ * Frees the %UBI_INT_VOL_COUNT volume objects (and their EBA tables)
+ * stored directly after the @ubi->vtbl_slots user slots.
+ */
+static void free_internal_volumes(struct ubi_device *ubi)
+{
+       int n;
+
+       for (n = 0; n < UBI_INT_VOL_COUNT; n++) {
+               const int slot = ubi->vtbl_slots + n;
+
+               kfree(ubi->volumes[slot]->eba_tbl);
+               kfree(ubi->volumes[slot]);
+       }
+}
+
  /**
   * attach_by_scanning - attach an MTD device using scanning method.
   * @ubi: UBI device descriptor
@@ -475,6 +514,7 @@ static int attach_by_scanning(struct ubi_device *ubi)
  out_wl:
         ubi_wl_close(ubi);
  out_vtbl:
+       free_internal_volumes(ubi);
         vfree(ubi->vtbl);
  out_si:
         ubi_scan_destroy_si(si);
@@ -482,7 +522,7 @@ out_si:
  }
  
  /**
- * io_init - initialize I/O unit for a given UBI device.
+ * io_init - initialize I/O sub-system for a given UBI device.
   * @ubi: UBI device description object
   *
   * If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are
@@ -530,7 +570,11 @@ static int io_init(struct ubi_device *ubi)
         ubi->min_io_size = ubi->mtd->writesize;
         ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft;
  
-       /* Make sure minimal I/O unit is power of 2 */
+       /*
+        * Make sure minimal I/O unit is power of 2. Note, there is no
+        * fundamental reason for this assumption. It is just an optimization
+        * which allows us to avoid costly division operations.
+        */
         if (!is_power_of_2(ubi->min_io_size)) {
                 ubi_err("min. I/O unit (%d) is not power of 2",
                         ubi->min_io_size);
@@ -581,7 +625,7 @@ static int io_init(struct ubi_device *ubi)
         if (ubi->vid_hdr_offset < UBI_EC_HDR_SIZE ||
             ubi->leb_start < ubi->vid_hdr_offset + UBI_VID_HDR_SIZE ||
             ubi->leb_start > ubi->peb_size - UBI_VID_HDR_SIZE ||
-           ubi->leb_start % ubi->min_io_size) {
+           ubi->leb_start & (ubi->min_io_size - 1)) {
                 ubi_err("bad VID header (%d) or data offsets (%d)",
                         ubi->vid_hdr_offset, ubi->leb_start);
                 return -EINVAL;
@@ -646,7 +690,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id)
  
         /*
          * Clear the auto-resize flag in the volume in-memory copy of the
-        * volume table, and 'ubi_resize_volume()' will propogate this change
+        * volume table, and 'ubi_resize_volume()' will propagate this change
          * to the flash.
          */
         ubi->vtbl[vol_id].flags &= ~UBI_VTBL_AUTORESIZE_FLG;
@@ -655,7 +699,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id)
                 struct ubi_vtbl_record vtbl_rec;
  
                 /*
-                * No avalilable PEBs to re-size the volume, clear the flag on
+                * No available PEBs to re-size the volume, clear the flag on
                  * flash and exit.
                  */
                 memcpy(&vtbl_rec, &ubi->vtbl[vol_id],
@@ -682,13 +726,13 @@ static int autoresize(struct ubi_device *ubi, int vol_id)
  
  /**
   * ubi_attach_mtd_dev - attach an MTD device.
- * @mtd_dev: MTD device description object
+ * @mtd: MTD device description object
   * @ubi_num: number to assign to the new UBI device
   * @vid_hdr_offset: VID header offset
   *
   * This function attaches MTD device @mtd_dev to UBI and assign @ubi_num number
   * to the newly created UBI device, unless @ubi_num is %UBI_DEV_NUM_AUTO, in
- * which case this function finds a vacant device nubert and assings it
+ * which case this function finds a vacant device number and assigns it
   * automatically. Returns the new UBI device number in case of success and a
   * negative error code in case of failure.
   *
@@ -698,7 +742,7 @@ static int autoresize(struct ubi_device *ubi, int vol_id)
  int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
  {
         struct ubi_device *ubi;
-       int i, err;
+       int i, err, do_free = 1;
  
         /*
          * Check if we already have the same MTD device attached.
@@ -735,7 +779,8 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
                         if (!ubi_devices[ubi_num])
                                 break;
                 if (ubi_num == UBI_MAX_DEVICES) {
-                       dbg_err("only %d UBI devices may be created", UBI_MAX_DEVICES);
+                       dbg_err("only %d UBI devices may be created",
+                               UBI_MAX_DEVICES);
                         return -ENFILE;
                 }
         } else {
@@ -760,6 +805,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
  
         mutex_init(&ubi->buf_mutex);
         mutex_init(&ubi->ckvol_mutex);
+       mutex_init(&ubi->mult_mutex);
         mutex_init(&ubi->volumes_mutex);
         spin_lock_init(&ubi->volumes_lock);
  
@@ -798,7 +844,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
  
         err = uif_init(ubi);
         if (err)
-               goto out_detach;
+               goto out_nofree;
  
         ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name);
         if (IS_ERR(ubi->bgt_thread)) {
@@ -824,20 +870,22 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
                 ubi->beb_rsvd_pebs);
         ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec);
  
-       /* Enable the background thread */
-       if (!DBG_DISABLE_BGT) {
+       if (!DBG_DISABLE_BGT)
                 ubi->thread_enabled = 1;
-               wake_up_process(ubi->bgt_thread);
-       }
+       wake_up_process(ubi->bgt_thread);
  
         ubi_devices[ubi_num] = ubi;
         return ubi_num;
  
  out_uif:
         uif_close(ubi);
+out_nofree:
+       do_free = 0;
  out_detach:
-       ubi_eba_close(ubi);
         ubi_wl_close(ubi);
+       if (do_free)
+               free_user_volumes(ubi);
+       free_internal_volumes(ubi);
         vfree(ubi->vtbl);
  out_free:
         vfree(ubi->peb_buf1);
@@ -899,8 +947,8 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway)
                 kthread_stop(ubi->bgt_thread);
  
         uif_close(ubi);
-       ubi_eba_close(ubi);
         ubi_wl_close(ubi);
+       free_internal_volumes(ubi);
         vfree(ubi->vtbl);
         put_mtd_device(ubi->mtd);
         vfree(ubi->peb_buf1);
@@ -1044,8 +1092,7 @@ static void __exit ubi_exit(void)
  module_exit(ubi_exit);
  
  /**
- * bytes_str_to_int - convert a string representing number of bytes to an
- * integer.
+ * bytes_str_to_int - convert a number of bytes string into an integer.
   * @str: the string to convert
   *
   * This function returns positive resulting integer in case of success and a
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c

index 89193ba9451e78e771740724fbb98ff04135aeb7..03c759b4eeb5e852b192794f242020bc45792c22 100644 (file)
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -39,9 +39,9 @@
  #include <linux/stat.h>
  #include <linux/ioctl.h>
  #include <linux/capability.h>
+#include <linux/uaccess.h>
  #include <linux/smp_lock.h>
  #include <mtd/ubi-user.h>
-#include <asm/uaccess.h>
  #include <asm/div64.h>
  #include "ubi.h"
  
@@ -116,7 +116,7 @@ static int vol_cdev_open(struct inode *inode, struct file *file)
         else
                 mode = UBI_READONLY;
  
-       dbg_msg("open volume %d, mode %d", vol_id, mode);
+       dbg_gen("open volume %d, mode %d", vol_id, mode);
  
         desc = ubi_open_volume(ubi_num, vol_id, mode);
         unlock_kernel();
@@ -132,7 +132,7 @@ static int vol_cdev_release(struct inode *inode, struct file *file)
         struct ubi_volume_desc *desc = file->private_data;
         struct ubi_volume *vol = desc->vol;
  
-       dbg_msg("release volume %d, mode %d", vol->vol_id, desc->mode);
+       dbg_gen("release volume %d, mode %d", vol->vol_id, desc->mode);
  
         if (vol->updating) {
                 ubi_warn("update of volume %d not finished, volume is damaged",
@@ -141,7 +141,7 @@ static int vol_cdev_release(struct inode *inode, struct file *file)
                 vol->updating = 0;
                 vfree(vol->upd_buf);
         } else if (vol->changing_leb) {
-               dbg_msg("only %lld of %lld bytes received for atomic LEB change"
+               dbg_gen("only %lld of %lld bytes received for atomic LEB change"
                         " for volume %d:%d, cancel", vol->upd_received,
                         vol->upd_bytes, vol->ubi->ubi_num, vol->vol_id);
                 vol->changing_leb = 0;
@@ -183,7 +183,7 @@ static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin)
                 return -EINVAL;
         }
  
-       dbg_msg("seek volume %d, offset %lld, origin %d, new offset %lld",
+       dbg_gen("seek volume %d, offset %lld, origin %d, new offset %lld",
                 vol->vol_id, offset, origin, new_offset);
  
         file->f_pos = new_offset;
@@ -201,7 +201,7 @@ static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count,
         void *tbuf;
         uint64_t tmp;
  
-       dbg_msg("read %zd bytes from offset %lld of volume %d",
+       dbg_gen("read %zd bytes from offset %lld of volume %d",
                 count, *offp, vol->vol_id);
  
         if (vol->updating) {
@@ -216,7 +216,7 @@ static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count,
                 return 0;
  
         if (vol->corrupted)
-               dbg_msg("read from corrupted volume %d", vol->vol_id);
+               dbg_gen("read from corrupted volume %d", vol->vol_id);
  
         if (*offp + count > vol->used_bytes)
                 count_save = count = vol->used_bytes - *offp;
@@ -285,7 +285,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
         char *tbuf;
         uint64_t tmp;
  
-       dbg_msg("requested: write %zd bytes to offset %lld of volume %u",
+       dbg_gen("requested: write %zd bytes to offset %lld of volume %u",
                 count, *offp, vol->vol_id);
  
         if (vol->vol_type == UBI_STATIC_VOLUME)
@@ -295,7 +295,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
         off = do_div(tmp, vol->usable_leb_size);
         lnum = tmp;
  
-       if (off % ubi->min_io_size) {
+       if (off & (ubi->min_io_size - 1)) {
                 dbg_err("unaligned position");
                 return -EINVAL;
         }
@@ -304,7 +304,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
                 count_save = count = vol->used_bytes - *offp;
  
         /* We can write only in fractions of the minimum I/O unit */
-       if (count % ubi->min_io_size) {
+       if (count & (ubi->min_io_size - 1)) {
                 dbg_err("unaligned write length");
                 return -EINVAL;
         }
@@ -352,7 +352,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
  }
  
  #else
-#define vol_cdev_direct_write(file, buf, count, offp) -EPERM
+#define vol_cdev_direct_write(file, buf, count, offp) (-EPERM)
  #endif /* CONFIG_MTD_UBI_DEBUG_USERSPACE_IO */
  
  static ssize_t vol_cdev_write(struct file *file, const char __user *buf,
@@ -437,7 +437,8 @@ static int vol_cdev_ioctl(struct inode *inode, struct file *file,
                         break;
                 }
  
-               rsvd_bytes = vol->reserved_pebs * (ubi->leb_size-vol->data_pad);
+               rsvd_bytes = (long long)vol->reserved_pebs *
+                                       (ubi->leb_size - vol->data_pad);
                 if (bytes < 0 || bytes > rsvd_bytes) {
                         err = -EINVAL;
                         break;
@@ -513,7 +514,7 @@ static int vol_cdev_ioctl(struct inode *inode, struct file *file,
                         break;
                 }
  
-               dbg_msg("erase LEB %d:%d", vol->vol_id, lnum);
+               dbg_gen("erase LEB %d:%d", vol->vol_id, lnum);
                 err = ubi_eba_unmap_leb(ubi, vol, lnum);
                 if (err)
                         break;
@@ -564,7 +565,7 @@ static int verify_mkvol_req(const struct ubi_device *ubi,
         if (req->alignment > ubi->leb_size)
                 goto bad;
  
-       n = req->alignment % ubi->min_io_size;
+       n = req->alignment & (ubi->min_io_size - 1);
         if (req->alignment != 1 && n)
                 goto bad;
  
@@ -573,6 +574,10 @@ static int verify_mkvol_req(const struct ubi_device *ubi,
                 goto bad;
         }
  
+       n = strnlen(req->name, req->name_len + 1);
+       if (n != req->name_len)
+               goto bad;
+
         return 0;
  
  bad:
@@ -600,6 +605,166 @@ static int verify_rsvol_req(const struct ubi_device *ubi,
         return 0;
  }
  
+/**
+ * rename_volumes - rename UBI volumes.
+ * @ubi: UBI device description object
+ * @req: volumes re-name request
+ *
+ * This is a helper function for the volume re-name IOCTL which validates the
+ * request, opens the volumes and calls the corresponding volumes management
+ * function. Returns zero in case of success and a negative error code in case
+ * of failure.
+ */
+static int rename_volumes(struct ubi_device *ubi,
+                         struct ubi_rnvol_req *req)
+{
+       int i, n, err;
+       struct list_head rename_list;
+       struct ubi_rename_entry *re, *re1;
+
+       if (req->count < 0 || req->count > UBI_MAX_RNVOL)
+               return -EINVAL;
+
+       if (req->count == 0)
+               return 0;
+
+       /* Validate volume IDs and names in the request */
+       for (i = 0; i < req->count; i++) {
+               if (req->ents[i].vol_id < 0 ||
+                   req->ents[i].vol_id >= ubi->vtbl_slots)
+                       return -EINVAL;
+               if (req->ents[i].name_len < 0)
+                       return -EINVAL;
+               if (req->ents[i].name_len > UBI_VOL_NAME_MAX)
+                       return -ENAMETOOLONG;
+               req->ents[i].name[req->ents[i].name_len] = '\0';
+               n = strlen(req->ents[i].name);
+               if (n != req->ents[i].name_len)
+                       return -EINVAL;
+       }
+
+       /* Make sure volume IDs and names are unique */
+       for (i = 0; i < req->count - 1; i++) {
+               for (n = i + 1; n < req->count; n++) {
+                       if (req->ents[i].vol_id == req->ents[n].vol_id) {
+                               dbg_err("duplicated volume id %d",
+                                       req->ents[i].vol_id);
+                               return -EINVAL;
+                       }
+                       if (!strcmp(req->ents[i].name, req->ents[n].name)) {
+                               dbg_err("duplicated volume name \"%s\"",
+                                       req->ents[i].name);
+                               return -EINVAL;
+                       }
+               }
+       }
+
+       /* Create the re-name list */
+       INIT_LIST_HEAD(&rename_list);
+       for (i = 0; i < req->count; i++) {
+               int vol_id = req->ents[i].vol_id;
+               int name_len = req->ents[i].name_len;
+               const char *name = req->ents[i].name;
+
+               re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL);
+               if (!re) {
+                       err = -ENOMEM;
+                       goto out_free;
+               }
+
+               re->desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_EXCLUSIVE);
+               if (IS_ERR(re->desc)) {
+                       err = PTR_ERR(re->desc);
+                       dbg_err("cannot open volume %d, error %d", vol_id, err);
+                       kfree(re);
+                       goto out_free;
+               }
+
+               /* Skip this re-naming if the name does not really change */
+               if (re->desc->vol->name_len == name_len &&
+                   !memcmp(re->desc->vol->name, name, name_len)) {
+                       ubi_close_volume(re->desc);
+                       kfree(re);
+                       continue;
+               }
+
+               re->new_name_len = name_len;
+               memcpy(re->new_name, name, name_len);
+               list_add_tail(&re->list, &rename_list);
+               dbg_msg("will rename volume %d from \"%s\" to \"%s\"",
+                       vol_id, re->desc->vol->name, name);
+       }
+
+       if (list_empty(&rename_list))
+               return 0;
+
+       /* Find out the volumes which have to be removed */
+       list_for_each_entry(re, &rename_list, list) {
+               struct ubi_volume_desc *desc;
+               int no_remove_needed = 0;
+
+               /*
+                * Volume @re->vol_id is going to be re-named to
+                * @re->new_name, while its current name is @name. If a volume
+                * with name @re->new_name currently exists, it has to be
+                * removed, unless it is also re-named in the request (@req).
+                */
+               list_for_each_entry(re1, &rename_list, list) {
+                       if (re->new_name_len == re1->desc->vol->name_len &&
+                           !memcmp(re->new_name, re1->desc->vol->name,
+                                   re1->desc->vol->name_len)) {
+                               no_remove_needed = 1;
+                               break;
+                       }
+               }
+
+               if (no_remove_needed)
+                       continue;
+
+               /*
+                * Remove the volume named @re->new_name if it exists. @re1 is
+                * used below so that the list cursor @re stays intact.
+                */
+               desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name, UBI_EXCLUSIVE);
+               if (IS_ERR(desc)) {
+                       err = PTR_ERR(desc);
+                       if (err == -ENODEV)
+                               /* Re-naming into a non-existing volume name */
+                               continue;
+
+                       /* The volume exists but busy, or an error occurred */
+                       dbg_err("cannot open volume \"%s\", error %d",
+                               re->new_name, err);
+                       goto out_free;
+               }
+
+               re1 = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL);
+               if (!re1) {
+                       err = -ENOMEM;
+                       ubi_close_volume(desc);
+                       goto out_free;
+               }
+
+               re1->remove = 1;
+               re1->desc = desc;
+               list_add(&re1->list, &rename_list);
+               dbg_msg("will remove volume %d, name \"%s\"",
+                       re1->desc->vol->vol_id, re1->desc->vol->name);
+       }
+
+       mutex_lock(&ubi->volumes_mutex);
+       err = ubi_rename_volumes(ubi, &rename_list);
+       mutex_unlock(&ubi->volumes_mutex);
+
+out_free:
+       list_for_each_entry_safe(re, re1, &rename_list, list) {
+               ubi_close_volume(re->desc);
+               list_del(&re->list);
+               kfree(re);
+       }
+       return err;
+}
+
  static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
                           unsigned int cmd, unsigned long arg)
  {
@@ -621,19 +786,18 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
         {
                 struct ubi_mkvol_req req;
  
-               dbg_msg("create volume");
+               dbg_gen("create volume");
                 err = copy_from_user(&req, argp, sizeof(struct ubi_mkvol_req));
                 if (err) {
                         err = -EFAULT;
                         break;
                 }
  
+               req.name[req.name_len] = '\0';
                 err = verify_mkvol_req(ubi, &req);
                 if (err)
                         break;
  
-               req.name[req.name_len] = '\0';
-
                 mutex_lock(&ubi->volumes_mutex);
                 err = ubi_create_volume(ubi, &req);
                 mutex_unlock(&ubi->volumes_mutex);
@@ -652,7 +816,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
         {
                 int vol_id;
  
-               dbg_msg("remove volume");
+               dbg_gen("remove volume");
                 err = get_user(vol_id, (__user int32_t *)argp);
                 if (err) {
                         err = -EFAULT;
@@ -666,7 +830,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
                 }
  
                 mutex_lock(&ubi->volumes_mutex);
-               err = ubi_remove_volume(desc);
+               err = ubi_remove_volume(desc, 0);
                 mutex_unlock(&ubi->volumes_mutex);
  
                 /*
@@ -685,7 +849,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
                 uint64_t tmp;
                 struct ubi_rsvol_req req;
  
-               dbg_msg("re-size volume");
+               dbg_gen("re-size volume");
                 err = copy_from_user(&req, argp, sizeof(struct ubi_rsvol_req));
                 if (err) {
                         err = -EFAULT;
@@ -713,6 +877,32 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
                 break;
         }
  
+       /* Re-name volumes command */
+       case UBI_IOCRNVOL:
+       {
+               struct ubi_rnvol_req *req;
+
+               dbg_msg("re-name volumes");
+               req = kmalloc(sizeof(struct ubi_rnvol_req), GFP_KERNEL);
+               if (!req) {
+                       err = -ENOMEM;
+                       break;
+               };
+
+               err = copy_from_user(req, argp, sizeof(struct ubi_rnvol_req));
+               if (err) {
+                       err = -EFAULT;
+                       kfree(req);
+                       break;
+               }
+
+               mutex_lock(&ubi->mult_mutex);
+               err = rename_volumes(ubi, req);
+               mutex_unlock(&ubi->mult_mutex);
+               kfree(req);
+               break;
+       }
+
         default:
                 err = -ENOTTY;
                 break;
@@ -738,7 +928,7 @@ static int ctrl_cdev_ioctl(struct inode *inode, struct file *file,
                 struct ubi_attach_req req;
                 struct mtd_info *mtd;
  
-               dbg_msg("attach MTD device");
+               dbg_gen("attach MTD device");
                 err = copy_from_user(&req, argp, sizeof(struct ubi_attach_req));
                 if (err) {
                         err = -EFAULT;
@@ -778,7 +968,7 @@ static int ctrl_cdev_ioctl(struct inode *inode, struct file *file,
         {
                 int ubi_num;
  
-               dbg_msg("dettach MTD device");
+               dbg_gen("dettach MTD device");
                 err = get_user(ubi_num, (__user int32_t *)argp);
                 if (err) {
                         err = -EFAULT;
diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c

index 56956ec2845ff70bf4139f08ba56184487005816..c0ed60e8ade978ca74091aed6a8e5b14f1a47e63 100644 (file)
--- a/drivers/mtd/ubi/debug.c
+++ b/drivers/mtd/ubi/debug.c
@@ -24,7 +24,7 @@
   * changes.
   */
  
-#ifdef CONFIG_MTD_UBI_DEBUG_MSG
+#ifdef CONFIG_MTD_UBI_DEBUG
  
  #include "ubi.h"
  
@@ -34,14 +34,19 @@
   */
  void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr)
  {
-       dbg_msg("erase counter header dump:");
-       dbg_msg("magic          %#08x", be32_to_cpu(ec_hdr->magic));
-       dbg_msg("version        %d",    (int)ec_hdr->version);
-       dbg_msg("ec             %llu",  (long long)be64_to_cpu(ec_hdr->ec));
-       dbg_msg("vid_hdr_offset %d",    be32_to_cpu(ec_hdr->vid_hdr_offset));
-       dbg_msg("data_offset    %d",    be32_to_cpu(ec_hdr->data_offset));
-       dbg_msg("hdr_crc        %#08x", be32_to_cpu(ec_hdr->hdr_crc));
-       dbg_msg("erase counter header hexdump:");
+       printk(KERN_DEBUG "Erase counter header dump:\n");
+       printk(KERN_DEBUG "\tmagic          %#08x\n",
+              be32_to_cpu(ec_hdr->magic));
+       printk(KERN_DEBUG "\tversion        %d\n", (int)ec_hdr->version);
+       printk(KERN_DEBUG "\tec             %llu\n",
+              (long long)be64_to_cpu(ec_hdr->ec));
+       printk(KERN_DEBUG "\tvid_hdr_offset %d\n",
+              be32_to_cpu(ec_hdr->vid_hdr_offset));
+       printk(KERN_DEBUG "\tdata_offset    %d\n",
+              be32_to_cpu(ec_hdr->data_offset));
+       printk(KERN_DEBUG "\thdr_crc        %#08x\n",
+              be32_to_cpu(ec_hdr->hdr_crc));
+       printk(KERN_DEBUG "erase counter header hexdump:\n");
         print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
                        ec_hdr, UBI_EC_HDR_SIZE, 1);
  }
@@ -52,22 +57,23 @@ void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr)
   */
  void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr)
  {
-       dbg_msg("volume identifier header dump:");
-       dbg_msg("magic     %08x", be32_to_cpu(vid_hdr->magic));
-       dbg_msg("version   %d",   (int)vid_hdr->version);
-       dbg_msg("vol_type  %d",   (int)vid_hdr->vol_type);
-       dbg_msg("copy_flag %d",   (int)vid_hdr->copy_flag);
-       dbg_msg("compat    %d",   (int)vid_hdr->compat);
-       dbg_msg("vol_id    %d",   be32_to_cpu(vid_hdr->vol_id));
-       dbg_msg("lnum      %d",   be32_to_cpu(vid_hdr->lnum));
-       dbg_msg("leb_ver   %u",   be32_to_cpu(vid_hdr->leb_ver));
-       dbg_msg("data_size %d",   be32_to_cpu(vid_hdr->data_size));
-       dbg_msg("used_ebs  %d",   be32_to_cpu(vid_hdr->used_ebs));
-       dbg_msg("data_pad  %d",   be32_to_cpu(vid_hdr->data_pad));
-       dbg_msg("sqnum     %llu",
+       printk(KERN_DEBUG "Volume identifier header dump:\n");
+       printk(KERN_DEBUG "\tmagic     %08x\n", be32_to_cpu(vid_hdr->magic));
+       printk(KERN_DEBUG "\tversion   %d\n",   (int)vid_hdr->version);
+       printk(KERN_DEBUG "\tvol_type  %d\n",   (int)vid_hdr->vol_type);
+       printk(KERN_DEBUG "\tcopy_flag %d\n",   (int)vid_hdr->copy_flag);
+       printk(KERN_DEBUG "\tcompat    %d\n",   (int)vid_hdr->compat);
+       printk(KERN_DEBUG "\tvol_id    %d\n",   be32_to_cpu(vid_hdr->vol_id));
+       printk(KERN_DEBUG "\tlnum      %d\n",   be32_to_cpu(vid_hdr->lnum));
+       printk(KERN_DEBUG "\tdata_size %d\n",   be32_to_cpu(vid_hdr->data_size));
+       printk(KERN_DEBUG "\tused_ebs  %d\n",   be32_to_cpu(vid_hdr->used_ebs));
+       printk(KERN_DEBUG "\tdata_pad  %d\n",   be32_to_cpu(vid_hdr->data_pad));
+       printk(KERN_DEBUG "\tsqnum     %llu\n",
                 (unsigned long long)be64_to_cpu(vid_hdr->sqnum));
-       dbg_msg("hdr_crc   %08x", be32_to_cpu(vid_hdr->hdr_crc));
-       dbg_msg("volume identifier header hexdump:");
+       printk(KERN_DEBUG "\thdr_crc   %08x\n", be32_to_cpu(vid_hdr->hdr_crc));
+       printk(KERN_DEBUG "Volume identifier header hexdump:\n");
+       print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
+                      vid_hdr, UBI_VID_HDR_SIZE, 1);
  }
  
  /**
@@ -76,27 +82,27 @@ void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr)
   */
  void ubi_dbg_dump_vol_info(const struct ubi_volume *vol)
  {
-       dbg_msg("volume information dump:");
-       dbg_msg("vol_id          %d", vol->vol_id);
-       dbg_msg("reserved_pebs   %d", vol->reserved_pebs);
-       dbg_msg("alignment       %d", vol->alignment);
-       dbg_msg("data_pad        %d", vol->data_pad);
-       dbg_msg("vol_type        %d", vol->vol_type);
-       dbg_msg("name_len        %d", vol->name_len);
-       dbg_msg("usable_leb_size %d", vol->usable_leb_size);
-       dbg_msg("used_ebs        %d", vol->used_ebs);
-       dbg_msg("used_bytes      %lld", vol->used_bytes);
-       dbg_msg("last_eb_bytes   %d", vol->last_eb_bytes);
-       dbg_msg("corrupted       %d", vol->corrupted);
-       dbg_msg("upd_marker      %d", vol->upd_marker);
+       printk(KERN_DEBUG "Volume information dump:\n");
+       printk(KERN_DEBUG "\tvol_id          %d\n", vol->vol_id);
+       printk(KERN_DEBUG "\treserved_pebs   %d\n", vol->reserved_pebs);
+       printk(KERN_DEBUG "\talignment       %d\n", vol->alignment);
+       printk(KERN_DEBUG "\tdata_pad        %d\n", vol->data_pad);
+       printk(KERN_DEBUG "\tvol_type        %d\n", vol->vol_type);
+       printk(KERN_DEBUG "\tname_len        %d\n", vol->name_len);
+       printk(KERN_DEBUG "\tusable_leb_size %d\n", vol->usable_leb_size);
+       printk(KERN_DEBUG "\tused_ebs        %d\n", vol->used_ebs);
+       printk(KERN_DEBUG "\tused_bytes      %lld\n", vol->used_bytes);
+       printk(KERN_DEBUG "\tlast_eb_bytes   %d\n", vol->last_eb_bytes);
+       printk(KERN_DEBUG "\tcorrupted       %d\n", vol->corrupted);
+       printk(KERN_DEBUG "\tupd_marker      %d\n", vol->upd_marker);
  
         if (vol->name_len <= UBI_VOL_NAME_MAX &&
             strnlen(vol->name, vol->name_len + 1) == vol->name_len) {
-               dbg_msg("name            %s", vol->name);
+               printk(KERN_DEBUG "\tname            %s\n", vol->name);
         } else {
-               dbg_msg("the 1st 5 characters of the name: %c%c%c%c%c",
-                       vol->name[0], vol->name[1], vol->name[2],
-                       vol->name[3], vol->name[4]);
+               printk(KERN_DEBUG "\t1st 5 characters of name: %c%c%c%c%c\n",
+                      vol->name[0], vol->name[1], vol->name[2],
+                      vol->name[3], vol->name[4]);
         }
  }
  
@@ -109,28 +115,29 @@ void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx)
  {
         int name_len = be16_to_cpu(r->name_len);
  
-       dbg_msg("volume table record %d dump:", idx);
-       dbg_msg("reserved_pebs   %d", be32_to_cpu(r->reserved_pebs));
-       dbg_msg("alignment       %d", be32_to_cpu(r->alignment));
-       dbg_msg("data_pad        %d", be32_to_cpu(r->data_pad));
-       dbg_msg("vol_type        %d", (int)r->vol_type);
-       dbg_msg("upd_marker      %d", (int)r->upd_marker);
-       dbg_msg("name_len        %d", name_len);
+       printk(KERN_DEBUG "Volume table record %d dump:\n", idx);
+       printk(KERN_DEBUG "\treserved_pebs   %d\n",
+              be32_to_cpu(r->reserved_pebs));
+       printk(KERN_DEBUG "\talignment       %d\n", be32_to_cpu(r->alignment));
+       printk(KERN_DEBUG "\tdata_pad        %d\n", be32_to_cpu(r->data_pad));
+       printk(KERN_DEBUG "\tvol_type        %d\n", (int)r->vol_type);
+       printk(KERN_DEBUG "\tupd_marker      %d\n", (int)r->upd_marker);
+       printk(KERN_DEBUG "\tname_len        %d\n", name_len);
  
         if (r->name[0] == '\0') {
-               dbg_msg("name            NULL");
+               printk(KERN_DEBUG "\tname            NULL\n");
                 return;
         }
  
         if (name_len <= UBI_VOL_NAME_MAX &&
             strnlen(&r->name[0], name_len + 1) == name_len) {
-               dbg_msg("name            %s", &r->name[0]);
+               printk(KERN_DEBUG "\tname            %s\n", &r->name[0]);
         } else {
-               dbg_msg("1st 5 characters of the name: %c%c%c%c%c",
+               printk(KERN_DEBUG "\t1st 5 characters of name: %c%c%c%c%c\n",
                         r->name[0], r->name[1], r->name[2], r->name[3],
                         r->name[4]);
         }
-       dbg_msg("crc             %#08x", be32_to_cpu(r->crc));
+       printk(KERN_DEBUG "\tcrc             %#08x\n", be32_to_cpu(r->crc));
  }
  
  /**
@@ -139,15 +146,15 @@ void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx)
   */
  void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv)
  {
-       dbg_msg("volume scanning information dump:");
-       dbg_msg("vol_id         %d", sv->vol_id);
-       dbg_msg("highest_lnum   %d", sv->highest_lnum);
-       dbg_msg("leb_count      %d", sv->leb_count);
-       dbg_msg("compat         %d", sv->compat);
-       dbg_msg("vol_type       %d", sv->vol_type);
-       dbg_msg("used_ebs       %d", sv->used_ebs);
-       dbg_msg("last_data_size %d", sv->last_data_size);
-       dbg_msg("data_pad       %d", sv->data_pad);
+       printk(KERN_DEBUG "Volume scanning information dump:\n");
+       printk(KERN_DEBUG "\tvol_id         %d\n", sv->vol_id);
+       printk(KERN_DEBUG "\thighest_lnum   %d\n", sv->highest_lnum);
+       printk(KERN_DEBUG "\tleb_count      %d\n", sv->leb_count);
+       printk(KERN_DEBUG "\tcompat         %d\n", sv->compat);
+       printk(KERN_DEBUG "\tvol_type       %d\n", sv->vol_type);
+       printk(KERN_DEBUG "\tused_ebs       %d\n", sv->used_ebs);
+       printk(KERN_DEBUG "\tlast_data_size %d\n", sv->last_data_size);
+       printk(KERN_DEBUG "\tdata_pad       %d\n", sv->data_pad);
  }
  
  /**
@@ -157,14 +164,13 @@ void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv)
   */
  void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type)
  {
-       dbg_msg("eraseblock scanning information dump:");
-       dbg_msg("ec       %d", seb->ec);
-       dbg_msg("pnum     %d", seb->pnum);
+       printk(KERN_DEBUG "eraseblock scanning information dump:\n");
+       printk(KERN_DEBUG "\tec       %d\n", seb->ec);
+       printk(KERN_DEBUG "\tpnum     %d\n", seb->pnum);
         if (type == 0) {
-               dbg_msg("lnum     %d", seb->lnum);
-               dbg_msg("scrub    %d", seb->scrub);
-               dbg_msg("sqnum    %llu", seb->sqnum);
-               dbg_msg("leb_ver  %u", seb->leb_ver);
+               printk(KERN_DEBUG "\tlnum     %d\n", seb->lnum);
+               printk(KERN_DEBUG "\tscrub    %d\n", seb->scrub);
+               printk(KERN_DEBUG "\tsqnum    %llu\n", seb->sqnum);
         }
  }
  
@@ -176,16 +182,16 @@ void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req)
  {
         char nm[17];
  
-       dbg_msg("volume creation request dump:");
-       dbg_msg("vol_id    %d",   req->vol_id);
-       dbg_msg("alignment %d",   req->alignment);
-       dbg_msg("bytes     %lld", (long long)req->bytes);
-       dbg_msg("vol_type  %d",   req->vol_type);
-       dbg_msg("name_len  %d",   req->name_len);
+       printk(KERN_DEBUG "Volume creation request dump:\n");
+       printk(KERN_DEBUG "\tvol_id    %d\n",   req->vol_id);
+       printk(KERN_DEBUG "\talignment %d\n",   req->alignment);
+       printk(KERN_DEBUG "\tbytes     %lld\n", (long long)req->bytes);
+       printk(KERN_DEBUG "\tvol_type  %d\n",   req->vol_type);
+       printk(KERN_DEBUG "\tname_len  %d\n",   req->name_len);
  
         memcpy(nm, req->name, 16);
         nm[16] = 0;
-       dbg_msg("the 1st 16 characters of the name: %s", nm);
+       printk(KERN_DEBUG "\t1st 16 characters of name: %s\n", nm);
  }
  
-#endif /* CONFIG_MTD_UBI_DEBUG_MSG */
+#endif /* CONFIG_MTD_UBI_DEBUG */
diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h

index 8ea99d8c9e1f0459862b77bbd54918e3b68bf88a..78e914d23ece835d597e0c10c753965fed3d5e31 100644 (file)
--- a/drivers/mtd/ubi/debug.h
+++ b/drivers/mtd/ubi/debug.h
@@ -24,21 +24,16 @@
  #ifdef CONFIG_MTD_UBI_DEBUG
  #include <linux/random.h>
  
-#define ubi_assert(expr)  BUG_ON(!(expr))
  #define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__)
-#else
-#define ubi_assert(expr)  ({})
-#define dbg_err(fmt, ...) ({})
-#endif
  
-#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT
-#define DBG_DISABLE_BGT 1
-#else
-#define DBG_DISABLE_BGT 0
-#endif
+#define ubi_assert(expr)  do {                                               \
+        if (unlikely(!(expr))) {                                             \
+                printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \
+                       __func__, __LINE__, current->pid);                    \
+                ubi_dbg_dump_stack();                                        \
+        }                                                                    \
+} while (0)
  
-#ifdef CONFIG_MTD_UBI_DEBUG_MSG
-/* Generic debugging message */
  #define dbg_msg(fmt, ...)                                    \
         printk(KERN_DEBUG "UBI DBG (pid %d): %s: " fmt "\n", \
                current->pid, __func__, ##__VA_ARGS__)
@@ -61,36 +56,29 @@ void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv);
  void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type);
  void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req);
  
+#ifdef CONFIG_MTD_UBI_DEBUG_MSG
+/* General debugging messages */
+#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
  #else
-
-#define dbg_msg(fmt, ...)    ({})
-#define ubi_dbg_dump_stack() ({})
-#define ubi_dbg_dump_ec_hdr(ec_hdr)      ({})
-#define ubi_dbg_dump_vid_hdr(vid_hdr)    ({})
-#define ubi_dbg_dump_vol_info(vol)       ({})
-#define ubi_dbg_dump_vtbl_record(r, idx) ({})
-#define ubi_dbg_dump_sv(sv)              ({})
-#define ubi_dbg_dump_seb(seb, type)      ({})
-#define ubi_dbg_dump_mkvol_req(req)      ({})
-
-#endif /* CONFIG_MTD_UBI_DEBUG_MSG */
+#define dbg_gen(fmt, ...) ({})
+#endif
  
  #ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA
-/* Messages from the eraseblock association unit */
+/* Messages from the eraseblock association sub-system */
  #define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
  #else
  #define dbg_eba(fmt, ...) ({})
  #endif
  
  #ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL
-/* Messages from the wear-leveling unit */
+/* Messages from the wear-leveling sub-system */
  #define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
  #else
  #define dbg_wl(fmt, ...) ({})
  #endif
  
  #ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO
-/* Messages from the input/output unit */
+/* Messages from the input/output sub-system */
  #define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
  #else
  #define dbg_io(fmt, ...) ({})
@@ -105,6 +93,12 @@ void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req);
  #define UBI_IO_DEBUG 0
  #endif
  
+#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT
+#define DBG_DISABLE_BGT 1
+#else
+#define DBG_DISABLE_BGT 0
+#endif
+
  #ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS
  /**
   * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip.
@@ -149,4 +143,30 @@ static inline int ubi_dbg_is_erase_failure(void)
  #define ubi_dbg_is_erase_failure() 0
  #endif
  
+#else
+
+#define ubi_assert(expr)                 ({})
+#define dbg_err(fmt, ...)                ({})
+#define dbg_msg(fmt, ...)                ({})
+#define dbg_gen(fmt, ...)                ({})
+#define dbg_eba(fmt, ...)                ({})
+#define dbg_wl(fmt, ...)                 ({})
+#define dbg_io(fmt, ...)                 ({})
+#define dbg_bld(fmt, ...)                ({})
+#define ubi_dbg_dump_stack()             ({})
+#define ubi_dbg_dump_ec_hdr(ec_hdr)      ({})
+#define ubi_dbg_dump_vid_hdr(vid_hdr)    ({})
+#define ubi_dbg_dump_vol_info(vol)       ({})
+#define ubi_dbg_dump_vtbl_record(r, idx) ({})
+#define ubi_dbg_dump_sv(sv)              ({})
+#define ubi_dbg_dump_seb(seb, type)      ({})
+#define ubi_dbg_dump_mkvol_req(req)      ({})
+
+#define UBI_IO_DEBUG               0
+#define DBG_DISABLE_BGT            0
+#define ubi_dbg_is_bitflip()       0
+#define ubi_dbg_is_write_failure() 0
+#define ubi_dbg_is_erase_failure() 0
+
+#endif /* !CONFIG_MTD_UBI_DEBUG */
  #endif /* !__UBI_DEBUG_H__ */
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c

index 7ce91ca742b136c7b60ac6db25ffb3d3771d2d6b..e04bcf1dff87c1b2dfdd75ef2428a0bb76781ef7 100644 (file)
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -19,20 +19,20 @@
   */
  
  /*
- * The UBI Eraseblock Association (EBA) unit.
+ * The UBI Eraseblock Association (EBA) sub-system.
   *
- * This unit is responsible for I/O to/from logical eraseblock.
+ * This sub-system is responsible for I/O to/from logical eraseblock.
   *
   * Although in this implementation the EBA table is fully kept and managed in
   * RAM, which assumes poor scalability, it might be (partially) maintained on
   * flash in future implementations.
   *
- * The EBA unit implements per-logical eraseblock locking. Before accessing a
- * logical eraseblock it is locked for reading or writing. The per-logical
- * eraseblock locking is implemented by means of the lock tree. The lock tree
- * is an RB-tree which refers all the currently locked logical eraseblocks. The
- * lock tree elements are &struct ubi_ltree_entry objects. They are indexed by
- * (@vol_id, @lnum) pairs.
+ * The EBA sub-system implements per-logical eraseblock locking. Before
+ * accessing a logical eraseblock it is locked for reading or writing. The
+ * per-logical eraseblock locking is implemented by means of the lock tree. The
+ * lock tree is an RB-tree which refers all the currently locked logical
+ * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects.
+ * They are indexed by (@vol_id, @lnum) pairs.
   *
   * EBA also maintains the global sequence counter which is incremented each
   * time a logical eraseblock is mapped to a physical eraseblock and it is
@@ -189,9 +189,7 @@ static struct ubi_ltree_entry *ltree_add_entry(struct ubi_device *ubi,
         le->users += 1;
         spin_unlock(&ubi->ltree_lock);
  
-       if (le_free)
-               kfree(le_free);
-
+       kfree(le_free);
         return le;
  }
  
@@ -223,22 +221,18 @@ static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum)
   */
  static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum)
  {
-       int free = 0;
         struct ubi_ltree_entry *le;
  
         spin_lock(&ubi->ltree_lock);
         le = ltree_lookup(ubi, vol_id, lnum);
         le->users -= 1;
         ubi_assert(le->users >= 0);
+       up_read(&le->mutex);
         if (le->users == 0) {
                 rb_erase(&le->rb, &ubi->ltree);
-               free = 1;
+               kfree(le);
         }
         spin_unlock(&ubi->ltree_lock);
-
-       up_read(&le->mutex);
-       if (free)
-               kfree(le);
  }
  
  /**
@@ -274,7 +268,6 @@ static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum)
   */
  static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum)
  {
-       int free;
         struct ubi_ltree_entry *le;
  
         le = ltree_add_entry(ubi, vol_id, lnum);
@@ -289,12 +282,9 @@ static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum)
         ubi_assert(le->users >= 0);
         if (le->users == 0) {
                 rb_erase(&le->rb, &ubi->ltree);
-               free = 1;
-       } else
-               free = 0;
-       spin_unlock(&ubi->ltree_lock);
-       if (free)
                 kfree(le);
+       }
+       spin_unlock(&ubi->ltree_lock);
  
         return 1;
  }
@@ -307,23 +297,18 @@ static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum)
   */
  static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum)
  {
-       int free;
         struct ubi_ltree_entry *le;
  
         spin_lock(&ubi->ltree_lock);
         le = ltree_lookup(ubi, vol_id, lnum);
         le->users -= 1;
         ubi_assert(le->users >= 0);
+       up_write(&le->mutex);
         if (le->users == 0) {
                 rb_erase(&le->rb, &ubi->ltree);
-               free = 1;
-       } else
-               free = 0;
-       spin_unlock(&ubi->ltree_lock);
-
-       up_write(&le->mutex);
-       if (free)
                 kfree(le);
+       }
+       spin_unlock(&ubi->ltree_lock);
  }
  
  /**
@@ -516,9 +501,8 @@ static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum,
         struct ubi_vid_hdr *vid_hdr;
  
         vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
-       if (!vid_hdr) {
+       if (!vid_hdr)
                 return -ENOMEM;
-       }
  
         mutex_lock(&ubi->buf_mutex);
  
@@ -752,7 +736,7 @@ int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol,
                 /* If this is the last LEB @len may be unaligned */
                 len = ALIGN(data_size, ubi->min_io_size);
         else
-               ubi_assert(len % ubi->min_io_size == 0);
+               ubi_assert(!(len & (ubi->min_io_size - 1)));
  
         vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
         if (!vid_hdr)
@@ -919,7 +903,7 @@ retry:
         }
  
         if (vol->eba_tbl[lnum] >= 0) {
-               err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1);
+               err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 0);
                 if (err)
                         goto out_leb_unlock;
         }
@@ -1141,7 +1125,7 @@ out_unlock_leb:
  }
  
  /**
- * ubi_eba_init_scan - initialize the EBA unit using scanning information.
+ * ubi_eba_init_scan - initialize the EBA sub-system using scanning information.
   * @ubi: UBI device description object
   * @si: scanning information
   *
@@ -1156,7 +1140,7 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
         struct ubi_scan_leb *seb;
         struct rb_node *rb;
  
-       dbg_eba("initialize EBA unit");
+       dbg_eba("initialize EBA sub-system");
  
         spin_lock_init(&ubi->ltree_lock);
         mutex_init(&ubi->alc_mutex);
@@ -1222,7 +1206,7 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
                 ubi->rsvd_pebs  += ubi->beb_rsvd_pebs;
         }
  
-       dbg_eba("EBA unit is initialized");
+       dbg_eba("EBA sub-system is initialized");
         return 0;
  
  out_free:
@@ -1233,20 +1217,3 @@ out_free:
         }
         return err;
  }
-
-/**
- * ubi_eba_close - close EBA unit.
- * @ubi: UBI device description object
- */
-void ubi_eba_close(const struct ubi_device *ubi)
-{
-       int i, num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT;
-
-       dbg_eba("close EBA unit");
-
-       for (i = 0; i < num_volumes; i++) {
-               if (!ubi->volumes[i])
-                       continue;
-               kfree(ubi->volumes[i]->eba_tbl);
-       }
-}
diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c

index e909b390069a263e63d5d80a9a8bdf239644fb9f..605812bb0b1a3d1f5dcefa40929842c00bdbf2f1 100644 (file)
--- a/drivers/mtd/ubi/gluebi.c
+++ b/drivers/mtd/ubi/gluebi.c
@@ -111,7 +111,7 @@ static int gluebi_read(struct mtd_info *mtd, loff_t from, size_t len,
         struct ubi_device *ubi;
         uint64_t tmp = from;
  
-       dbg_msg("read %zd bytes from offset %lld", len, from);
+       dbg_gen("read %zd bytes from offset %lld", len, from);
  
         if (len < 0 || from < 0 || from + len > mtd->size)
                 return -EINVAL;
@@ -162,7 +162,7 @@ static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len,
         struct ubi_device *ubi;
         uint64_t tmp = to;
  
-       dbg_msg("write %zd bytes to offset %lld", len, to);
+       dbg_gen("write %zd bytes to offset %lld", len, to);
  
         if (len < 0 || to < 0 || len + to > mtd->size)
                 return -EINVAL;
@@ -215,7 +215,7 @@ static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr)
         struct ubi_volume *vol;
         struct ubi_device *ubi;
  
-       dbg_msg("erase %u bytes at offset %u", instr->len, instr->addr);
+       dbg_gen("erase %u bytes at offset %u", instr->len, instr->addr);
  
         if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize)
                 return -EINVAL;
@@ -249,8 +249,8 @@ static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr)
         if (err)
                 goto out_err;
  
-        instr->state = MTD_ERASE_DONE;
-        mtd_erase_callback(instr);
+       instr->state = MTD_ERASE_DONE;
+       mtd_erase_callback(instr);
         return 0;
  
  out_err:
@@ -299,12 +299,12 @@ int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol)
                 mtd->size = vol->used_bytes;
  
         if (add_mtd_device(mtd)) {
-               ubi_err("cannot not add MTD device\n");
+               ubi_err("cannot not add MTD device");
                 kfree(mtd->name);
                 return -ENFILE;
         }
  
-       dbg_msg("added mtd%d (\"%s\"), size %u, EB size %u",
+       dbg_gen("added mtd%d (\"%s\"), size %u, EB size %u",
                 mtd->index, mtd->name, mtd->size, mtd->erasesize);
         return 0;
  }
@@ -322,7 +322,7 @@ int ubi_destroy_gluebi(struct ubi_volume *vol)
         int err;
         struct mtd_info *mtd = &vol->gluebi_mtd;
  
-       dbg_msg("remove mtd%d", mtd->index);
+       dbg_gen("remove mtd%d", mtd->index);
         err = del_mtd_device(mtd);
         if (err)
                 return err;
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c

index 4ac11df7b048d3a24684744c4f399f13b81653ea..2fb64be44f1bab1efde6fd48988b8b22dabd5f39 100644 (file)
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -20,15 +20,15 @@
   */
  
  /*
- * UBI input/output unit.
+ * UBI input/output sub-system.
   *
- * This unit provides a uniform way to work with all kinds of the underlying
- * MTD devices. It also implements handy functions for reading and writing UBI
- * headers.
+ * This sub-system provides a uniform way to work with all kinds of the
+ * underlying MTD devices. It also implements handy functions for reading and
+ * writing UBI headers.
   *
   * We are trying to have a paranoid mindset and not to trust to what we read
- * from the flash media in order to be more secure and robust. So this unit
- * validates every single header it reads from the flash media.
+ * from the flash media in order to be more secure and robust. So this
+ * sub-system validates every single header it reads from the flash media.
   *
   * Some words about how the eraseblock headers are stored.
   *
@@ -79,11 +79,11 @@
   * 512-byte chunks, we have to allocate one more buffer and copy our VID header
   * to offset 448 of this buffer.
   *
- * The I/O unit does the following trick in order to avoid this extra copy.
- * It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID header
- * and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. When the
- * VID header is being written out, it shifts the VID header pointer back and
- * writes the whole sub-page.
+ * The I/O sub-system does the following trick in order to avoid this extra
+ * copy. It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID
+ * header and returns a pointer to offset @ubi->vid_hdr_shift of this buffer.
+ * When the VID header is being written out, it shifts the VID header pointer
+ * back and writes the whole sub-page.
   */
  
  #include <linux/crc32.h>
@@ -156,15 +156,19 @@ retry:
                         /*
                          * -EUCLEAN is reported if there was a bit-flip which
                          * was corrected, so this is harmless.
+                        *
+                        * We do not report about it here unless debugging is
+                        * enabled. A corresponding message will be printed
+                        * later, when it has been scrubbed.
                          */
-                       ubi_msg("fixable bit-flip detected at PEB %d", pnum);
+                       dbg_msg("fixable bit-flip detected at PEB %d", pnum);
                         ubi_assert(len == read);
                         return UBI_IO_BITFLIPS;
                 }
  
                 if (read != len && retries++ < UBI_IO_RETRIES) {
-                       dbg_io("error %d while reading %d bytes from PEB %d:%d, "
-                              "read only %zd bytes, retry",
+                       dbg_io("error %d while reading %d bytes from PEB %d:%d,"
+                              " read only %zd bytes, retry",
                                err, len, pnum, offset, read);
                         yield();
                         goto retry;
@@ -187,7 +191,7 @@ retry:
                 ubi_assert(len == read);
  
                 if (ubi_dbg_is_bitflip()) {
-                       dbg_msg("bit-flip (emulated)");
+                       dbg_gen("bit-flip (emulated)");
                         err = UBI_IO_BITFLIPS;
                 }
         }
@@ -391,6 +395,7 @@ static int torture_peb(struct ubi_device *ubi, int pnum)
  {
         int err, i, patt_count;
  
+       ubi_msg("run torture test for PEB %d", pnum);
         patt_count = ARRAY_SIZE(patterns);
         ubi_assert(patt_count > 0);
  
@@ -434,6 +439,7 @@ static int torture_peb(struct ubi_device *ubi, int pnum)
         }
  
         err = patt_count;
+       ubi_msg("PEB %d passed torture test, do not mark it a bad", pnum);
  
  out:
         mutex_unlock(&ubi->buf_mutex);
@@ -699,8 +705,8 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum,
  
         if (hdr_crc != crc) {
                 if (verbose) {
-                       ubi_warn("bad EC header CRC at PEB %d, calculated %#08x,"
-                                " read %#08x", pnum, crc, hdr_crc);
+                       ubi_warn("bad EC header CRC at PEB %d, calculated "
+                                "%#08x, read %#08x", pnum, crc, hdr_crc);
                         ubi_dbg_dump_ec_hdr(ec_hdr);
                 }
                 return UBI_IO_BAD_EC_HDR;
@@ -1095,8 +1101,7 @@ fail:
  }
  
  /**
- * paranoid_check_peb_ec_hdr - check that the erase counter header of a
- * physical eraseblock is in-place and is all right.
+ * paranoid_check_peb_ec_hdr - check erase counter header.
   * @ubi: UBI device description object
   * @pnum: the physical eraseblock number to check
   *
@@ -1174,8 +1179,7 @@ fail:
  }
  
  /**
- * paranoid_check_peb_vid_hdr - check that the volume identifier header of a
- * physical eraseblock is in-place and is all right.
+ * paranoid_check_peb_vid_hdr - check volume identifier header.
   * @ubi: UBI device description object
   * @pnum: the physical eraseblock number to check
   *
@@ -1256,7 +1260,7 @@ static int paranoid_check_all_ff(struct ubi_device *ubi, int pnum, int offset,
  
  fail:
         ubi_err("paranoid check failed for PEB %d", pnum);
-       dbg_msg("hex dump of the %d-%d region", offset, offset + len);
+       ubi_msg("hex dump of the %d-%d region", offset, offset + len);
         print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
                        ubi->dbg_peb_buf, len, 1);
         err = 1;
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c

index a70d58823f8d116804c4c05bfaf807cead9015e5..5d9bcf109c13be5e584b17bf345b6ddd016cc876 100644 (file)
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -106,7 +106,7 @@ struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode)
         struct ubi_device *ubi;
         struct ubi_volume *vol;
  
-       dbg_msg("open device %d volume %d, mode %d", ubi_num, vol_id, mode);
+       dbg_gen("open device %d volume %d, mode %d", ubi_num, vol_id, mode);
  
         if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES)
                 return ERR_PTR(-EINVAL);
@@ -215,7 +215,7 @@ struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name,
         struct ubi_device *ubi;
         struct ubi_volume_desc *ret;
  
-       dbg_msg("open volume %s, mode %d", name, mode);
+       dbg_gen("open volume %s, mode %d", name, mode);
  
         if (!name)
                 return ERR_PTR(-EINVAL);
@@ -266,7 +266,7 @@ void ubi_close_volume(struct ubi_volume_desc *desc)
         struct ubi_volume *vol = desc->vol;
         struct ubi_device *ubi = vol->ubi;
  
-       dbg_msg("close volume %d, mode %d", vol->vol_id, desc->mode);
+       dbg_gen("close volume %d, mode %d", vol->vol_id, desc->mode);
  
         spin_lock(&ubi->volumes_lock);
         switch (desc->mode) {
@@ -323,7 +323,7 @@ int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
         struct ubi_device *ubi = vol->ubi;
         int err, vol_id = vol->vol_id;
  
-       dbg_msg("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset);
+       dbg_gen("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset);
  
         if (vol_id < 0 || vol_id >= ubi->vtbl_slots || lnum < 0 ||
             lnum >= vol->used_ebs || offset < 0 || len < 0 ||
@@ -388,7 +388,7 @@ int ubi_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
         struct ubi_device *ubi = vol->ubi;
         int vol_id = vol->vol_id;
  
-       dbg_msg("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset);
+       dbg_gen("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset);
  
         if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
                 return -EINVAL;
@@ -397,8 +397,8 @@ int ubi_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
                 return -EROFS;
  
         if (lnum < 0 || lnum >= vol->reserved_pebs || offset < 0 || len < 0 ||
-           offset + len > vol->usable_leb_size || offset % ubi->min_io_size ||
-           len % ubi->min_io_size)
+           offset + len > vol->usable_leb_size ||
+           offset & (ubi->min_io_size - 1) || len & (ubi->min_io_size - 1))
                 return -EINVAL;
  
         if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM &&
@@ -438,7 +438,7 @@ int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
         struct ubi_device *ubi = vol->ubi;
         int vol_id = vol->vol_id;
  
-       dbg_msg("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum);
+       dbg_gen("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum);
  
         if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
                 return -EINVAL;
@@ -447,7 +447,7 @@ int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
                 return -EROFS;
  
         if (lnum < 0 || lnum >= vol->reserved_pebs || len < 0 ||
-           len > vol->usable_leb_size || len % ubi->min_io_size)
+           len > vol->usable_leb_size || len & (ubi->min_io_size - 1))
                 return -EINVAL;
  
         if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM &&
@@ -482,7 +482,7 @@ int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum)
         struct ubi_device *ubi = vol->ubi;
         int err;
  
-       dbg_msg("erase LEB %d:%d", vol->vol_id, lnum);
+       dbg_gen("erase LEB %d:%d", vol->vol_id, lnum);
  
         if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
                 return -EROFS;
@@ -542,7 +542,7 @@ int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum)
         struct ubi_volume *vol = desc->vol;
         struct ubi_device *ubi = vol->ubi;
  
-       dbg_msg("unmap LEB %d:%d", vol->vol_id, lnum);
+       dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum);
  
         if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
                 return -EROFS;
@@ -579,7 +579,7 @@ int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
         struct ubi_volume *vol = desc->vol;
         struct ubi_device *ubi = vol->ubi;
  
-       dbg_msg("unmap LEB %d:%d", vol->vol_id, lnum);
+       dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum);
  
         if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
                 return -EROFS;
@@ -621,7 +621,7 @@ int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum)
  {
         struct ubi_volume *vol = desc->vol;
  
-       dbg_msg("test LEB %d:%d", vol->vol_id, lnum);
+       dbg_gen("test LEB %d:%d", vol->vol_id, lnum);
  
         if (lnum < 0 || lnum >= vol->reserved_pebs)
                 return -EINVAL;
@@ -632,3 +632,27 @@ int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum)
         return vol->eba_tbl[lnum] >= 0;
  }
  EXPORT_SYMBOL_GPL(ubi_is_mapped);
+
+/**
+ * ubi_sync - synchronize UBI device buffers.
+ * @ubi_num: UBI device to synchronize
+ *
+ * The underlying MTD device may cache data in hardware or in software. This
+ * function ensures the caches are flushed. Returns zero in case of success and
+ * a negative error code in case of failure.
+ */
+int ubi_sync(int ubi_num)
+{
+       struct ubi_device *ubi;
+
+       ubi = ubi_get_device(ubi_num);
+       if (!ubi)
+               return -ENODEV;
+
+       if (ubi->mtd->sync)
+               ubi->mtd->sync(ubi->mtd);
+
+       ubi_put_device(ubi);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ubi_sync);
diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c

index 93e05281201292c4641d95593eb03158797d29a9..22ad314029452935dfffff32165d49047da2603d 100644 (file)
--- a/drivers/mtd/ubi/misc.c
+++ b/drivers/mtd/ubi/misc.c
@@ -37,7 +37,7 @@ int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf,
  {
         int i;
  
-       ubi_assert(length % ubi->min_io_size == 0);
+       ubi_assert(!(length & (ubi->min_io_size - 1)));
  
         for (i = length - 1; i >= 0; i--)
                 if (((const uint8_t *)buf)[i] != 0xFF)
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c

index 96d410e106ab1cb72cf9f6bda211e756eaa8c98d..967bb4406df9e01c48aa5e5e0ff71a514558412d 100644 (file)
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -19,9 +19,9 @@
   */
  
  /*
- * UBI scanning unit.
+ * UBI scanning sub-system.
   *
- * This unit is responsible for scanning the flash media, checking UBI
+ * This sub-system is responsible for scanning the flash media, checking UBI
   * headers and providing complete information about the UBI flash image.
   *
   * The scanning information is represented by a &struct ubi_scan_info' object.
@@ -93,8 +93,7 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec,
  }
  
  /**
- * validate_vid_hdr - check that volume identifier header is correct and
- * consistent.
+ * validate_vid_hdr - check volume identifier header.
   * @vid_hdr: the volume identifier header to check
   * @sv: information about the volume this logical eraseblock belongs to
   * @pnum: physical eraseblock number the VID header came from
@@ -103,7 +102,7 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec,
   * non-zero if an inconsistency was found and zero if not.
   *
   * Note, UBI does sanity check of everything it reads from the flash media.
- * Most of the checks are done in the I/O unit. Here we check that the
+ * Most of the checks are done in the I/O sub-system. Here we check that the
   * information in the VID header is consistent to the information in other VID
   * headers of the same volume.
   */
@@ -247,45 +246,21 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb,
         struct ubi_vid_hdr *vh = NULL;
         unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum);
  
-       if (seb->sqnum == 0 && sqnum2 == 0) {
-               long long abs, v1 = seb->leb_ver, v2 = be32_to_cpu(vid_hdr->leb_ver);
-
+       if (sqnum2 == seb->sqnum) {
                 /*
-                * UBI constantly increases the logical eraseblock version
-                * number and it can overflow. Thus, we have to bear in mind
-                * that versions that are close to %0xFFFFFFFF are less then
-                * versions that are close to %0.
-                *
-                * The UBI WL unit guarantees that the number of pending tasks
-                * is not greater then %0x7FFFFFFF. So, if the difference
-                * between any two versions is greater or equivalent to
-                * %0x7FFFFFFF, there was an overflow and the logical
-                * eraseblock with lower version is actually newer then the one
-                * with higher version.
-                *
-                * FIXME: but this is anyway obsolete and will be removed at
-                * some point.
+                * This must be a really ancient UBI image which has been
+                * created before sequence numbers support has been added. At
+                * that time we used 32-bit LEB versions stored in logical
+                * eraseblocks. That was before UBI got into mainline. We do not
+                * support these images anymore. Well, those images will
+                * still work, but only if no unclean reboots happened.
                  */
-               dbg_bld("using old crappy leb_ver stuff");
-
-               if (v1 == v2) {
-                       ubi_err("PEB %d and PEB %d have the same version %lld",
-                               seb->pnum, pnum, v1);
-                       return -EINVAL;
-               }
-
-               abs = v1 - v2;
-               if (abs < 0)
-                       abs = -abs;
+               ubi_err("unsupported on-flash UBI format\n");
+               return -EINVAL;
+       }
  
-               if (abs < 0x7FFFFFFF)
-                       /* Non-overflow situation */
-                       second_is_newer = (v2 > v1);
-               else
-                       second_is_newer = (v2 < v1);
-       } else
-               /* Obviously the LEB with lower sequence counter is older */
-               second_is_newer = sqnum2 > seb->sqnum;
+       /* Obviously the LEB with lower sequence counter is older */
+       second_is_newer = !!(sqnum2 > seb->sqnum);
  
         /*
          * Now we know which copy is newer. If the copy flag of the PEB with
@@ -293,7 +268,7 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb,
          * check data CRC. For the second PEB we already have the VID header,
          * for the first one - we'll need to re-read it from flash.
          *
-        * FIXME: this may be optimized so that we wouldn't read twice.
+        * Note: this may be optimized so that we wouldn't read twice.
          */
  
         if (second_is_newer) {
@@ -379,8 +354,7 @@ out_free_vidh:
  }
  
  /**
- * ubi_scan_add_used - add information about a physical eraseblock to the
- * scanning information.
+ * ubi_scan_add_used - add physical eraseblock to the scanning information.
   * @ubi: UBI device description object
   * @si: scanning information
   * @pnum: the physical eraseblock number
@@ -400,7 +374,6 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
                       int bitflips)
  {
         int err, vol_id, lnum;
-       uint32_t leb_ver;
         unsigned long long sqnum;
         struct ubi_scan_volume *sv;
         struct ubi_scan_leb *seb;
@@ -409,10 +382,9 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
         vol_id = be32_to_cpu(vid_hdr->vol_id);
         lnum = be32_to_cpu(vid_hdr->lnum);
         sqnum = be64_to_cpu(vid_hdr->sqnum);
-       leb_ver = be32_to_cpu(vid_hdr->leb_ver);
  
-       dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d",
-               pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips);
+       dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, bitflips %d",
+               pnum, vol_id, lnum, ec, sqnum, bitflips);
  
         sv = add_volume(si, vol_id, pnum, vid_hdr);
         if (IS_ERR(sv) < 0)
@@ -445,25 +417,20 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
                  */
  
                 dbg_bld("this LEB already exists: PEB %d, sqnum %llu, "
-                       "LEB ver %u, EC %d", seb->pnum, seb->sqnum,
-                       seb->leb_ver, seb->ec);
-
-               /*
-                * Make sure that the logical eraseblocks have different
-                * versions. Otherwise the image is bad.
-                */
-               if (seb->leb_ver == leb_ver && leb_ver != 0) {
-                       ubi_err("two LEBs with same version %u", leb_ver);
-                       ubi_dbg_dump_seb(seb, 0);
-                       ubi_dbg_dump_vid_hdr(vid_hdr);
-                       return -EINVAL;
-               }
+                       "EC %d", seb->pnum, seb->sqnum, seb->ec);
  
                 /*
                  * Make sure that the logical eraseblocks have different
                  * sequence numbers. Otherwise the image is bad.
                  *
-                * FIXME: remove 'sqnum != 0' check when leb_ver is removed.
+                * However, if the sequence number is zero, we assume it must
+                * be an ancient UBI image from the era when UBI did not have
+                * sequence numbers. We still can attach these images, unless
+                * there is a need to distinguish between old and new
+                * eraseblocks, in which case we'll refuse the image in
+                * 'compare_lebs()'. In other words, we attach old clean
+                * images, but refuse attaching old images with duplicated
+                * logical eraseblocks because there was an unclean reboot.
                  */
                 if (seb->sqnum == sqnum && sqnum != 0) {
                         ubi_err("two LEBs with same sequence number %llu",
@@ -503,7 +470,6 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
                         seb->pnum = pnum;
                         seb->scrub = ((cmp_res & 2) || bitflips);
                         seb->sqnum = sqnum;
-                       seb->leb_ver = leb_ver;
  
                         if (sv->highest_lnum == lnum)
                                 sv->last_data_size =
@@ -540,7 +506,6 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
         seb->lnum = lnum;
         seb->sqnum = sqnum;
         seb->scrub = bitflips;
-       seb->leb_ver = leb_ver;
  
         if (sv->highest_lnum <= lnum) {
                 sv->highest_lnum = lnum;
@@ -554,8 +519,7 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
  }
  
  /**
- * ubi_scan_find_sv - find information about a particular volume in the
- * scanning information.
+ * ubi_scan_find_sv - find volume in the scanning information.
   * @si: scanning information
   * @vol_id: the requested volume ID
   *
@@ -584,8 +548,7 @@ struct ubi_scan_volume *ubi_scan_find_sv(const struct ubi_scan_info *si,
  }
  
  /**
- * ubi_scan_find_seb - find information about a particular logical
- * eraseblock in the volume scanning information.
+ * ubi_scan_find_seb - find LEB in the volume scanning information.
   * @sv: a pointer to the volume scanning information
   * @lnum: the requested logical eraseblock
   *
@@ -645,9 +608,9 @@ void ubi_scan_rm_volume(struct ubi_scan_info *si, struct ubi_scan_volume *sv)
   *
   * This function erases physical eraseblock 'pnum', and writes the erase
   * counter header to it. This function should only be used on UBI device
- * initialization stages, when the EBA unit had not been yet initialized. This
- * function returns zero in case of success and a negative error code in case
- * of failure.
+ * initialization stages, when the EBA sub-system had not been yet initialized.
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
   */
  int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si,
                        int pnum, int ec)
@@ -687,9 +650,10 @@ out_free:
   * @si: scanning information
   *
   * This function returns a free physical eraseblock. It is supposed to be
- * called on the UBI initialization stages when the wear-leveling unit is not
- * initialized yet. This function picks a physical eraseblocks from one of the
- * lists, writes the EC header if it is needed, and removes it from the list.
+ * called on the UBI initialization stages when the wear-leveling sub-system is
+ * not initialized yet. This function picks a physical eraseblocks from one of
+ * the lists, writes the EC header if it is needed, and removes it from the
+ * list.
   *
   * This function returns scanning physical eraseblock information in case of
   * success and an error code in case of failure.
@@ -742,8 +706,7 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi,
  }
  
  /**
- * process_eb - read UBI headers, check them and add corresponding data
- * to the scanning information.
+ * process_eb - read, check UBI headers, and add them to scanning information.
   * @ubi: UBI device description object
   * @si: scanning information
   * @pnum: the physical eraseblock number
@@ -751,7 +714,8 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi,
   * This function returns a zero if the physical eraseblock was successfully
   * handled and a negative error code in case of failure.
   */
-static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum)
+static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si,
+                     int pnum)
  {
         long long uninitialized_var(ec);
         int err, bitflips = 0, vol_id, ec_corr = 0;
@@ -764,8 +728,9 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum
                 return err;
         else if (err) {
                 /*
-                * FIXME: this is actually duty of the I/O unit to initialize
-                * this, but MTD does not provide enough information.
+                * FIXME: this is actually duty of the I/O sub-system to
+                * initialize this, but MTD does not provide enough
+                * information.
                  */
                 si->bad_peb_count += 1;
                 return 0;
@@ -930,7 +895,7 @@ struct ubi_scan_info *ubi_scan(struct ubi_device *ubi)
         for (pnum = 0; pnum < ubi->peb_count; pnum++) {
                 cond_resched();
  
-               dbg_msg("process PEB %d", pnum);
+               dbg_gen("process PEB %d", pnum);
                 err = process_eb(ubi, si, pnum);
                 if (err < 0)
                         goto out_vidh;
@@ -1079,8 +1044,7 @@ void ubi_scan_destroy_si(struct ubi_scan_info *si)
  #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
  
  /**
- * paranoid_check_si - check if the scanning information is correct and
- * consistent.
+ * paranoid_check_si - check the scanning information.
   * @ubi: UBI device description object
   * @si: scanning information
   *
@@ -1265,11 +1229,6 @@ static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si)
                                 ubi_err("bad data_pad %d", sv->data_pad);
                                 goto bad_vid_hdr;
                         }
-
-                       if (seb->leb_ver != be32_to_cpu(vidh->leb_ver)) {
-                               ubi_err("bad leb_ver %u", seb->leb_ver);
-                               goto bad_vid_hdr;
-                       }
                 }
  
                 if (!last_seb)
@@ -1299,8 +1258,7 @@ static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si)
                 if (err < 0) {
                         kfree(buf);
                         return err;
-               }
-               else if (err)
+               } else if (err)
                         buf[pnum] = 1;
         }
  
diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h

index 966b9b682a423819921f69ca02e626ad0c89772b..61df208e2f2077de860db28494e8639349368111 100644 (file)
--- a/drivers/mtd/ubi/scan.h
+++ b/drivers/mtd/ubi/scan.h
@@ -34,7 +34,6 @@
   * @u: unions RB-tree or @list links
   * @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects
   * @u.list: link in one of the eraseblock lists
- * @leb_ver: logical eraseblock version (obsolete)
   *
   * One object of this type is allocated for each physical eraseblock during
   * scanning.
@@ -49,7 +48,6 @@ struct ubi_scan_leb {
                 struct rb_node rb;
                 struct list_head list;
         } u;
-       uint32_t leb_ver;
  };
  
  /**
@@ -59,16 +57,16 @@ struct ubi_scan_leb {
   * @leb_count: number of logical eraseblocks in this volume
   * @vol_type: volume type
   * @used_ebs: number of used logical eraseblocks in this volume (only for
- * static volumes)
+ *            static volumes)
   * @last_data_size: amount of data in the last logical eraseblock of this
- * volume (always equivalent to the usable logical eraseblock size in case of
- * dynamic volumes)
+ *                  volume (always equivalent to the usable logical eraseblock
+ *                  size in case of dynamic volumes)
   * @data_pad: how many bytes at the end of logical eraseblocks of this volume
- * are not used (due to volume alignment)
+ *            are not used (due to volume alignment)
   * @compat: compatibility flags of this volume
   * @rb: link in the volume RB-tree
   * @root: root of the RB-tree containing all the eraseblock belonging to this
- * volume (&struct ubi_scan_leb objects)
+ *        volume (&struct ubi_scan_leb objects)
   *
   * One object of this type is allocated for each volume during scanning.
   */
@@ -92,8 +90,8 @@ struct ubi_scan_volume {
   * @free: list of free physical eraseblocks
   * @erase: list of physical eraseblocks which have to be erased
   * @alien: list of physical eraseblocks which should not be used by UBI (e.g.,
+ *         those belonging to "preserve"-compatible internal volumes)
   * @bad_peb_count: count of bad physical eraseblocks
- * those belonging to "preserve"-compatible internal volumes)
   * @vols_found: number of volumes found during scanning
   * @highest_vol_id: highest volume ID
   * @alien_peb_count: count of physical eraseblocks in the @alien list
@@ -106,8 +104,8 @@ struct ubi_scan_volume {
   * @ec_count: a temporary variable used when calculating @mean_ec
   *
   * This data structure contains the result of scanning and may be used by other
- * UBI units to build final UBI data structures, further error-recovery and so
- * on.
+ * UBI sub-systems to build final UBI data structures, further error-recovery
+ * and so on.
   */
  struct ubi_scan_info {
         struct rb_root volumes;
@@ -132,8 +130,7 @@ struct ubi_device;
  struct ubi_vid_hdr;
  
  /*
- * ubi_scan_move_to_list - move a physical eraseblock from the volume tree to a
- * list.
+ * ubi_scan_move_to_list - move a PEB from the volume tree to a list.
   *
   * @sv: volume scanning information
   * @seb: scanning eraseblock infprmation
diff --git a/drivers/mtd/ubi/ubi-media.h b/drivers/mtd/ubi/ubi-media.h

index c3185d9fd048836dfd09c52e266e1875e9571337..2ad94040905322528d9d36ec4edadeae15e086a8 100644 (file)
--- a/drivers/mtd/ubi/ubi-media.h
+++ b/drivers/mtd/ubi/ubi-media.h
@@ -98,10 +98,11 @@ enum {
   * Compatibility constants used by internal volumes.
   *
   * @UBI_COMPAT_DELETE: delete this internal volume before anything is written
- * to the flash
+ *                     to the flash
   * @UBI_COMPAT_RO: attach this device in read-only mode
   * @UBI_COMPAT_PRESERVE: preserve this internal volume - do not touch its
- * physical eraseblocks, don't allow the wear-leveling unit to move them
+ *                       physical eraseblocks, don't allow the wear-leveling
+ *                       sub-system to move them
   * @UBI_COMPAT_REJECT: reject this UBI image
   */
  enum {
@@ -123,7 +124,7 @@ enum {
   * struct ubi_ec_hdr - UBI erase counter header.
   * @magic: erase counter header magic number (%UBI_EC_HDR_MAGIC)
   * @version: version of UBI implementation which is supposed to accept this
- * UBI image
+ *           UBI image
   * @padding1: reserved for future, zeroes
   * @ec: the erase counter
   * @vid_hdr_offset: where the VID header starts
@@ -159,24 +160,23 @@ struct ubi_ec_hdr {
   * struct ubi_vid_hdr - on-flash UBI volume identifier header.
   * @magic: volume identifier header magic number (%UBI_VID_HDR_MAGIC)
   * @version: UBI implementation version which is supposed to accept this UBI
- * image (%UBI_VERSION)
+ *           image (%UBI_VERSION)
   * @vol_type: volume type (%UBI_VID_DYNAMIC or %UBI_VID_STATIC)
   * @copy_flag: if this logical eraseblock was copied from another physical
- * eraseblock (for wear-leveling reasons)
+ *             eraseblock (for wear-leveling reasons)
   * @compat: compatibility of this volume (%0, %UBI_COMPAT_DELETE,
- * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT)
+ *          %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT)
   * @vol_id: ID of this volume
   * @lnum: logical eraseblock number
- * @leb_ver: version of this logical eraseblock (IMPORTANT: obsolete, to be
- * removed, kept only for not breaking older UBI users)
+ * @padding1: reserved for future, zeroes
   * @data_size: how many bytes of data this logical eraseblock contains
   * @used_ebs: total number of used logical eraseblocks in this volume
   * @data_pad: how many bytes at the end of this physical eraseblock are not
- * used
+ *            used
   * @data_crc: CRC checksum of the data stored in this logical eraseblock
- * @padding1: reserved for future, zeroes
- * @sqnum: sequence number
   * @padding2: reserved for future, zeroes
+ * @sqnum: sequence number
+ * @padding3: reserved for future, zeroes
   * @hdr_crc: volume identifier header CRC checksum
   *
   * The @sqnum is the value of the global sequence counter at the time when this
@@ -224,10 +224,6 @@ struct ubi_ec_hdr {
   * checksum is correct, this physical eraseblock is selected (P1). Otherwise
   * the older one (P) is selected.
   *
- * Note, there is an obsolete @leb_ver field which was used instead of @sqnum
- * in the past. But it is not used anymore and we keep it in order to be able
- * to deal with old UBI images. It will be removed at some point.
- *
   * There are 2 sorts of volumes in UBI: user volumes and internal volumes.
   * Internal volumes are not seen from outside and are used for various internal
   * UBI purposes. In this implementation there is only one internal volume - the
@@ -248,9 +244,9 @@ struct ubi_ec_hdr {
   * The @data_crc field contains the CRC checksum of the contents of the logical
   * eraseblock if this is a static volume. In case of dynamic volumes, it does
   * not contain the CRC checksum as a rule. The only exception is when the
- * data of the physical eraseblock was moved by the wear-leveling unit, then
- * the wear-leveling unit calculates the data CRC and stores it in the
- * @data_crc field. And of course, the @copy_flag is %in this case.
+ * data of the physical eraseblock was moved by the wear-leveling sub-system,
+ * then the wear-leveling sub-system calculates the data CRC and stores it in
+ * the @data_crc field. And of course, the @copy_flag is set in this case.
   *
   * The @data_size field is used only for static volumes because UBI has to know
   * how many bytes of data are stored in this eraseblock. For dynamic volumes,
@@ -277,14 +273,14 @@ struct ubi_vid_hdr {
         __u8    compat;
         __be32  vol_id;
         __be32  lnum;
-       __be32  leb_ver; /* obsolete, to be removed, don't use */
+       __u8    padding1[4];
         __be32  data_size;
         __be32  used_ebs;
         __be32  data_pad;
         __be32  data_crc;
-       __u8    padding1[4];
+       __u8    padding2[4];
         __be64  sqnum;
-       __u8    padding2[12];
+       __u8    padding3[12];
         __be32  hdr_crc;
  } __attribute__ ((packed));
  
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h

index 67dcbd11c15c27c10cfd4349ede90c96ec11ea59..1c3fa18c26a7f9fdbf1b0b6a0d2443d82a65745e 100644 (file)
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -74,15 +74,15 @@
  #define UBI_IO_RETRIES 3
  
  /*
- * Error codes returned by the I/O unit.
+ * Error codes returned by the I/O sub-system.
   *
   * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only
- * 0xFF bytes
+ *                   %0xFF bytes
   * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a
- * valid erase counter header, and the rest are %0xFF bytes
+ *                  valid erase counter header, and the rest are %0xFF bytes
   * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC)
   * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or
- * CRC)
+ *                     CRC)
   * UBI_IO_BITFLIPS: bit-flips were detected and corrected
   */
  enum {
@@ -99,9 +99,9 @@ enum {
   * @ec: erase counter
   * @pnum: physical eraseblock number
   *
- * This data structure is used in the WL unit. Each physical eraseblock has a
- * corresponding &struct wl_entry object which may be kept in different
- * RB-trees. See WL unit for details.
+ * This data structure is used in the WL sub-system. Each physical eraseblock
+ * has a corresponding &struct wl_entry object which may be kept in different
+ * RB-trees. See WL sub-system for details.
   */
  struct ubi_wl_entry {
         struct rb_node rb;
@@ -118,10 +118,10 @@ struct ubi_wl_entry {
   * @mutex: read/write mutex to implement read/write access serialization to
   *         the (@vol_id, @lnum) logical eraseblock
   *
- * This data structure is used in the EBA unit to implement per-LEB locking.
- * When a logical eraseblock is being locked - corresponding
+ * This data structure is used in the EBA sub-system to implement per-LEB
+ * locking. When a logical eraseblock is being locked - corresponding
   * &struct ubi_ltree_entry object is inserted to the lock tree (@ubi->ltree).
- * See EBA unit for details.
+ * See EBA sub-system for details.
   */
  struct ubi_ltree_entry {
         struct rb_node rb;
@@ -131,6 +131,27 @@ struct ubi_ltree_entry {
         struct rw_semaphore mutex;
  };
  
+/**
+ * struct ubi_rename_entry - volume re-name description data structure.
+ * @new_name_len: new volume name length
+ * @new_name: new volume name
+ * @remove: if not zero, this volume should be removed, not re-named
+ * @desc: descriptor of the volume
+ * @list: links re-name entries into a list
+ *
+ * This data structure is utilized in the multiple volume re-name code. Namely,
+ * UBI first creates a list of &struct ubi_rename_entry objects from the
+ * &struct ubi_rnvol_req request object, and then utilizes this list to do all
+ * the job.
+ */
+struct ubi_rename_entry {
+       int new_name_len;
+       char new_name[UBI_VOL_NAME_MAX + 1];
+       int remove;
+       struct ubi_volume_desc *desc;
+       struct list_head list;
+};
+
  struct ubi_volume_desc;
  
  /**
@@ -206,7 +227,7 @@ struct ubi_volume {
         int alignment;
         int data_pad;
         int name_len;
-       char name[UBI_VOL_NAME_MAX+1];
+       char name[UBI_VOL_NAME_MAX + 1];
  
         int upd_ebs;
         int ch_lnum;
@@ -225,7 +246,7 @@ struct ubi_volume {
  #ifdef CONFIG_MTD_UBI_GLUEBI
         /*
          * Gluebi-related stuff may be compiled out.
-        * TODO: this should not be built into UBI but should be a separate
+        * Note: this should not be built into UBI but should be a separate
          * ubimtd driver which works on top of UBI and emulates MTD devices.
          */
         struct ubi_volume_desc *gluebi_desc;
@@ -235,8 +256,7 @@ struct ubi_volume {
  };
  
  /**
- * struct ubi_volume_desc - descriptor of the UBI volume returned when it is
- * opened.
+ * struct ubi_volume_desc - UBI volume descriptor returned when it is opened.
   * @vol: reference to the corresponding volume description object
   * @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE)
   */
@@ -273,7 +293,7 @@ struct ubi_wl_entry;
   * @vtbl_size: size of the volume table in bytes
   * @vtbl: in-RAM volume table copy
   * @volumes_mutex: protects on-flash volume table and serializes volume
- *                 changes, like creation, deletion, update, resize
+ *                 changes, like creation, deletion, update, re-size and re-name
   *
   * @max_ec: current highest erase counter value
   * @mean_ec: current mean erase counter value
@@ -293,6 +313,7 @@ struct ubi_wl_entry;
   *           @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works
   *           fields
   * @move_mutex: serializes eraseblock moves
+ * @work_sem: synchronizes the WL worker with use tasks
   * @wl_scheduled: non-zero if the wear-leveling was scheduled
   * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any
   *             physical eraseblock
@@ -316,11 +337,11 @@ struct ubi_wl_entry;
   * @ro_mode: if the UBI device is in read-only mode
   * @leb_size: logical eraseblock size
   * @leb_start: starting offset of logical eraseblocks within physical
- * eraseblocks
+ *             eraseblocks
   * @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size
   * @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size
   * @vid_hdr_offset: starting offset of the volume identifier header (might be
- * unaligned)
+ *                  unaligned)
   * @vid_hdr_aloffset: starting offset of the VID header aligned to
   * @hdrs_min_io_size
   * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset
@@ -331,6 +352,8 @@ struct ubi_wl_entry;
   * @peb_buf1: a buffer of PEB size used for different purposes
   * @peb_buf2: another buffer of PEB size used for different purposes
   * @buf_mutex: proptects @peb_buf1 and @peb_buf2
+ * @ckvol_mutex: serializes static volume checking when opening
+ * @mult_mutex: serializes operations on multiple volumes, like re-naming
   * @dbg_peb_buf: buffer of PEB size used for debugging
   * @dbg_buf_mutex: proptects @dbg_peb_buf
   */
@@ -356,16 +379,16 @@ struct ubi_device {
         struct mutex volumes_mutex;
  
         int max_ec;
-       /* TODO: mean_ec is not updated run-time, fix */
+       /* Note, mean_ec is not updated run-time - should be fixed */
         int mean_ec;
  
-       /* EBA unit's stuff */
+       /* EBA sub-system's stuff */
         unsigned long long global_sqnum;
         spinlock_t ltree_lock;
         struct rb_root ltree;
         struct mutex alc_mutex;
  
-       /* Wear-leveling unit's stuff */
+       /* Wear-leveling sub-system's stuff */
         struct rb_root used;
         struct rb_root free;
         struct rb_root scrub;
@@ -388,7 +411,7 @@ struct ubi_device {
         int thread_enabled;
         char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2];
  
-       /* I/O unit's stuff */
+       /* I/O sub-system's stuff */
         long long flash_size;
         int peb_count;
         int peb_size;
@@ -411,6 +434,7 @@ struct ubi_device {
         void *peb_buf2;
         struct mutex buf_mutex;
         struct mutex ckvol_mutex;
+       struct mutex mult_mutex;
  #ifdef CONFIG_MTD_UBI_DEBUG
         void *dbg_peb_buf;
         struct mutex dbg_buf_mutex;
@@ -427,12 +451,15 @@ extern struct mutex ubi_devices_mutex;
  /* vtbl.c */
  int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
                            struct ubi_vtbl_record *vtbl_rec);
+int ubi_vtbl_rename_volumes(struct ubi_device *ubi,
+                           struct list_head *rename_list);
  int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si);
  
  /* vmt.c */
  int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req);
-int ubi_remove_volume(struct ubi_volume_desc *desc);
+int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl);
  int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs);
+int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list);
  int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol);
  void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol);
  
@@ -447,7 +474,8 @@ int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol,
                              const void __user *buf, int count);
  
  /* misc.c */
-int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, int length);
+int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf,
+                     int length);
  int ubi_check_volume(struct ubi_device *ubi, int vol_id);
  void ubi_calculate_reserved(struct ubi_device *ubi);
  
@@ -477,7 +505,6 @@ int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol,
  int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
                      struct ubi_vid_hdr *vid_hdr);
  int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si);
-void ubi_eba_close(const struct ubi_device *ubi);
  
  /* wl.c */
  int ubi_wl_get_peb(struct ubi_device *ubi, int dtype);
diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c

index ddaa1a56cc692d278f16e1f92cc9a799fcd72b90..8b89cc18ff0b69753326dbe48b87015726cf3cc1 100644 (file)
--- a/drivers/mtd/ubi/upd.c
+++ b/drivers/mtd/ubi/upd.c
@@ -39,7 +39,7 @@
   */
  
  #include <linux/err.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
  #include <asm/div64.h>
  #include "ubi.h"
  
@@ -56,11 +56,11 @@ static int set_update_marker(struct ubi_device *ubi, struct ubi_volume *vol)
         int err;
         struct ubi_vtbl_record vtbl_rec;
  
-       dbg_msg("set update marker for volume %d", vol->vol_id);
+       dbg_gen("set update marker for volume %d", vol->vol_id);
  
         if (vol->upd_marker) {
                 ubi_assert(ubi->vtbl[vol->vol_id].upd_marker);
-               dbg_msg("already set");
+               dbg_gen("already set");
                 return 0;
         }
  
@@ -92,7 +92,7 @@ static int clear_update_marker(struct ubi_device *ubi, struct ubi_volume *vol,
         uint64_t tmp;
         struct ubi_vtbl_record vtbl_rec;
  
-       dbg_msg("clear update marker for volume %d", vol->vol_id);
+       dbg_gen("clear update marker for volume %d", vol->vol_id);
  
         memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id],
                sizeof(struct ubi_vtbl_record));
@@ -133,7 +133,7 @@ int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol,
         int i, err;
         uint64_t tmp;
  
-       dbg_msg("start update of volume %d, %llu bytes", vol->vol_id, bytes);
+       dbg_gen("start update of volume %d, %llu bytes", vol->vol_id, bytes);
         ubi_assert(!vol->updating && !vol->changing_leb);
         vol->updating = 1;
  
@@ -183,7 +183,7 @@ int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol,
  {
         ubi_assert(!vol->updating && !vol->changing_leb);
  
-       dbg_msg("start changing LEB %d:%d, %u bytes",
+       dbg_gen("start changing LEB %d:%d, %u bytes",
                 vol->vol_id, req->lnum, req->bytes);
         if (req->bytes == 0)
                 return ubi_eba_atomic_leb_change(ubi, vol, req->lnum, NULL, 0,
@@ -237,16 +237,17 @@ static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
         int err;
  
         if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
-               len = ALIGN(len, ubi->min_io_size);
-               memset(buf + len, 0xFF, len - len);
+               int l = ALIGN(len, ubi->min_io_size);
  
-               len = ubi_calc_data_len(ubi, buf, len);
+               memset(buf + len, 0xFF, l - len);
+               len = ubi_calc_data_len(ubi, buf, l);
                 if (len == 0) {
-                       dbg_msg("all %d bytes contain 0xFF - skip", len);
+                       dbg_gen("all %d bytes contain 0xFF - skip", len);
                         return 0;
                 }
  
-               err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len, UBI_UNKNOWN);
+               err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len,
+                                       UBI_UNKNOWN);
         } else {
                 /*
                  * When writing static volume, and this is the last logical
@@ -267,6 +268,7 @@ static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum,
  
  /**
   * ubi_more_update_data - write more update data.
+ * @ubi: UBI device description object
   * @vol: volume description object
   * @buf: write data (user-space memory buffer)
   * @count: how much bytes to write
@@ -283,7 +285,7 @@ int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol,
         uint64_t tmp;
         int lnum, offs, err = 0, len, to_write = count;
  
-       dbg_msg("write %d of %lld bytes, %lld already passed",
+       dbg_gen("write %d of %lld bytes, %lld already passed",
                 count, vol->upd_bytes, vol->upd_received);
  
         if (ubi->ro_mode)
@@ -384,6 +386,7 @@ int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol,
  
  /**
   * ubi_more_leb_change_data - accept more data for atomic LEB change.
+ * @ubi: UBI device description object
   * @vol: volume description object
   * @buf: write data (user-space memory buffer)
   * @count: how much bytes to write
@@ -400,7 +403,7 @@ int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol,
  {
         int err;
  
-       dbg_msg("write %d of %lld bytes, %lld already passed",
+       dbg_gen("write %d of %lld bytes, %lld already passed",
                 count, vol->upd_bytes, vol->upd_received);
  
         if (ubi->ro_mode)
@@ -418,7 +421,8 @@ int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol,
         if (vol->upd_received == vol->upd_bytes) {
                 int len = ALIGN((int)vol->upd_bytes, ubi->min_io_size);
  
-               memset(vol->upd_buf + vol->upd_bytes, 0xFF, len - vol->upd_bytes);
+               memset(vol->upd_buf + vol->upd_bytes, 0xFF,
+                      len - vol->upd_bytes);
                 len = ubi_calc_data_len(ubi, vol->upd_buf, len);
                 err = ubi_eba_atomic_leb_change(ubi, vol, vol->ch_lnum,
                                                 vol->upd_buf, len, UBI_UNKNOWN);
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c

index 5be58d85c6393c53c2be0f4693ff8a9bd13e40b3..3531ca9a1e24e9ec4721b21eea4a7c77244d7db8 100644 (file)
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -28,9 +28,9 @@
  #include "ubi.h"
  
  #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
-static void paranoid_check_volumes(struct ubi_device *ubi);
+static int paranoid_check_volumes(struct ubi_device *ubi);
  #else
-#define paranoid_check_volumes(ubi)
+#define paranoid_check_volumes(ubi) 0
  #endif
  
  static ssize_t vol_attribute_show(struct device *dev,
@@ -127,6 +127,7 @@ static void vol_release(struct device *dev)
  {
         struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev);
  
+       kfree(vol->eba_tbl);
         kfree(vol);
  }
  
@@ -201,7 +202,7 @@ static void volume_sysfs_close(struct ubi_volume *vol)
   */
  int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
  {
-       int i, err, vol_id = req->vol_id, dont_free = 0;
+       int i, err, vol_id = req->vol_id, do_free = 1;
         struct ubi_volume *vol;
         struct ubi_vtbl_record vtbl_rec;
         uint64_t bytes;
@@ -217,7 +218,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
         spin_lock(&ubi->volumes_lock);
         if (vol_id == UBI_VOL_NUM_AUTO) {
                 /* Find unused volume ID */
-               dbg_msg("search for vacant volume ID");
+               dbg_gen("search for vacant volume ID");
                 for (i = 0; i < ubi->vtbl_slots; i++)
                         if (!ubi->volumes[i]) {
                                 vol_id = i;
@@ -232,7 +233,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
                 req->vol_id = vol_id;
         }
  
-       dbg_msg("volume ID %d, %llu bytes, type %d, name %s",
+       dbg_gen("volume ID %d, %llu bytes, type %d, name %s",
                 vol_id, (unsigned long long)req->bytes,
                 (int)req->vol_type, req->name);
  
@@ -252,7 +253,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
                         goto out_unlock;
                 }
  
-        /* Calculate how many eraseblocks are requested */
+       /* Calculate how many eraseblocks are requested */
         vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment;
         bytes = req->bytes;
         if (do_div(bytes, vol->usable_leb_size))
@@ -274,7 +275,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
         vol->data_pad  = ubi->leb_size % vol->alignment;
         vol->vol_type  = req->vol_type;
         vol->name_len  = req->name_len;
-       memcpy(vol->name, req->name, vol->name_len + 1);
+       memcpy(vol->name, req->name, vol->name_len);
         vol->ubi = ubi;
  
         /*
@@ -349,7 +350,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
                 vtbl_rec.vol_type = UBI_VID_DYNAMIC;
         else
                 vtbl_rec.vol_type = UBI_VID_STATIC;
-       memcpy(vtbl_rec.name, vol->name, vol->name_len + 1);
+       memcpy(vtbl_rec.name, vol->name, vol->name_len);
  
         err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);
         if (err)
@@ -360,19 +361,19 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
         ubi->vol_count += 1;
         spin_unlock(&ubi->volumes_lock);
  
-       paranoid_check_volumes(ubi);
-       return 0;
+       err = paranoid_check_volumes(ubi);
+       return err;
  
  out_sysfs:
         /*
-        * We have registered our device, we should not free the volume*
+        * We have registered our device, we should not free the volume
          * description object in this function in case of an error - it is
          * freed by the release function.
          *
          * Get device reference to prevent the release function from being
          * called just after sysfs has been closed.
          */
-       dont_free = 1;
+       do_free = 0;
         get_device(&vol->dev);
         volume_sysfs_close(vol);
  out_gluebi:
@@ -382,17 +383,18 @@ out_gluebi:
  out_cdev:
         cdev_del(&vol->cdev);
  out_mapping:
-       kfree(vol->eba_tbl);
+       if (do_free)
+               kfree(vol->eba_tbl);
  out_acc:
         spin_lock(&ubi->volumes_lock);
         ubi->rsvd_pebs -= vol->reserved_pebs;
         ubi->avail_pebs += vol->reserved_pebs;
  out_unlock:
         spin_unlock(&ubi->volumes_lock);
-       if (dont_free)
-               put_device(&vol->dev);
-       else
+       if (do_free)
                 kfree(vol);
+       else
+               put_device(&vol->dev);
         ubi_err("cannot create volume %d, error %d", vol_id, err);
         return err;
  }
@@ -400,19 +402,20 @@ out_unlock:
  /**
   * ubi_remove_volume - remove volume.
   * @desc: volume descriptor
+ * @no_vtbl: do not change volume table if not zero
   *
   * This function removes volume described by @desc. The volume has to be opened
   * in "exclusive" mode. Returns zero in case of success and a negative error
   * code in case of failure. The caller has to have the @ubi->volumes_mutex
   * locked.
   */
-int ubi_remove_volume(struct ubi_volume_desc *desc)
+int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl)
  {
         struct ubi_volume *vol = desc->vol;
         struct ubi_device *ubi = vol->ubi;
         int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs;
  
-       dbg_msg("remove UBI volume %d", vol_id);
+       dbg_gen("remove UBI volume %d", vol_id);
         ubi_assert(desc->mode == UBI_EXCLUSIVE);
         ubi_assert(vol == ubi->volumes[vol_id]);
  
@@ -435,9 +438,11 @@ int ubi_remove_volume(struct ubi_volume_desc *desc)
         if (err)
                 goto out_err;
  
-       err = ubi_change_vtbl_record(ubi, vol_id, NULL);
-       if (err)
-               goto out_err;
+       if (!no_vtbl) {
+               err = ubi_change_vtbl_record(ubi, vol_id, NULL);
+               if (err)
+                       goto out_err;
+       }
  
         for (i = 0; i < vol->reserved_pebs; i++) {
                 err = ubi_eba_unmap_leb(ubi, vol, i);
@@ -445,8 +450,6 @@ int ubi_remove_volume(struct ubi_volume_desc *desc)
                         goto out_err;
         }
  
-       kfree(vol->eba_tbl);
-       vol->eba_tbl = NULL;
         cdev_del(&vol->cdev);
         volume_sysfs_close(vol);
  
@@ -465,8 +468,9 @@ int ubi_remove_volume(struct ubi_volume_desc *desc)
         ubi->vol_count -= 1;
         spin_unlock(&ubi->volumes_lock);
  
-       paranoid_check_volumes(ubi);
-       return 0;
+       if (!no_vtbl)
+               err = paranoid_check_volumes(ubi);
+       return err;
  
  out_err:
         ubi_err("cannot remove volume %d, error %d", vol_id, err);
@@ -497,7 +501,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
         if (ubi->ro_mode)
                 return -EROFS;
  
-       dbg_msg("re-size volume %d to from %d to %d PEBs",
+       dbg_gen("re-size volume %d to from %d to %d PEBs",
                 vol_id, vol->reserved_pebs, reserved_pebs);
  
         if (vol->vol_type == UBI_STATIC_VOLUME &&
@@ -586,8 +590,8 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
                         (long long)vol->used_ebs * vol->usable_leb_size;
         }
  
-       paranoid_check_volumes(ubi);
-       return 0;
+       err = paranoid_check_volumes(ubi);
+       return err;
  
  out_acc:
         if (pebs > 0) {
@@ -601,6 +605,44 @@ out_free:
         return err;
  }
  
+/**
+ * ubi_rename_volumes - re-name UBI volumes.
+ * @ubi: UBI device description object
+ * @rename_list: list of &struct ubi_rename_entry objects
+ *
+ * This function re-names or removes volumes specified in the re-name list.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list)
+{
+       int err;
+       struct ubi_rename_entry *re;
+
+       err = ubi_vtbl_rename_volumes(ubi, rename_list);
+       if (err)
+               return err;
+
+       list_for_each_entry(re, rename_list, list) {
+               if (re->remove) {
+                       err = ubi_remove_volume(re->desc, 1);
+                       if (err)
+                               break;
+               } else {
+                       struct ubi_volume *vol = re->desc->vol;
+
+                       spin_lock(&ubi->volumes_lock);
+                       vol->name_len = re->new_name_len;
+                       memcpy(vol->name, re->new_name, re->new_name_len + 1);
+                       spin_unlock(&ubi->volumes_lock);
+               }
+       }
+
+       if (!err)
+               err = paranoid_check_volumes(ubi);
+       return err;
+}
+
  /**
   * ubi_add_volume - add volume.
   * @ubi: UBI device description object
@@ -615,8 +657,7 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol)
         int err, vol_id = vol->vol_id;
         dev_t dev;
  
-       dbg_msg("add volume %d", vol_id);
-       ubi_dbg_dump_vol_info(vol);
+       dbg_gen("add volume %d", vol_id);
  
         /* Register character device for the volume */
         cdev_init(&vol->cdev, &ubi_vol_cdev_operations);
@@ -650,8 +691,8 @@ int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol)
                 return err;
         }
  
-       paranoid_check_volumes(ubi);
-       return 0;
+       err = paranoid_check_volumes(ubi);
+       return err;
  
  out_gluebi:
         err = ubi_destroy_gluebi(vol);
@@ -672,7 +713,7 @@ void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol)
  {
         int err;
  
-       dbg_msg("free volume %d", vol->vol_id);
+       dbg_gen("free volume %d", vol->vol_id);
  
         ubi->volumes[vol->vol_id] = NULL;
         err = ubi_destroy_gluebi(vol);
@@ -686,8 +727,10 @@ void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol)
   * paranoid_check_volume - check volume information.
   * @ubi: UBI device description object
   * @vol_id: volume ID
+ *
+ * Returns zero if volume is all right and a negative error code if not.
   */
-static void paranoid_check_volume(struct ubi_device *ubi, int vol_id)
+static int paranoid_check_volume(struct ubi_device *ubi, int vol_id)
  {
         int idx = vol_id2idx(ubi, vol_id);
         int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker;
@@ -705,16 +748,7 @@ static void paranoid_check_volume(struct ubi_device *ubi, int vol_id)
                         goto fail;
                 }
                 spin_unlock(&ubi->volumes_lock);
-               return;
-       }
-
-       if (vol->exclusive) {
-               /*
-                * The volume may be being created at the moment, do not check
-                * it (e.g., it may be in the middle of ubi_create_volume().
-                */
-               spin_unlock(&ubi->volumes_lock);
-               return;
+               return 0;
         }
  
         if (vol->reserved_pebs < 0 || vol->alignment < 0 || vol->data_pad < 0 ||
@@ -727,7 +761,7 @@ static void paranoid_check_volume(struct ubi_device *ubi, int vol_id)
                 goto fail;
         }
  
-       n = vol->alignment % ubi->min_io_size;
+       n = vol->alignment & (ubi->min_io_size - 1);
         if (vol->alignment != 1 && n) {
                 ubi_err("alignment is not multiple of min I/O unit");
                 goto fail;
@@ -824,31 +858,39 @@ static void paranoid_check_volume(struct ubi_device *ubi, int vol_id)
  
         if (alignment != vol->alignment || data_pad != vol->data_pad ||
             upd_marker != vol->upd_marker || vol_type != vol->vol_type ||
-           name_len!= vol->name_len || strncmp(name, vol->name, name_len)) {
+           name_len != vol->name_len || strncmp(name, vol->name, name_len)) {
                 ubi_err("volume info is different");
                 goto fail;
         }
  
         spin_unlock(&ubi->volumes_lock);
-       return;
+       return 0;
  
  fail:
         ubi_err("paranoid check failed for volume %d", vol_id);
-       ubi_dbg_dump_vol_info(vol);
+       if (vol)
+               ubi_dbg_dump_vol_info(vol);
         ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id);
         spin_unlock(&ubi->volumes_lock);
-       BUG();
+       return -EINVAL;
  }
  
  /**
   * paranoid_check_volumes - check information about all volumes.
   * @ubi: UBI device description object
+ *
+ * Returns zero if volumes are all right and a negative error code if not.
   */
-static void paranoid_check_volumes(struct ubi_device *ubi)
+static int paranoid_check_volumes(struct ubi_device *ubi)
  {
-       int i;
+       int i, err = 0;
  
-       for (i = 0; i < ubi->vtbl_slots; i++)
-               paranoid_check_volume(ubi, i);
+       for (i = 0; i < ubi->vtbl_slots; i++) {
+               err = paranoid_check_volume(ubi, i);
+               if (err)
+                       break;
+       }
+
+       return err;
  }
  #endif
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c

index af36b12be27871ddefd1c34a0d17f95483042343..217d0e111b2a4652212f563cb9baf5255f2bbdca 100644 (file)
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -115,8 +115,58 @@ int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
  }
  
  /**
- * vtbl_check - check if volume table is not corrupted and contains sensible
- *              data.
+ * ubi_vtbl_rename_volumes - rename UBI volumes in the volume table.
+ * @ubi: UBI device description object
+ * @rename_list: list of &struct ubi_rename_entry objects
+ *
+ * This function re-names multiple volumes specified in @rename_list in the
+ * volume table. Returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int ubi_vtbl_rename_volumes(struct ubi_device *ubi,
+                           struct list_head *rename_list)
+{
+       int i, err;
+       struct ubi_rename_entry *re;
+       struct ubi_volume *layout_vol;
+
+       list_for_each_entry(re, rename_list, list) {
+               uint32_t crc;
+               struct ubi_volume *vol = re->desc->vol;
+               struct ubi_vtbl_record *vtbl_rec = &ubi->vtbl[vol->vol_id];
+
+               if (re->remove) {
+                       memcpy(vtbl_rec, &empty_vtbl_record,
+                              sizeof(struct ubi_vtbl_record));
+                       continue;
+               }
+
+               vtbl_rec->name_len = cpu_to_be16(re->new_name_len);
+               memcpy(vtbl_rec->name, re->new_name, re->new_name_len);
+               memset(vtbl_rec->name + re->new_name_len, 0,
+                      UBI_VOL_NAME_MAX + 1 - re->new_name_len);
+               crc = crc32(UBI_CRC32_INIT, vtbl_rec,
+                           UBI_VTBL_RECORD_SIZE_CRC);
+               vtbl_rec->crc = cpu_to_be32(crc);
+       }
+
+       layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)];
+       for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) {
+               err = ubi_eba_unmap_leb(ubi, layout_vol, i);
+               if (err)
+                       return err;
+
+               err = ubi_eba_write_leb(ubi, layout_vol, i, ubi->vtbl, 0,
+                                       ubi->vtbl_size, UBI_LONGTERM);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/**
+ * vtbl_check - check if volume table is not corrupted and sensible.
   * @ubi: UBI device description object
   * @vtbl: volume table
   *
@@ -127,7 +177,7 @@ static int vtbl_check(const struct ubi_device *ubi,
                       const struct ubi_vtbl_record *vtbl)
  {
         int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len;
-       int upd_marker;
+       int upd_marker, err;
         uint32_t crc;
         const char *name;
  
@@ -153,7 +203,7 @@ static int vtbl_check(const struct ubi_device *ubi,
                 if (reserved_pebs == 0) {
                         if (memcmp(&vtbl[i], &empty_vtbl_record,
                                                 UBI_VTBL_RECORD_SIZE)) {
-                               dbg_err("bad empty record");
+                               err = 2;
                                 goto bad;
                         }
                         continue;
@@ -161,56 +211,57 @@ static int vtbl_check(const struct ubi_device *ubi,
  
                 if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 ||
                     name_len < 0) {
-                       dbg_err("negative values");
+                       err = 3;
                         goto bad;
                 }
  
                 if (alignment > ubi->leb_size || alignment == 0) {
-                       dbg_err("bad alignment");
+                       err = 4;
                         goto bad;
                 }
  
-               n = alignment % ubi->min_io_size;
+               n = alignment & (ubi->min_io_size - 1);
                 if (alignment != 1 && n) {
-                       dbg_err("alignment is not multiple of min I/O unit");
+                       err = 5;
                         goto bad;
                 }
  
                 n = ubi->leb_size % alignment;
                 if (data_pad != n) {
                         dbg_err("bad data_pad, has to be %d", n);
+                       err = 6;
                         goto bad;
                 }
  
                 if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) {
-                       dbg_err("bad vol_type");
+                       err = 7;
                         goto bad;
                 }
  
                 if (upd_marker != 0 && upd_marker != 1) {
-                       dbg_err("bad upd_marker");
+                       err = 8;
                         goto bad;
                 }
  
                 if (reserved_pebs > ubi->good_peb_count) {
                         dbg_err("too large reserved_pebs, good PEBs %d",
                                 ubi->good_peb_count);
+                       err = 9;
                         goto bad;
                 }
  
                 if (name_len > UBI_VOL_NAME_MAX) {
-                       dbg_err("too long volume name, max %d",
-                               UBI_VOL_NAME_MAX);
+                       err = 10;
                         goto bad;
                 }
  
                 if (name[0] == '\0') {
-                       dbg_err("NULL volume name");
+                       err = 11;
                         goto bad;
                 }
  
                 if (name_len != strnlen(name, name_len + 1)) {
-                       dbg_err("bad name_len");
+                       err = 12;
                         goto bad;
                 }
         }
@@ -235,7 +286,7 @@ static int vtbl_check(const struct ubi_device *ubi,
         return 0;
  
  bad:
-       ubi_err("volume table check failed, record %d", i);
+       ubi_err("volume table check failed: record %d, error %d", i, err);
         ubi_dbg_dump_vtbl_record(&vtbl[i], i);
         return -EINVAL;
  }
@@ -287,7 +338,6 @@ retry:
                              vid_hdr->data_pad = cpu_to_be32(0);
         vid_hdr->lnum = cpu_to_be32(copy);
         vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum);
-       vid_hdr->leb_ver = cpu_to_be32(old_seb ? old_seb->leb_ver + 1: 0);
  
         /* The EC header is already there, write the VID header */
         err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr);
@@ -370,7 +420,7 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi,
          *    to LEB 0.
          */
  
-       dbg_msg("check layout volume");
+       dbg_gen("check layout volume");
  
         /* Read both LEB 0 and LEB 1 into memory */
         ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) {
@@ -384,7 +434,16 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi,
                 err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0,
                                        ubi->vtbl_size);
                 if (err == UBI_IO_BITFLIPS || err == -EBADMSG)
-                       /* Scrub the PEB later */
+                       /*
+                        * Scrub the PEB later. Note, -EBADMSG indicates an
+                        * uncorrectable ECC error, but we have our own CRC and
+                        * the data will be checked later. If the data is OK,
+                        * the PEB will be scrubbed (because we set
+                        * seb->scrub). If the data is not OK, the contents of
+                        * the PEB will be recovered from the second copy, and
+                        * seb->scrub will be cleared in
+                        * 'ubi_scan_add_used()'.
+                        */
                         seb->scrub = 1;
                 else if (err)
                         goto out_free;
@@ -400,7 +459,8 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi,
         if (!leb_corrupted[0]) {
                 /* LEB 0 is OK */
                 if (leb[1])
-                       leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size);
+                       leb_corrupted[1] = memcmp(leb[0], leb[1],
+                                                 ubi->vtbl_size);
                 if (leb_corrupted[1]) {
                         ubi_warn("volume table copy #2 is corrupted");
                         err = create_vtbl(ubi, si, 1, leb[0]);
@@ -620,30 +680,32 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si,
  static int check_sv(const struct ubi_volume *vol,
                     const struct ubi_scan_volume *sv)
  {
+       int err;
+
         if (sv->highest_lnum >= vol->reserved_pebs) {
-               dbg_err("bad highest_lnum");
+               err = 1;
                 goto bad;
         }
         if (sv->leb_count > vol->reserved_pebs) {
-               dbg_err("bad leb_count");
+               err = 2;
                 goto bad;
         }
         if (sv->vol_type != vol->vol_type) {
-               dbg_err("bad vol_type");
+               err = 3;
                 goto bad;
         }
         if (sv->used_ebs > vol->reserved_pebs) {
-               dbg_err("bad used_ebs");
+               err = 4;
                 goto bad;
         }
         if (sv->data_pad != vol->data_pad) {
-               dbg_err("bad data_pad");
+               err = 5;
                 goto bad;
         }
         return 0;
  
  bad:
-       ubi_err("bad scanning information");
+       ubi_err("bad scanning information, error %d", err);
         ubi_dbg_dump_sv(sv);
         ubi_dbg_dump_vol_info(vol);
         return -EINVAL;
@@ -672,14 +734,13 @@ static int check_scanning_info(const struct ubi_device *ubi,
                 return -EINVAL;
         }
  
-       if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT&&
+       if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT &&
             si->highest_vol_id < UBI_INTERNAL_VOL_START) {
                 ubi_err("too large volume ID %d found by scanning",
                         si->highest_vol_id);
                 return -EINVAL;
         }
  
-
         for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
                 cond_resched();
  
@@ -717,8 +778,7 @@ static int check_scanning_info(const struct ubi_device *ubi,
  }
  
  /**
- * ubi_read_volume_table - read volume table.
- * information.
+ * ubi_read_volume_table - read the volume table.
   * @ubi: UBI device description object
   * @si: scanning information
   *
@@ -797,11 +857,10 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si)
  
  out_free:
         vfree(ubi->vtbl);
-       for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++)
-               if (ubi->volumes[i]) {
-                       kfree(ubi->volumes[i]);
-                       ubi->volumes[i] = NULL;
-               }
+       for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
+               kfree(ubi->volumes[i]);
+               ubi->volumes[i] = NULL;
+       }
         return err;
  }
  
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c

index a471a491f0ab0c2840543594966640487e7bacd4..05d70937b54322d8ecd878d05cc69e084ef4a5ed 100644 (file)
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -19,22 +19,22 @@
   */
  
  /*
- * UBI wear-leveling unit.
+ * UBI wear-leveling sub-system.
   *
- * This unit is responsible for wear-leveling. It works in terms of physical
- * eraseblocks and erase counters and knows nothing about logical eraseblocks,
- * volumes, etc. From this unit's perspective all physical eraseblocks are of
- * two types - used and free. Used physical eraseblocks are those that were
- * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are
- * those that were put by the 'ubi_wl_put_peb()' function.
+ * This sub-system is responsible for wear-leveling. It works in terms of
+ * physical eraseblocks and erase counters and knows nothing about logical
+ * eraseblocks, volumes, etc. From this sub-system's perspective all physical
+ * eraseblocks are of two types - used and free. Used physical eraseblocks are
+ * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical
+ * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function.
   *
   * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter
- * header. The rest of the physical eraseblock contains only 0xFF bytes.
+ * header. The rest of the physical eraseblock contains only %0xFF bytes.
   *
- * When physical eraseblocks are returned to the WL unit by means of the
+ * When physical eraseblocks are returned to the WL sub-system by means of the
   * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
   * done asynchronously in context of the per-UBI device background thread,
- * which is also managed by the WL unit.
+ * which is also managed by the WL sub-system.
   *
   * The wear-leveling is ensured by means of moving the contents of used
   * physical eraseblocks with low erase counter to free physical eraseblocks
@@ -43,34 +43,36 @@
   * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick
   * an "optimal" physical eraseblock. For example, when it is known that the
   * physical eraseblock will be "put" soon because it contains short-term data,
- * the WL unit may pick a free physical eraseblock with low erase counter, and
- * so forth.
+ * the WL sub-system may pick a free physical eraseblock with low erase
+ * counter, and so forth.
   *
- * If the WL unit fails to erase a physical eraseblock, it marks it as bad.
+ * If the WL sub-system fails to erase a physical eraseblock, it marks it as
+ * bad.
   *
- * This unit is also responsible for scrubbing. If a bit-flip is detected in a
- * physical eraseblock, it has to be moved. Technically this is the same as
- * moving it for wear-leveling reasons.
+ * This sub-system is also responsible for scrubbing. If a bit-flip is detected
+ * in a physical eraseblock, it has to be moved. Technically this is the same
+ * as moving it for wear-leveling reasons.
   *
- * As it was said, for the UBI unit all physical eraseblocks are either "free"
- * or "used". Free eraseblock are kept in the @wl->free RB-tree, while used
- * eraseblocks are kept in a set of different RB-trees: @wl->used,
+ * As it was said, for the UBI sub-system all physical eraseblocks are either
+ * "free" or "used". Free eraseblocks are kept in the @wl->free RB-tree, while
+ * used eraseblocks are kept in a set of different RB-trees: @wl->used,
   * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub.
   *
   * Note, in this implementation, we keep a small in-RAM object for each physical
   * eraseblock. This is surely not a scalable solution. But it appears to be good
   * enough for moderately large flashes and it is simple. In future, one may
- * re-work this unit and make it more scalable.
+ * re-work this sub-system and make it more scalable.
   *
- * At the moment this unit does not utilize the sequence number, which was
- * introduced relatively recently. But it would be wise to do this because the
- * sequence number of a logical eraseblock characterizes how old is it. For
+ * At the moment this sub-system does not utilize the sequence number, which
+ * was introduced relatively recently. But it would be wise to do this because
+ * the sequence number of a logical eraseblock characterizes how old it is. For
   * example, when we move a PEB with low erase counter, and we need to pick the
   * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
   * pick target PEB with an average EC if our PEB is not very "old". This is a
- * room for future re-works of the WL unit.
+ * room for future re-works of the WL sub-system.
   *
- * FIXME: looks too complex, should be simplified (later).
+ * Note: the stuff with protection trees looks too complex and is difficult to
+ * understand. Should be fixed.
   */
  
  #include <linux/slab.h>
@@ -92,20 +94,21 @@
  
  /*
   * Maximum difference between two erase counters. If this threshold is
- * exceeded, the WL unit starts moving data from used physical eraseblocks with
- * low erase counter to free physical eraseblocks with high erase counter.
+ * exceeded, the WL sub-system starts moving data from used physical
+ * eraseblocks with low erase counter to free physical eraseblocks with high
+ * erase counter.
   */
  #define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
  
  /*
- * When a physical eraseblock is moved, the WL unit has to pick the target
+ * When a physical eraseblock is moved, the WL sub-system has to pick the target
   * physical eraseblock to move to. The simplest way would be just to pick the
   * one with the highest erase counter. But in certain workloads this could lead
   * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a
   * situation when the picked physical eraseblock is constantly erased after the
   * data is written to it. So, we have a constant which limits the highest erase
- * counter of the free physical eraseblock to pick. Namely, the WL unit does
- * not pick eraseblocks with erase counter greater then the lowest erase
+ * counter of the free physical eraseblock to pick. Namely, the WL sub-system
+ * does not pick eraseblocks with erase counter greater than the lowest erase
   * counter plus %WL_FREE_MAX_DIFF.
   */
  #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
@@ -123,11 +126,11 @@
   * @abs_ec: the absolute erase counter value when the protection ends
   * @e: the wear-leveling entry of the physical eraseblock under protection
   *
- * When the WL unit returns a physical eraseblock, the physical eraseblock is
- * protected from being moved for some "time". For this reason, the physical
- * eraseblock is not directly moved from the @wl->free tree to the @wl->used
- * tree. There is one more tree in between where this physical eraseblock is
- * temporarily stored (@wl->prot).
+ * When the WL sub-system returns a physical eraseblock, the physical
+ * eraseblock is protected from being moved for some "time". For this reason,
+ * the physical eraseblock is not directly moved from the @wl->free tree to the
+ * @wl->used tree. There is one more tree in between where this physical
+ * eraseblock is temporarily stored (@wl->prot).
   *
   * All this protection stuff is needed because:
   *  o we don't want to move physical eraseblocks just after we have given them
@@ -175,7 +178,6 @@ struct ubi_wl_prot_entry {
   * @list: a link in the list of pending works
   * @func: worker function
   * @priv: private data of the worker function
- *
   * @e: physical eraseblock to erase
   * @torture: if the physical eraseblock has to be tortured
   *
@@ -473,52 +475,47 @@ retry:
         }
  
         switch (dtype) {
-               case UBI_LONGTERM:
-                       /*
-                        * For long term data we pick a physical eraseblock
-                        * with high erase counter. But the highest erase
-                        * counter we can pick is bounded by the the lowest
-                        * erase counter plus %WL_FREE_MAX_DIFF.
-                        */
-                       e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
-                       protect = LT_PROTECTION;
-                       break;
-               case UBI_UNKNOWN:
-                       /*
-                        * For unknown data we pick a physical eraseblock with
-                        * medium erase counter. But we by no means can pick a
-                        * physical eraseblock with erase counter greater or
-                        * equivalent than the lowest erase counter plus
-                        * %WL_FREE_MAX_DIFF.
-                        */
-                       first = rb_entry(rb_first(&ubi->free),
-                                        struct ubi_wl_entry, rb);
-                       last = rb_entry(rb_last(&ubi->free),
-                                       struct ubi_wl_entry, rb);
+       case UBI_LONGTERM:
+               /*
+                * For long term data we pick a physical eraseblock with high
+                * erase counter. But the highest erase counter we can pick is
+                * bounded by the lowest erase counter plus
+                * %WL_FREE_MAX_DIFF.
+                */
+               e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
+               protect = LT_PROTECTION;
+               break;
+       case UBI_UNKNOWN:
+               /*
+                * For unknown data we pick a physical eraseblock with medium
+                * erase counter. But we by no means can pick a physical
+                * eraseblock with erase counter greater than or equal to the
+                * lowest erase counter plus %WL_FREE_MAX_DIFF.
+                */
+               first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb);
+               last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, rb);
  
-                       if (last->ec - first->ec < WL_FREE_MAX_DIFF)
-                               e = rb_entry(ubi->free.rb_node,
-                                               struct ubi_wl_entry, rb);
-                       else {
-                               medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
-                               e = find_wl_entry(&ubi->free, medium_ec);
-                       }
-                       protect = U_PROTECTION;
-                       break;
-               case UBI_SHORTTERM:
-                       /*
-                        * For short term data we pick a physical eraseblock
-                        * with the lowest erase counter as we expect it will
-                        * be erased soon.
-                        */
-                       e = rb_entry(rb_first(&ubi->free),
-                                    struct ubi_wl_entry, rb);
-                       protect = ST_PROTECTION;
-                       break;
-               default:
-                       protect = 0;
-                       e = NULL;
-                       BUG();
+               if (last->ec - first->ec < WL_FREE_MAX_DIFF)
+                       e = rb_entry(ubi->free.rb_node,
+                                       struct ubi_wl_entry, rb);
+               else {
+                       medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
+                       e = find_wl_entry(&ubi->free, medium_ec);
+               }
+               protect = U_PROTECTION;
+               break;
+       case UBI_SHORTTERM:
+               /*
+                * For short term data we pick a physical eraseblock with the
+                * lowest erase counter as we expect it will be erased soon.
+                */
+               e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb);
+               protect = ST_PROTECTION;
+               break;
+       default:
+               protect = 0;
+               e = NULL;
+               BUG();
         }
  
         /*
@@ -582,7 +579,8 @@ found:
   * This function returns zero in case of success and a negative error code in
   * case of failure.
   */
-static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture)
+static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
+                     int torture)
  {
         int err;
         struct ubi_ec_hdr *ec_hdr;
@@ -634,8 +632,7 @@ out_free:
  }
  
  /**
- * check_protection_over - check if it is time to stop protecting some
- * physical eraseblocks.
+ * check_protection_over - check if it is time to stop protecting some PEBs.
   * @ubi: UBI device description object
   *
   * This function is called after each erase operation, when the absolute erase
@@ -871,6 +868,10 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
         }
  
         ubi_free_vid_hdr(ubi, vid_hdr);
+       if (scrubbing && !protect)
+               ubi_msg("scrubbed PEB %d, data moved to PEB %d",
+                       e1->pnum, e2->pnum);
+
         spin_lock(&ubi->wl_lock);
         if (protect)
                 prot_tree_add(ubi, e1, pe, protect);
@@ -1054,8 +1055,8 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
                 spin_unlock(&ubi->wl_lock);
  
                 /*
-                * One more erase operation has happened, take care about protected
-                * physical eraseblocks.
+                * One more erase operation has happened, take care of
+                * protected physical eraseblocks.
                  */
                 check_protection_over(ubi);
  
@@ -1136,7 +1137,7 @@ out_ro:
  }
  
  /**
- * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit.
+ * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system.
   * @ubi: UBI device description object
   * @pnum: physical eraseblock to return
   * @torture: if this physical eraseblock has to be tortured
@@ -1175,11 +1176,11 @@ retry:
                 /*
                  * User is putting the physical eraseblock which was selected
                  * as the target the data is moved to. It may happen if the EBA
-                * unit already re-mapped the LEB in 'ubi_eba_copy_leb()' but
-                * the WL unit has not put the PEB to the "used" tree yet, but
-                * it is about to do this. So we just set a flag which will
-                * tell the WL worker that the PEB is not needed anymore and
-                * should be scheduled for erasure.
+                * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()'
+                * but the WL sub-system has not put the PEB to the "used" tree
+                * yet, but it is about to do this. So we just set a flag which
+                * will tell the WL worker that the PEB is not needed anymore
+                * and should be scheduled for erasure.
                  */
                 dbg_wl("PEB %d is the target of data moving", pnum);
                 ubi_assert(!ubi->move_to_put);
@@ -1229,7 +1230,7 @@ int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
  {
         struct ubi_wl_entry *e;
  
-       ubi_msg("schedule PEB %d for scrubbing", pnum);
+       dbg_msg("schedule PEB %d for scrubbing", pnum);
  
  retry:
         spin_lock(&ubi->wl_lock);
@@ -1368,7 +1369,7 @@ int ubi_thread(void *u)
                 int err;
  
                 if (kthread_should_stop())
-                       goto out;
+                       break;
  
                 if (try_to_freeze())
                         continue;
@@ -1403,7 +1404,6 @@ int ubi_thread(void *u)
                 cond_resched();
         }
  
-out:
         dbg_wl("background thread \"%s\" is killed", ubi->bgt_name);
         return 0;
  }
@@ -1426,8 +1426,7 @@ static void cancel_pending(struct ubi_device *ubi)
  }
  
  /**
- * ubi_wl_init_scan - initialize the wear-leveling unit using scanning
- * information.
+ * ubi_wl_init_scan - initialize the WL sub-system using scanning information.
   * @ubi: UBI device description object
   * @si: scanning information
   *
@@ -1584,13 +1583,12 @@ static void protection_trees_destroy(struct ubi_device *ubi)
  }
  
  /**
- * ubi_wl_close - close the wear-leveling unit.
+ * ubi_wl_close - close the wear-leveling sub-system.
   * @ubi: UBI device description object
   */
  void ubi_wl_close(struct ubi_device *ubi)
  {
-       dbg_wl("close the UBI wear-leveling unit");
-
+       dbg_wl("close the WL sub-system");
         cancel_pending(ubi);
         protection_trees_destroy(ubi);
         tree_destroy(&ubi->used);
@@ -1602,8 +1600,7 @@ void ubi_wl_close(struct ubi_device *ubi)
  #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
  
  /**
- * paranoid_check_ec - make sure that the erase counter of a physical eraseblock
- * is correct.
+ * paranoid_check_ec - make sure that the erase counter of a PEB is correct.
   * @ubi: UBI device description object
   * @pnum: the physical eraseblock number to check
   * @ec: the erase counter to check
@@ -1644,13 +1641,12 @@ out_free:
  }
  
  /**
- * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present
- * in a WL RB-tree.
+ * paranoid_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree.
   * @e: the wear-leveling entry to check
   * @root: the root of the tree
   *
- * This function returns zero if @e is in the @root RB-tree and %1 if it
- * is not.
+ * This function returns zero if @e is in the @root RB-tree and %1 if it is
+ * not.
   */
  static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
                                      struct rb_root *root)
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c

index 00527805e4f170a8a1bdb6888aa2a30829210d6b..e5a6e2e84540f841a91f98c171cce122045724ea 100644 (file)
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -33,6 +33,7 @@
  */
  
  #include <linux/module.h>
+#include <linux/moduleparam.h>
  #include <linux/types.h>
  #include <linux/errno.h>
  #include <linux/ioport.h>
@@ -52,7 +53,9 @@
  #include <asm/hvcall.h>
  #include <asm/atomic.h>
  #include <asm/vio.h>
+#include <asm/iommu.h>
  #include <asm/uaccess.h>
+#include <asm/firmware.h>
  #include <linux/seq_file.h>
  
  #include "ibmveth.h"
@@ -94,8 +97,10 @@ static void ibmveth_proc_register_adapter(struct ibmveth_adapter *adapter);
  static void ibmveth_proc_unregister_adapter(struct ibmveth_adapter *adapter);
  static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
  static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter);
+static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);
  static struct kobj_type ktype_veth_pool;
  
+
  #ifdef CONFIG_PROC_FS
  #define IBMVETH_PROC_DIR "ibmveth"
  static struct proc_dir_entry *ibmveth_proc_dir;
@@ -226,16 +231,16 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
         u32 i;
         u32 count = pool->size - atomic_read(&pool->available);
         u32 buffers_added = 0;
+       struct sk_buff *skb;
+       unsigned int free_index, index;
+       u64 correlator;
+       unsigned long lpar_rc;
+       dma_addr_t dma_addr;
  
         mb();
  
         for(i = 0; i < count; ++i) {
-               struct sk_buff *skb;
-               unsigned int free_index, index;
-               u64 correlator;
                 union ibmveth_buf_desc desc;
-               unsigned long lpar_rc;
-               dma_addr_t dma_addr;
  
                 skb = alloc_skb(pool->buff_size, GFP_ATOMIC);
  
@@ -255,6 +260,9 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
                 dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
                                 pool->buff_size, DMA_FROM_DEVICE);
  
+               if (dma_mapping_error(dma_addr))
+                       goto failure;
+
                 pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
                 pool->dma_addr[index] = dma_addr;
                 pool->skbuff[index] = skb;
@@ -267,25 +275,32 @@ static void ibmveth_replenish_buffer_pool(struct ibmveth_adapter *adapter, struc
  
                 lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc);
  
-               if(lpar_rc != H_SUCCESS) {
-                       pool->free_map[free_index] = index;
-                       pool->skbuff[index] = NULL;
-                       if (pool->consumer_index == 0)
-                               pool->consumer_index = pool->size - 1;
-                       else
-                               pool->consumer_index--;
-                       dma_unmap_single(&adapter->vdev->dev,
-                                       pool->dma_addr[index], pool->buff_size,
-                                       DMA_FROM_DEVICE);
-                       dev_kfree_skb_any(skb);
-                       adapter->replenish_add_buff_failure++;
-                       break;
-               } else {
+               if (lpar_rc != H_SUCCESS)
+                       goto failure;
+               else {
                         buffers_added++;
                         adapter->replenish_add_buff_success++;
                 }
         }
  
+       mb();
+       atomic_add(buffers_added, &(pool->available));
+       return;
+
+failure:
+       pool->free_map[free_index] = index;
+       pool->skbuff[index] = NULL;
+       if (pool->consumer_index == 0)
+               pool->consumer_index = pool->size - 1;
+       else
+               pool->consumer_index--;
+       if (!dma_mapping_error(dma_addr))
+               dma_unmap_single(&adapter->vdev->dev,
+                                pool->dma_addr[index], pool->buff_size,
+                                DMA_FROM_DEVICE);
+       dev_kfree_skb_any(skb);
+       adapter->replenish_add_buff_failure++;
+
         mb();
         atomic_add(buffers_added, &(pool->available));
  }
@@ -297,7 +312,7 @@ static void ibmveth_replenish_task(struct ibmveth_adapter *adapter)
  
         adapter->replenish_task_cycles++;
  
-       for(i = 0; i < IbmVethNumBufferPools; i++)
+       for (i = (IbmVethNumBufferPools - 1); i >= 0; i--)
                 if(adapter->rx_buff_pool[i].active)
                         ibmveth_replenish_buffer_pool(adapter,
                                                      &adapter->rx_buff_pool[i]);
@@ -472,6 +487,18 @@ static void ibmveth_cleanup(struct ibmveth_adapter *adapter)
                 if (adapter->rx_buff_pool[i].active)
                         ibmveth_free_buffer_pool(adapter,
                                                  &adapter->rx_buff_pool[i]);
+
+       if (adapter->bounce_buffer != NULL) {
+               if (!dma_mapping_error(adapter->bounce_buffer_dma)) {
+                       dma_unmap_single(&adapter->vdev->dev,
+                                       adapter->bounce_buffer_dma,
+                                       adapter->netdev->mtu + IBMVETH_BUFF_OH,
+                                       DMA_BIDIRECTIONAL);
+                       adapter->bounce_buffer_dma = DMA_ERROR_CODE;
+               }
+               kfree(adapter->bounce_buffer);
+               adapter->bounce_buffer = NULL;
+       }
  }
  
  static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
@@ -607,6 +634,24 @@ static int ibmveth_open(struct net_device *netdev)
                 return rc;
         }
  
+       adapter->bounce_buffer =
+           kmalloc(netdev->mtu + IBMVETH_BUFF_OH, GFP_KERNEL);
+       if (!adapter->bounce_buffer) {
+               ibmveth_error_printk("unable to allocate bounce buffer\n");
+               ibmveth_cleanup(adapter);
+               napi_disable(&adapter->napi);
+               return -ENOMEM;
+       }
+       adapter->bounce_buffer_dma =
+           dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer,
+                          netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL);
+       if (dma_mapping_error(adapter->bounce_buffer_dma)) {
+               ibmveth_error_printk("unable to map bounce buffer\n");
+               ibmveth_cleanup(adapter);
+               napi_disable(&adapter->napi);
+               return -ENOMEM;
+       }
+
         ibmveth_debug_printk("initial replenish cycle\n");
         ibmveth_interrupt(netdev->irq, netdev);
  
@@ -853,10 +898,12 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
         unsigned int tx_packets = 0;
         unsigned int tx_send_failed = 0;
         unsigned int tx_map_failed = 0;
+       int used_bounce = 0;
+       unsigned long data_dma_addr;
  
         desc.fields.flags_len = IBMVETH_BUF_VALID | skb->len;
-       desc.fields.address = dma_map_single(&adapter->vdev->dev, skb->data,
-                                            skb->len, DMA_TO_DEVICE);
+       data_dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
+                                      skb->len, DMA_TO_DEVICE);
  
         if (skb->ip_summed == CHECKSUM_PARTIAL &&
             ip_hdr(skb)->protocol != IPPROTO_TCP && skb_checksum_help(skb)) {
@@ -875,12 +922,16 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
                 buf[1] = 0;
         }
  
-       if (dma_mapping_error(desc.fields.address)) {
-               ibmveth_error_printk("tx: unable to map xmit buffer\n");
+       if (dma_mapping_error(data_dma_addr)) {
+               if (!firmware_has_feature(FW_FEATURE_CMO))
+                       ibmveth_error_printk("tx: unable to map xmit buffer\n");
+               skb_copy_from_linear_data(skb, adapter->bounce_buffer,
+                                         skb->len);
+               desc.fields.address = adapter->bounce_buffer_dma;
                 tx_map_failed++;
-               tx_dropped++;
-               goto out;
-       }
+               used_bounce = 1;
+       } else
+               desc.fields.address = data_dma_addr;
  
         /* send the frame. Arbitrarily set retrycount to 1024 */
         correlator = 0;
@@ -904,8 +955,9 @@ static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev)
                 netdev->trans_start = jiffies;
         }
  
-       dma_unmap_single(&adapter->vdev->dev, desc.fields.address,
-                        skb->len, DMA_TO_DEVICE);
+       if (!used_bounce)
+               dma_unmap_single(&adapter->vdev->dev, data_dma_addr,
+                                skb->len, DMA_TO_DEVICE);
  
  out:   spin_lock_irqsave(&adapter->stats_lock, flags);
         netdev->stats.tx_dropped += tx_dropped;
@@ -1053,9 +1105,9 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
  static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
  {
         struct ibmveth_adapter *adapter = dev->priv;
+       struct vio_dev *viodev = adapter->vdev;
         int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
-       int reinit = 0;
-       int i, rc;
+       int i;
  
         if (new_mtu < IBMVETH_MAX_MTU)
                 return -EINVAL;
@@ -1067,23 +1119,34 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
         if (i == IbmVethNumBufferPools)
                 return -EINVAL;
  
+       /* Deactivate all the buffer pools so that the next loop can activate
+          only the buffer pools necessary to hold the new MTU */
+       for (i = 0; i < IbmVethNumBufferPools; i++)
+               if (adapter->rx_buff_pool[i].active) {
+                       ibmveth_free_buffer_pool(adapter,
+                                                &adapter->rx_buff_pool[i]);
+                       adapter->rx_buff_pool[i].active = 0;
+               }
+
         /* Look for an active buffer pool that can hold the new MTU */
         for(i = 0; i<IbmVethNumBufferPools; i++) {
-               if (!adapter->rx_buff_pool[i].active) {
-                       adapter->rx_buff_pool[i].active = 1;
-                       reinit = 1;
-               }
+               adapter->rx_buff_pool[i].active = 1;
  
                 if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) {
-                       if (reinit && netif_running(adapter->netdev)) {
+                       if (netif_running(adapter->netdev)) {
                                 adapter->pool_config = 1;
                                 ibmveth_close(adapter->netdev);
                                 adapter->pool_config = 0;
                                 dev->mtu = new_mtu;
-                               if ((rc = ibmveth_open(adapter->netdev)))
-                                       return rc;
-                       } else
-                               dev->mtu = new_mtu;
+                               vio_cmo_set_dev_desired(viodev,
+                                               ibmveth_get_desired_dma
+                                               (viodev));
+                               return ibmveth_open(adapter->netdev);
+                       }
+                       dev->mtu = new_mtu;
+                       vio_cmo_set_dev_desired(viodev,
+                                               ibmveth_get_desired_dma
+                                               (viodev));
                         return 0;
                 }
         }
@@ -1098,6 +1161,46 @@ static void ibmveth_poll_controller(struct net_device *dev)
  }
  #endif
  
+/**
+ * ibmveth_get_desired_dma - Calculate IO memory desired by the driver
+ *
+ * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
+ *
+ * Return value:
+ *     Number of bytes of IO data the driver will need to perform well.
+ */
+static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
+{
+       struct net_device *netdev = dev_get_drvdata(&vdev->dev);
+       struct ibmveth_adapter *adapter;
+       unsigned long ret;
+       int i;
+       int rxqentries = 1;
+
+       /* netdev inits at probe time along with the structures we need below */
+       if (netdev == NULL)
+               return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT);
+
+       adapter = netdev_priv(netdev);
+
+       ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
+       ret += IOMMU_PAGE_ALIGN(netdev->mtu);
+
+       for (i = 0; i < IbmVethNumBufferPools; i++) {
+               /* add the size of the active receive buffers */
+               if (adapter->rx_buff_pool[i].active)
+                       ret +=
+                           adapter->rx_buff_pool[i].size *
+                           IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i].
+                                   buff_size);
+               rxqentries += adapter->rx_buff_pool[i].size;
+       }
+       /* add the size of the receive queue entries */
+       ret += IOMMU_PAGE_ALIGN(rxqentries * sizeof(struct ibmveth_rx_q_entry));
+
+       return ret;
+}
+
  static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
  {
         int rc, i;
@@ -1242,6 +1345,8 @@ static int __devexit ibmveth_remove(struct vio_dev *dev)
         ibmveth_proc_unregister_adapter(adapter);
  
         free_netdev(netdev);
+       dev_set_drvdata(&dev->dev, NULL);
+
         return 0;
  }
  
@@ -1402,14 +1507,15 @@ const char * buf, size_t count)
                                 return -EPERM;
                         }
  
-                       pool->active = 0;
                         if (netif_running(netdev)) {
                                 adapter->pool_config = 1;
                                 ibmveth_close(netdev);
+                               pool->active = 0;
                                 adapter->pool_config = 0;
                                 if ((rc = ibmveth_open(netdev)))
                                         return rc;
                         }
+                       pool->active = 0;
                 }
         } else if (attr == &veth_num_attr) {
                 if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT)
@@ -1485,6 +1591,7 @@ static struct vio_driver ibmveth_driver = {
         .id_table       = ibmveth_device_table,
         .probe          = ibmveth_probe,
         .remove         = ibmveth_remove,
+       .get_desired_dma = ibmveth_get_desired_dma,
         .driver         = {
                 .name   = ibmveth_driver_name,
                 .owner  = THIS_MODULE,
diff --git a/drivers/net/ibmveth.h b/drivers/net/ibmveth.h

index 41f61cd18852b26cd7f6bfc36808588fc2b230e6..d281869487526371e28b1eb2254a5ff0d91f2a2b 100644 (file)
--- a/drivers/net/ibmveth.h
+++ b/drivers/net/ibmveth.h
@@ -93,9 +93,12 @@ static inline long h_illan_attributes(unsigned long unit_address,
    plpar_hcall_norets(H_CHANGE_LOGICAL_LAN_MAC, ua, mac)
  
  #define IbmVethNumBufferPools 5
+#define IBMVETH_IO_ENTITLEMENT_DEFAULT 4243456 /* MTU of 1500 needs 4.2MB */
  #define IBMVETH_BUFF_OH 22 /* Overhead: 14 ethernet header + 8 opaque handle */
  #define IBMVETH_MAX_MTU 68
  #define IBMVETH_MAX_POOL_COUNT 4096
+#define IBMVETH_BUFF_LIST_SIZE 4096
+#define IBMVETH_FILT_LIST_SIZE 4096
  #define IBMVETH_MAX_BUF_SIZE (1024 * 128)
  
  static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 };
@@ -143,6 +146,8 @@ struct ibmveth_adapter {
      struct ibmveth_rx_q rx_queue;
      int pool_config;
      int rx_csum;
+    void *bounce_buffer;
+    dma_addr_t bounce_buffer_dma;
  
      /* adapter specific stats */
      u64 replenish_task_cycles;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c

index c28d7cb2035b1dbcb5425aaeff55294d3f4eb786..0196a0df90210995ec6a5abe228eb23bb22d494c 100644 (file)
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -19,6 +19,7 @@
  //#define DEBUG
  #include <linux/netdevice.h>
  #include <linux/etherdevice.h>
+#include <linux/ethtool.h>
  #include <linux/module.h>
  #include <linux/virtio.h>
  #include <linux/virtio_net.h>
@@ -54,9 +55,15 @@ struct virtnet_info
         struct tasklet_struct tasklet;
         bool free_in_tasklet;
  
+       /* I like... big packets and I cannot lie! */
+       bool big_packets;
+
         /* Receive & send queues. */
         struct sk_buff_head recv;
         struct sk_buff_head send;
+
+       /* Chain pages by the private ptr. */
+       struct page *pages;
  };
  
  static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb)
@@ -69,6 +76,23 @@ static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb)
         sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
  }
  
+static void give_a_page(struct virtnet_info *vi, struct page *page)
+{
+       page->private = (unsigned long)vi->pages;
+       vi->pages = page;
+}
+
+static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
+{
+       struct page *p = vi->pages;
+
+       if (p)
+               vi->pages = (struct page *)p->private;
+       else
+               p = alloc_page(gfp_mask);
+       return p;
+}
+
  static void skb_xmit_done(struct virtqueue *svq)
  {
         struct virtnet_info *vi = svq->vdev->priv;
@@ -88,6 +112,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb,
                         unsigned len)
  {
         struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
+       int err;
  
         if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
                 pr_debug("%s: short packet %i\n", dev->name, len);
@@ -95,10 +120,23 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb,
                 goto drop;
         }
         len -= sizeof(struct virtio_net_hdr);
-       BUG_ON(len > MAX_PACKET_LEN);
  
-       skb_trim(skb, len);
+       if (len <= MAX_PACKET_LEN) {
+               unsigned int i;
  
+               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+                       give_a_page(dev->priv, skb_shinfo(skb)->frags[i].page);
+               skb->data_len = 0;
+               skb_shinfo(skb)->nr_frags = 0;
+       }
+
+       err = pskb_trim(skb, len);
+       if (err) {
+               pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err);
+               dev->stats.rx_dropped++;
+               goto drop;
+       }
+       skb->truesize += skb->data_len;
         dev->stats.rx_bytes += skb->len;
         dev->stats.rx_packets++;
  
@@ -160,7 +198,7 @@ static void try_fill_recv(struct virtnet_info *vi)
  {
         struct sk_buff *skb;
         struct scatterlist sg[2+MAX_SKB_FRAGS];
-       int num, err;
+       int num, err, i;
  
         sg_init_table(sg, 2+MAX_SKB_FRAGS);
         for (;;) {
@@ -170,6 +208,24 @@ static void try_fill_recv(struct virtnet_info *vi)
  
                 skb_put(skb, MAX_PACKET_LEN);
                 vnet_hdr_to_sg(sg, skb);
+
+               if (vi->big_packets) {
+                       for (i = 0; i < MAX_SKB_FRAGS; i++) {
+                               skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+                               f->page = get_a_page(vi, GFP_ATOMIC);
+                               if (!f->page)
+                                       break;
+
+                               f->page_offset = 0;
+                               f->size = PAGE_SIZE;
+
+                               skb->data_len += PAGE_SIZE;
+                               skb->len += PAGE_SIZE;
+
+                               skb_shinfo(skb)->nr_frags++;
+                       }
+               }
+
                 num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
                 skb_queue_head(&vi->recv, skb);
  
@@ -335,16 +391,11 @@ again:
         free_old_xmit_skbs(vi);
  
         /* If we has a buffer left over from last time, send it now. */
-       if (unlikely(vi->last_xmit_skb)) {
-               if (xmit_skb(vi, vi->last_xmit_skb) != 0) {
-                       /* Drop this skb: we only queue one. */
-                       vi->dev->stats.tx_dropped++;
-                       kfree_skb(skb);
-                       skb = NULL;
-                       goto stop_queue;
-               }
-               vi->last_xmit_skb = NULL;
-       }
+       if (unlikely(vi->last_xmit_skb) &&
+           xmit_skb(vi, vi->last_xmit_skb) != 0)
+               goto stop_queue;
+
+       vi->last_xmit_skb = NULL;
  
         /* Put new one in send queue and do transmit */
         if (likely(skb)) {
@@ -370,6 +421,11 @@ stop_queue:
                 netif_start_queue(dev);
                 goto again;
         }
+       if (skb) {
+               /* Drop this skb: we only queue one. */
+               vi->dev->stats.tx_dropped++;
+               kfree_skb(skb);
+       }
         goto done;
  }
  
@@ -408,6 +464,22 @@ static int virtnet_close(struct net_device *dev)
         return 0;
  }
  
+static int virtnet_set_tx_csum(struct net_device *dev, u32 data)
+{
+       struct virtnet_info *vi = netdev_priv(dev);
+       struct virtio_device *vdev = vi->vdev;
+
+       if (data && !virtio_has_feature(vdev, VIRTIO_NET_F_CSUM))
+               return -ENOSYS;
+
+       return ethtool_op_set_tx_hw_csum(dev, data);
+}
+
+static struct ethtool_ops virtnet_ethtool_ops = {
+       .set_tx_csum = virtnet_set_tx_csum,
+       .set_sg = ethtool_op_set_sg,
+};
+
  static int virtnet_probe(struct virtio_device *vdev)
  {
         int err;
@@ -427,6 +499,7 @@ static int virtnet_probe(struct virtio_device *vdev)
  #ifdef CONFIG_NET_POLL_CONTROLLER
         dev->poll_controller = virtnet_netpoll;
  #endif
+       SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
         SET_NETDEV_DEV(dev, &vdev->dev);
  
         /* Do we support "hardware" checksums? */
@@ -462,11 +535,18 @@ static int virtnet_probe(struct virtio_device *vdev)
         vi->dev = dev;
         vi->vdev = vdev;
         vdev->priv = vi;
+       vi->pages = NULL;
  
         /* If they give us a callback when all buffers are done, we don't need
          * the timer. */
         vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY);
  
+       /* If we can receive ANY GSO packets, we must allocate large ones. */
+       if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4)
+           || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)
+           || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
+               vi->big_packets = true;
+
         /* We expect two virtqueues, receive then send. */
         vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
         if (IS_ERR(vi->rvq)) {
@@ -541,6 +621,10 @@ static void virtnet_remove(struct virtio_device *vdev)
         vdev->config->del_vq(vi->svq);
         vdev->config->del_vq(vi->rvq);
         unregister_netdev(vi->dev);
+
+       while (vi->pages)
+               __free_pages(get_a_page(vi, GFP_KERNEL), 0);
+
         free_netdev(vi->dev);
  }
  
@@ -553,7 +637,9 @@ static unsigned int features[] = {
         VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
         VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
         VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
-       VIRTIO_NET_F_HOST_ECN, VIRTIO_F_NOTIFY_ON_EMPTY,
+       VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
+       VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */
+       VIRTIO_F_NOTIFY_ON_EMPTY,
  };
  
  static struct virtio_driver virtio_net = {
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig

index 3a7a11a75fb464017ce284d34a41388f2875dd15..1d7ec3129349b2cefeee64afd68fb92ae85a17e5 100644 (file)
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -4,7 +4,7 @@ config OF_DEVICE
  
  config OF_GPIO
         def_bool y
-       depends on OF && PPC_OF && HAVE_GPIO_LIB
+       depends on OF && PPC_OF && GPIOLIB
         help
           OpenFirmware GPIO accessors
  
diff --git a/drivers/of/of_i2c.c b/drivers/of/of_i2c.c

index 5c015d310d4a9a92b493cbd7d34691c85dd6b992..344e1b03dd8b708661fcb08982d27ebd79bd586b 100644 (file)
--- a/drivers/of/of_i2c.c
+++ b/drivers/of/of_i2c.c
@@ -91,8 +91,6 @@ void of_register_i2c_devices(struct i2c_adapter *adap,
                 }
  
                 info.irq = irq_of_parse_and_map(node, 0);
-               if (info.irq == NO_IRQ)
-                       info.irq = -1;
  
                 if (of_find_i2c_driver(node, &info) < 0) {
                         irq_dispose_mapping(info.irq);
diff --git a/drivers/parport/parport_ax88796.c b/drivers/parport/parport_ax88796.c

index 4ec220b2eae7dba0bf61481931209fbc08f8d2fc..6938d2e9f18f8171d41adcaa0e33e1e487334305 100644 (file)
--- a/drivers/parport/parport_ax88796.c
+++ b/drivers/parport/parport_ax88796.c
@@ -406,6 +406,8 @@ static int parport_ax88796_resume(struct platform_device *dev)
  #define parport_ax88796_resume  NULL
  #endif
  
+MODULE_ALIAS("platform:ax88796-pp");
+
  static struct platform_driver axdrv = {
         .driver         = {
                 .name   = "ax88796-pp",
diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c

index 71be36f18709e80771719103941e40c9f653f922..308ddb201b660da9d84e21279eef011eb87907af 100644 (file)
--- a/drivers/power/ds2760_battery.c
+++ b/drivers/power/ds2760_battery.c
@@ -433,6 +433,8 @@ static int ds2760_battery_resume(struct platform_device *pdev)
  
  #endif /* CONFIG_PM */
  
+MODULE_ALIAS("platform:ds2760-battery");
+
  static struct platform_driver ds2760_battery_driver = {
         .driver = {
                 .name = "ds2760-battery",
diff --git a/drivers/power/pda_power.c b/drivers/power/pda_power.c

index 82810b7bff9ccbf7b2dd183f7e49e4036239ff66..0471ec743ab9561425d723482c986b809fbf1d9c 100644 (file)
--- a/drivers/power/pda_power.c
+++ b/drivers/power/pda_power.c
@@ -362,6 +362,8 @@ static int pda_power_resume(struct platform_device *pdev)
  #define pda_power_resume NULL
  #endif /* CONFIG_PM */
  
+MODULE_ALIAS("platform:pda-power");
+
  static struct platform_driver pda_power_pdrv = {
         .driver = {
                 .name = "pda-power",
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c

index 5ab34340919be3a417caf1ad5bfd50ff91f1054d..79954bd6bfa5a02b963e6c2868c9c5ad22bed5af 100644 (file)
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -15,6 +15,7 @@
  #include <linux/err.h>
  #include <linux/virtio.h>
  #include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
  #include <linux/interrupt.h>
  #include <linux/virtio_ring.h>
  #include <linux/pfn.h>
@@ -87,16 +88,20 @@ static u32 kvm_get_features(struct virtio_device *vdev)
         return features;
  }
  
-static void kvm_set_features(struct virtio_device *vdev, u32 features)
+static void kvm_finalize_features(struct virtio_device *vdev)
  {
-       unsigned int i;
+       unsigned int i, bits;
         struct kvm_device_desc *desc = to_kvmdev(vdev)->desc;
         /* Second half of bitmap is features we accept. */
         u8 *out_features = kvm_vq_features(desc) + desc->feature_len;
  
+       /* Give virtio_ring a chance to accept features. */
+       vring_transport_features(vdev);
+
         memset(out_features, 0, desc->feature_len);
-       for (i = 0; i < min(desc->feature_len * 8, 32); i++) {
-               if (features & (1 << i))
+       bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
+       for (i = 0; i < bits; i++) {
+               if (test_bit(i, vdev->features))
                         out_features[i / 8] |= (1 << (i % 8));
         }
  }
@@ -222,7 +227,7 @@ static void kvm_del_vq(struct virtqueue *vq)
   */
  static struct virtio_config_ops kvm_vq_configspace_ops = {
         .get_features = kvm_get_features,
-       .set_features = kvm_set_features,
+       .finalize_features = kvm_finalize_features,
         .get = kvm_get,
         .set = kvm_set,
         .get_status = kvm_get_status,
@@ -333,6 +338,25 @@ static int __init kvm_devices_init(void)
         return 0;
  }
  
+/* code for early console output with virtio_console */
+static __init int early_put_chars(u32 vtermno, const char *buf, int count)
+{
+       char scratch[17];
+       unsigned int len = count;
+
+       if (len > sizeof(scratch) - 1)
+               len = sizeof(scratch) - 1;
+       scratch[len] = '\0';
+       memcpy(scratch, buf, len);
+       kvm_hypercall1(KVM_S390_VIRTIO_NOTIFY, __pa(scratch));
+       return len;
+}
+
+void s390_virtio_console_init(void)
+{
+       virtio_cons_early_init(early_put_chars);
+}
+
  /*
   * We do this after core stuff, but before the drivers.
   */
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c

index eb702b96d57cf7ac631af7e18406493bf7eef6d9..c4a7c06793c5ff6bc96257cbc77868dd1c25e49c 100644 (file)
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -3819,6 +3819,20 @@ static int ibmvfc_remove(struct vio_dev *vdev)
         return 0;
  }
  
+/**
+ * ibmvfc_get_desired_dma - Calculate DMA resources needed by the driver
+ * @vdev:      vio device struct
+ *
+ * Return value:
+ *     Number of bytes the driver will need to DMA map at the same time in
+ *     order to perform well.
+ */
+static unsigned long ibmvfc_get_desired_dma(struct vio_dev *vdev)
+{
+       unsigned long pool_dma = max_requests * sizeof(union ibmvfc_iu);
+       return pool_dma + ((512 * 1024) * driver_template.cmd_per_lun);
+}
+
  static struct vio_device_id ibmvfc_device_table[] __devinitdata = {
         {"fcp", "IBM,vfc-client"},
         { "", "" }
@@ -3829,6 +3843,7 @@ static struct vio_driver ibmvfc_driver = {
         .id_table = ibmvfc_device_table,
         .probe = ibmvfc_probe,
         .remove = ibmvfc_remove,
+       .get_desired_dma = ibmvfc_get_desired_dma,
         .driver = {
                 .name = IBMVFC_NAME,
                 .owner = THIS_MODULE,
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c

index 5d23368a1bcee68e31bc04019cd696118132bea0..20000ec79b043a9a17fcfc54b45e58cce556f064 100644 (file)
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -72,6 +72,7 @@
  #include <linux/delay.h>
  #include <asm/firmware.h>
  #include <asm/vio.h>
+#include <asm/firmware.h>
  #include <scsi/scsi.h>
  #include <scsi/scsi_cmnd.h>
  #include <scsi/scsi_host.h>
@@ -426,8 +427,10 @@ static int map_sg_data(struct scsi_cmnd *cmd,
                                            SG_ALL * sizeof(struct srp_direct_buf),
                                            &evt_struct->ext_list_token, 0);
                 if (!evt_struct->ext_list) {
-                       sdev_printk(KERN_ERR, cmd->device,
-                                   "Can't allocate memory for indirect table\n");
+                       if (!firmware_has_feature(FW_FEATURE_CMO))
+                               sdev_printk(KERN_ERR, cmd->device,
+                                           "Can't allocate memory "
+                                           "for indirect table\n");
                         return 0;
                 }
         }
@@ -743,7 +746,9 @@ static int ibmvscsi_queuecommand(struct scsi_cmnd *cmnd,
         srp_cmd->lun = ((u64) lun) << 48;
  
         if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) {
-               sdev_printk(KERN_ERR, cmnd->device, "couldn't convert cmd to srp_cmd\n");
+               if (!firmware_has_feature(FW_FEATURE_CMO))
+                       sdev_printk(KERN_ERR, cmnd->device,
+                                   "couldn't convert cmd to srp_cmd\n");
                 free_event_struct(&hostdata->pool, evt_struct);
                 return SCSI_MLQUEUE_HOST_BUSY;
         }
@@ -855,7 +860,10 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata)
                                             DMA_BIDIRECTIONAL);
  
         if (dma_mapping_error(req->buffer)) {
-               dev_err(hostdata->dev, "Unable to map request_buffer for adapter_info!\n");
+               if (!firmware_has_feature(FW_FEATURE_CMO))
+                       dev_err(hostdata->dev,
+                               "Unable to map request_buffer for "
+                               "adapter_info!\n");
                 free_event_struct(&hostdata->pool, evt_struct);
                 return;
         }
@@ -1400,7 +1408,9 @@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata,
                                                     DMA_BIDIRECTIONAL);
  
         if (dma_mapping_error(host_config->buffer)) {
-               dev_err(hostdata->dev, "dma_mapping error getting host config\n");
+               if (!firmware_has_feature(FW_FEATURE_CMO))
+                       dev_err(hostdata->dev,
+                               "dma_mapping error getting host config\n");
                 free_event_struct(&hostdata->pool, evt_struct);
                 return -1;
         }
@@ -1604,7 +1614,7 @@ static struct scsi_host_template driver_template = {
         .eh_host_reset_handler = ibmvscsi_eh_host_reset_handler,
         .slave_configure = ibmvscsi_slave_configure,
         .change_queue_depth = ibmvscsi_change_queue_depth,
-       .cmd_per_lun = 16,
+       .cmd_per_lun = IBMVSCSI_CMDS_PER_LUN_DEFAULT,
         .can_queue = IBMVSCSI_MAX_REQUESTS_DEFAULT,
         .this_id = -1,
         .sg_tablesize = SG_ALL,
@@ -1612,6 +1622,26 @@ static struct scsi_host_template driver_template = {
         .shost_attrs = ibmvscsi_attrs,
  };
  
+/**
+ * ibmvscsi_get_desired_dma - Calculate IO memory desired by the driver
+ *
+ * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
+ *
+ * Return value:
+ *     Number of bytes of IO data the driver will need to perform well.
+ */
+static unsigned long ibmvscsi_get_desired_dma(struct vio_dev *vdev)
+{
+       /* iu_storage data allocated in initialize_event_pool */
+       unsigned long desired_io = max_requests * sizeof(union viosrp_iu);
+
+       /* add io space for sg data */
+       desired_io += (IBMVSCSI_MAX_SECTORS_DEFAULT *
+                            IBMVSCSI_CMDS_PER_LUN_DEFAULT);
+
+       return desired_io;
+}
+
  /**
   * Called by bus code for each adapter
   */
@@ -1641,7 +1671,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
         hostdata->host = host;
         hostdata->dev = dev;
         atomic_set(&hostdata->request_limit, -1);
-       hostdata->host->max_sectors = 32 * 8; /* default max I/O 32 pages */
+       hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT;
  
         rc = ibmvscsi_ops->init_crq_queue(&hostdata->queue, hostdata, max_requests);
         if (rc != 0 && rc != H_RESOURCE) {
@@ -1735,6 +1765,7 @@ static struct vio_driver ibmvscsi_driver = {
         .id_table = ibmvscsi_device_table,
         .probe = ibmvscsi_probe,
         .remove = ibmvscsi_remove,
+       .get_desired_dma = ibmvscsi_get_desired_dma,
         .driver = {
                 .name = "ibmvscsi",
                 .owner = THIS_MODULE,
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.h b/drivers/scsi/ibmvscsi/ibmvscsi.h

index 46e850e302c7918306d96218ff606229ae230183..2d4339d5e16e4e3c9cfed8b7b4fe30b87cb84932 100644 (file)
--- a/drivers/scsi/ibmvscsi/ibmvscsi.h
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.h
@@ -45,6 +45,8 @@ struct Scsi_Host;
  #define MAX_INDIRECT_BUFS 10
  
  #define IBMVSCSI_MAX_REQUESTS_DEFAULT 100
+#define IBMVSCSI_CMDS_PER_LUN_DEFAULT 16
+#define IBMVSCSI_MAX_SECTORS_DEFAULT 256 /* 32 * 8 = default max I/O 32 pages */
  #define IBMVSCSI_MAX_CMDS_PER_LUN 64
  
  /* ------------------------------------------------------------
diff --git a/drivers/telephony/ixj.c b/drivers/telephony/ixj.c

index 49cd9793404f33c5165d5b77e0d9657c3f7c1b20..ec7aeb502d1548ecbfd64c521518c11857e308de 100644 (file)
--- a/drivers/telephony/ixj.c
+++ b/drivers/telephony/ixj.c
@@ -6095,15 +6095,15 @@ static int capabilities_check(IXJ *j, struct phone_capability *pcreq)
         return retval;
  }
  
-static int ixj_ioctl(struct inode *inode, struct file *file_p, unsigned int cmd, unsigned long arg)
+static long do_ixj_ioctl(struct file *file_p, unsigned int cmd, unsigned long arg)
  {
         IXJ_TONE ti;
         IXJ_FILTER jf;
         IXJ_FILTER_RAW jfr;
         void __user *argp = (void __user *)arg;
-
-       unsigned int raise, mant;
+       struct inode *inode = file_p->f_path.dentry->d_inode;
         unsigned int minor = iminor(inode);
+       unsigned int raise, mant;
         int board = NUM(inode);
  
         IXJ *j = get_ixj(NUM(inode));
@@ -6661,6 +6661,15 @@ static int ixj_ioctl(struct inode *inode, struct file *file_p, unsigned int cmd,
         return retval;
  }
  
+static long ixj_ioctl(struct file *file_p, unsigned int cmd, unsigned long arg)
+{
+       long ret;
+       lock_kernel();
+       ret = do_ixj_ioctl(file_p, cmd, arg);
+       unlock_kernel();
+       return ret;
+}
+
  static int ixj_fasync(int fd, struct file *file_p, int mode)
  {
         IXJ *j = get_ixj(NUM(file_p->f_path.dentry->d_inode));
@@ -6674,7 +6683,7 @@ static const struct file_operations ixj_fops =
          .read           = ixj_enhanced_read,
          .write          = ixj_enhanced_write,
          .poll           = ixj_poll,
-        .ioctl          = ixj_ioctl,
+        .unlocked_ioctl = ixj_ioctl,
          .release        = ixj_release,
          .fasync         = ixj_fasync
  };
diff --git a/drivers/usb/gadget/at91_udc.h b/drivers/usb/gadget/at91_udc.h

index a973f2a50fb959509472aa66db390817c78f1a7d..c65d62295890e94584045ab9f0cc4b872ab29711 100644 (file)
--- a/drivers/usb/gadget/at91_udc.h
+++ b/drivers/usb/gadget/at91_udc.h
@@ -171,7 +171,7 @@ struct at91_request {
  #endif
  
  #define ERR(stuff...)          pr_err("udc: " stuff)
-#define WARN(stuff...)         pr_warning("udc: " stuff)
+#define WARNING(stuff...)      pr_warning("udc: " stuff)
  #define INFO(stuff...)         pr_info("udc: " stuff)
  #define DBG(stuff...)          pr_debug("udc: " stuff)
  
diff --git a/drivers/usb/gadget/cdc2.c b/drivers/usb/gadget/cdc2.c

index d490d0289507290a752b90655323cc2ee7e8871f..a39a4b940c33c93aa129fb5aab06fd4c326e556d 100644 (file)
--- a/drivers/usb/gadget/cdc2.c
+++ b/drivers/usb/gadget/cdc2.c
@@ -170,7 +170,7 @@ static int __init cdc_bind(struct usb_composite_dev *cdev)
                  * but if the controller isn't recognized at all then
                  * that assumption is a bit more likely to be wrong.
                  */
-               WARN(cdev, "controller '%s' not recognized; trying %s\n",
+               WARNING(cdev, "controller '%s' not recognized; trying %s\n",
                                 gadget->name,
                                 cdc_config_driver.label);
                 device_desc.bcdDevice =
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c

index d7aaaa29b1e1978b78cfeaad0495ad4e2e455f6e..bcac2e68660d5ba51fd8c253fa136c4385e9c058 100644 (file)
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -293,7 +293,7 @@ static int __init eth_bind(struct usb_composite_dev *cdev)
                  * but if the controller isn't recognized at all then
                  * that assumption is a bit more likely to be wrong.
                  */
-               WARN(cdev, "controller '%s' not recognized; trying %s\n",
+               WARNING(cdev, "controller '%s' not recognized; trying %s\n",
                                 gadget->name,
                                 eth_config_driver.label);
                 device_desc.bcdDevice =
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c

index 15c24edbb61adb29a40db7a5e7c83dc63a4a777f..ea2c31d18080d2a23025979a4c798f5133ceba48 100644 (file)
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -308,7 +308,7 @@ MODULE_LICENSE("Dual BSD/GPL");
         dev_vdbg(&(d)->gadget->dev , fmt , ## args)
  #define ERROR(d, fmt, args...) \
         dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
+#define WARNING(d, fmt, args...) \
         dev_warn(&(d)->gadget->dev , fmt , ## args)
  #define INFO(d, fmt, args...) \
         dev_info(&(d)->gadget->dev , fmt , ## args)
@@ -1091,7 +1091,7 @@ static int ep0_queue(struct fsg_dev *fsg)
         if (rc != 0 && rc != -ESHUTDOWN) {
  
                 /* We can't do much more than wait for a reset */
-               WARN(fsg, "error in submission: %s --> %d\n",
+               WARNING(fsg, "error in submission: %s --> %d\n",
                                 fsg->ep0->name, rc);
         }
         return rc;
@@ -1227,7 +1227,7 @@ static void received_cbi_adsc(struct fsg_dev *fsg, struct fsg_buffhd *bh)
  
         /* Save the command for later */
         if (fsg->cbbuf_cmnd_size)
-               WARN(fsg, "CB[I] overwriting previous command\n");
+               WARNING(fsg, "CB[I] overwriting previous command\n");
         fsg->cbbuf_cmnd_size = req->actual;
         memcpy(fsg->cbbuf_cmnd, req->buf, fsg->cbbuf_cmnd_size);
  
@@ -1506,7 +1506,7 @@ static void start_transfer(struct fsg_dev *fsg, struct usb_ep *ep,
                  * submissions if DMA is enabled. */
                 if (rc != -ESHUTDOWN && !(rc == -EOPNOTSUPP &&
                                                 req->length == 0))
-                       WARN(fsg, "error in submission: %s --> %d\n",
+                       WARNING(fsg, "error in submission: %s --> %d\n",
                                         ep->name, rc);
         }
  }
@@ -2294,7 +2294,7 @@ static int halt_bulk_in_endpoint(struct fsg_dev *fsg)
                 VDBG(fsg, "delayed bulk-in endpoint halt\n");
         while (rc != 0) {
                 if (rc != -EAGAIN) {
-                       WARN(fsg, "usb_ep_set_halt -> %d\n", rc);
+                       WARNING(fsg, "usb_ep_set_halt -> %d\n", rc);
                         rc = 0;
                         break;
                 }
@@ -2317,7 +2317,7 @@ static int wedge_bulk_in_endpoint(struct fsg_dev *fsg)
                 VDBG(fsg, "delayed bulk-in endpoint wedge\n");
         while (rc != 0) {
                 if (rc != -EAGAIN) {
-                       WARN(fsg, "usb_ep_set_wedge -> %d\n", rc);
+                       WARNING(fsg, "usb_ep_set_wedge -> %d\n", rc);
                         rc = 0;
                         break;
                 }
@@ -3755,7 +3755,7 @@ static int __init check_parameters(struct fsg_dev *fsg)
                 if (gcnum >= 0)
                         mod_data.release = 0x0300 + gcnum;
                 else {
-                       WARN(fsg, "controller '%s' not recognized\n",
+                       WARNING(fsg, "controller '%s' not recognized\n",
                                 fsg->gadget->name);
                         mod_data.release = 0x0399;
                 }
diff --git a/drivers/usb/gadget/fsl_usb2_udc.c b/drivers/usb/gadget/fsl_usb2_udc.c

index 1695382f30fe5ce3a4898f067b12b8a632429f96..1cfccf102a2dc9fdc9948daf4aadaf5a32c793cf 100644 (file)
--- a/drivers/usb/gadget/fsl_usb2_udc.c
+++ b/drivers/usb/gadget/fsl_usb2_udc.c
@@ -1538,7 +1538,7 @@ static void dtd_complete_irq(struct fsl_udc *udc)
  
                 /* If the ep is configured */
                 if (curr_ep->name == NULL) {
-                       WARN("Invalid EP?");
+                       WARNING("Invalid EP?");
                         continue;
                 }
  
diff --git a/drivers/usb/gadget/fsl_usb2_udc.h b/drivers/usb/gadget/fsl_usb2_udc.h

index 98b1483ef6a511459e44b2b8f17ada3329570ec1..6131752a38bcc617f5f7388fcb83da41c03df474 100644 (file)
--- a/drivers/usb/gadget/fsl_usb2_udc.h
+++ b/drivers/usb/gadget/fsl_usb2_udc.h
@@ -552,7 +552,7 @@ static void dump_msg(const char *label, const u8 * buf, unsigned int length)
  #endif
  
  #define ERR(stuff...)          pr_err("udc: " stuff)
-#define WARN(stuff...)         pr_warning("udc: " stuff)
+#define WARNING(stuff...)              pr_warning("udc: " stuff)
  #define INFO(stuff...)         pr_info("udc: " stuff)
  
  /*-------------------------------------------------------------------------*/
diff --git a/drivers/usb/gadget/gmidi.c b/drivers/usb/gadget/gmidi.c

index 7f4d4828e3aabaca7db67bd512017f094b419992..ea8651e3da1a53a205ffe67e88889b87d5073a89 100644 (file)
--- a/drivers/usb/gadget/gmidi.c
+++ b/drivers/usb/gadget/gmidi.c
@@ -138,8 +138,6 @@ static void gmidi_transmit(struct gmidi_device* dev, struct usb_request* req);
         dev_vdbg(&(d)->gadget->dev , fmt , ## args)
  #define ERROR(d, fmt, args...) \
         dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
-       dev_warn(&(d)->gadget->dev , fmt , ## args)
  #define INFO(d, fmt, args...) \
         dev_info(&(d)->gadget->dev , fmt , ## args)
  
diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c

index 48f1c63b70136ec17d458091e4ae7cba8528aaf5..60aa04847b189255522cf60489cb1d88a16e737b 100644 (file)
--- a/drivers/usb/gadget/goku_udc.c
+++ b/drivers/usb/gadget/goku_udc.c
@@ -1768,7 +1768,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id)
          * usb_gadget_driver_{register,unregister}() must change.
          */
         if (the_controller) {
-               WARN(dev, "ignoring %s\n", pci_name(pdev));
+               WARNING(dev, "ignoring %s\n", pci_name(pdev));
                 return -EBUSY;
         }
         if (!pdev->irq) {
diff --git a/drivers/usb/gadget/goku_udc.h b/drivers/usb/gadget/goku_udc.h

index bc4eb1e0b507213c4da4ae1abbefdc7701340863..566cb23190565288ec500fc3c525944801982910 100644 (file)
--- a/drivers/usb/gadget/goku_udc.h
+++ b/drivers/usb/gadget/goku_udc.h
@@ -285,7 +285,7 @@ struct goku_udc {
  
  #define ERROR(dev,fmt,args...) \
         xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
+#define WARNING(dev,fmt,args...) \
         xprintk(dev , KERN_WARNING , fmt , ## args)
  #define INFO(dev,fmt,args...) \
         xprintk(dev , KERN_INFO , fmt , ## args)
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c

index 04692d59fc1cda8d22a883beaca7b7d976f49aa5..f4585d3e90d7a54fd4836648e2cb50eed7d7a947 100644 (file)
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -262,8 +262,6 @@ static const char *CHIP;
  
  #define ERROR(dev,fmt,args...) \
         xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
-       xprintk(dev , KERN_WARNING , fmt , ## args)
  #define INFO(dev,fmt,args...) \
         xprintk(dev , KERN_INFO , fmt , ## args)
  
diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c

index b67ab677af725dd2d5d685ed3f982651f085d5a6..5cfb5ebf388159546ae4aa59bd71b42b04b83c9b 100644 (file)
--- a/drivers/usb/gadget/net2280.c
+++ b/drivers/usb/gadget/net2280.c
@@ -1007,7 +1007,7 @@ static void scan_dma_completions (struct net2280_ep *ep)
                          * 0122, and 0124; not all cases trigger the warning.
                          */
                         if ((tmp & (1 << NAK_OUT_PACKETS)) == 0) {
-                               WARN (ep->dev, "%s lost packet sync!\n",
+                               WARNING (ep->dev, "%s lost packet sync!\n",
                                                 ep->ep.name);
                                 req->req.status = -EOVERFLOW;
                         } else if ((tmp = readl (&ep->regs->ep_avail)) != 0) {
diff --git a/drivers/usb/gadget/net2280.h b/drivers/usb/gadget/net2280.h

index 1f2af398a9a405df9e78192080474114c2ac95f6..81a71dbdc2c6c029df26500ef37754f0c77c366e 100644 (file)
--- a/drivers/usb/gadget/net2280.h
+++ b/drivers/usb/gadget/net2280.h
@@ -272,7 +272,7 @@ static inline void net2280_led_shutdown (struct net2280 *dev)
  
  #define ERROR(dev,fmt,args...) \
         xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
+#define WARNING(dev,fmt,args...) \
         xprintk(dev , KERN_WARNING , fmt , ## args)
  #define INFO(dev,fmt,args...) \
         xprintk(dev , KERN_INFO , fmt , ## args)
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c

index 4b79a8509e848e8ebb3d6d656d684d3d3af080f9..395bd18444828577a97487ca87f9cd289d8e1095 100644 (file)
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -1120,7 +1120,7 @@ static int omap_ep_set_halt(struct usb_ep *_ep, int value)
                         status = -EINVAL;
                 else if (value) {
                         if (ep->udc->ep0_set_config) {
-                               WARN("error changing config?\n");
+                               WARNING("error changing config?\n");
                                 omap_writew(UDC_CLR_CFG, UDC_SYSCON2);
                         }
                         omap_writew(UDC_STALL_CMD, UDC_SYSCON2);
@@ -1764,7 +1764,7 @@ do_stall:
                                         u.r.bRequestType, u.r.bRequest, status);
                         if (udc->ep0_set_config) {
                                 if (udc->ep0_reset_config)
-                                       WARN("error resetting config?\n");
+                                       WARNING("error resetting config?\n");
                                 else
                                         omap_writew(UDC_CLR_CFG, UDC_SYSCON2);
                         }
@@ -3076,7 +3076,7 @@ static int omap_udc_suspend(struct platform_device *dev, pm_message_t message)
          * which would prevent entry to deep sleep...
          */
         if ((devstat & UDC_ATT) != 0 && (devstat & UDC_SUS) == 0) {
-               WARN("session active; suspend requires disconnect\n");
+               WARNING("session active; suspend requires disconnect\n");
                 omap_pullup(&udc->gadget, 0);
         }
  
diff --git a/drivers/usb/gadget/omap_udc.h b/drivers/usb/gadget/omap_udc.h

index 8522bbb12278a37dcac839a587aae062dc541a58..29edc51b6b2212dba3df4853dea948cd5c878223 100644 (file)
--- a/drivers/usb/gadget/omap_udc.h
+++ b/drivers/usb/gadget/omap_udc.h
@@ -188,7 +188,7 @@ struct omap_udc {
  #endif
  
  #define ERR(stuff...)          pr_err("udc: " stuff)
-#define WARN(stuff...)         pr_warning("udc: " stuff)
+#define WARNING(stuff...)      pr_warning("udc: " stuff)
  #define INFO(stuff...)         pr_info("udc: " stuff)
  #define DBG(stuff...)          pr_debug("udc: " stuff)
  
diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c

index 49cd9e145a9bfbcdba12c76192f93ae3073b716d..e0090085b78ee74824ff2a3940a419a129e9570b 100644 (file)
--- a/drivers/usb/gadget/printer.c
+++ b/drivers/usb/gadget/printer.c
@@ -179,7 +179,7 @@ module_param(qlen, uint, S_IRUGO|S_IWUSR);
  
  #define ERROR(dev, fmt, args...) \
         xprintk(dev, KERN_ERR, fmt, ## args)
-#define WARN(dev, fmt, args...) \
+#define WARNING(dev, fmt, args...) \
         xprintk(dev, KERN_WARNING, fmt, ## args)
  #define INFO(dev, fmt, args...) \
         xprintk(dev, KERN_INFO, fmt, ## args)
diff --git a/drivers/usb/gadget/pxa25x_udc.c b/drivers/usb/gadget/pxa25x_udc.c

index 8fb0066609bb4a9db0e22a045e70b14909cc809c..7e6725d8997663b81b719a87e48b974f120b00cb 100644 (file)
--- a/drivers/usb/gadget/pxa25x_udc.c
+++ b/drivers/usb/gadget/pxa25x_udc.c
@@ -342,7 +342,7 @@ pxa25x_ep_free_request (struct usb_ep *_ep, struct usb_request *_req)
         struct pxa25x_request   *req;
  
         req = container_of (_req, struct pxa25x_request, req);
-       WARN_ON (!list_empty (&req->queue));
+       WARN_ON(!list_empty (&req->queue));
         kfree(req);
  }
  
@@ -1556,7 +1556,7 @@ config_change:
                                          * tell us about config change events,
                                          * so later ones may fail...
                                          */
-                                       WARN("config change %02x fail %d?\n",
+                                       WARNING("config change %02x fail %d?\n",
                                                 u.r.bRequest, i);
                                         return;
                                         /* TODO experiment:  if has_cfr,
@@ -2330,7 +2330,7 @@ static int pxa25x_udc_suspend(struct platform_device *dev, pm_message_t state)
         unsigned long flags;
  
         if (!udc->mach->gpio_pullup && !udc->mach->udc_command)
-               WARN("USB host won't detect disconnect!\n");
+               WARNING("USB host won't detect disconnect!\n");
         udc->suspended = 1;
  
         local_irq_save(flags);
diff --git a/drivers/usb/gadget/pxa25x_udc.h b/drivers/usb/gadget/pxa25x_udc.h

index 4d11ece7c95f0d5cf49c4343bb9002cd02a35115..c8a13215e02c6c5c7294839b33b7409b9aaeb228 100644 (file)
--- a/drivers/usb/gadget/pxa25x_udc.h
+++ b/drivers/usb/gadget/pxa25x_udc.h
@@ -259,7 +259,7 @@ dump_state(struct pxa25x_udc *dev)
  #define DBG(lvl, stuff...) do{if ((lvl) <= UDC_DEBUG) DMSG(stuff);}while(0)
  
  #define ERR(stuff...)          pr_err("udc: " stuff)
-#define WARN(stuff...)         pr_warning("udc: " stuff)
+#define WARNING(stuff...)      pr_warning("udc: " stuff)
  #define INFO(stuff...)         pr_info("udc: " stuff)
  
  
diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c

index 5458f43a866856731cd932e8b98229040fe560ee..3791e6271903e7384a2a6773281d323c9de4b833 100644 (file)
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -116,7 +116,6 @@ static inline int qlen(struct usb_gadget *gadget)
  #undef DBG
  #undef VDBG
  #undef ERROR
-#undef WARN
  #undef INFO
  
  #define xprintk(d, level, fmt, args...) \
@@ -140,8 +139,6 @@ static inline int qlen(struct usb_gadget *gadget)
  
  #define ERROR(dev, fmt, args...) \
         xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev, fmt, args...) \
-       xprintk(dev , KERN_WARNING , fmt , ## args)
  #define INFO(dev, fmt, args...) \
         xprintk(dev , KERN_INFO , fmt , ## args)
  
diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c

index 31178e10cbbecca030561792a4ae97f457f877ed..ce1ca0ba0515c9a848d320dee18aaa29e5738808 100644 (file)
--- a/drivers/usb/host/isp116x-hcd.c
+++ b/drivers/usb/host/isp116x-hcd.c
@@ -882,7 +882,7 @@ static void isp116x_endpoint_disable(struct usb_hcd *hcd,
         for (i = 0; i < 100 && !list_empty(&hep->urb_list); i++)
                 msleep(3);
         if (!list_empty(&hep->urb_list))
-               WARN("ep %p not empty?\n", ep);
+               WARNING("ep %p not empty?\n", ep);
  
         kfree(ep);
         hep->hcpriv = NULL;
diff --git a/drivers/usb/host/isp116x.h b/drivers/usb/host/isp116x.h

index 595b90a9984880c819011ad4e181a223d8d6d50b..aa211bafcff9dacc41457d10894f9eb6e8c82812 100644 (file)
--- a/drivers/usb/host/isp116x.h
+++ b/drivers/usb/host/isp116x.h
@@ -338,7 +338,7 @@ struct isp116x_ep {
  #endif
  
  #define ERR(stuff...)          printk(KERN_ERR "116x: " stuff)
-#define WARN(stuff...)         printk(KERN_WARNING "116x: " stuff)
+#define WARNING(stuff...)      printk(KERN_WARNING "116x: " stuff)
  #define INFO(stuff...)         printk(KERN_INFO "116x: " stuff)
  
  /* ------------------------------------------------- */
diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c

index 340d72da554ad8e545defea9778874de2f0ff5bb..8a74bbb57d08bee72ec63fea25ad6e7aa96d0bff 100644 (file)
--- a/drivers/usb/host/sl811-hcd.c
+++ b/drivers/usb/host/sl811-hcd.c
@@ -1026,7 +1026,7 @@ sl811h_endpoint_disable(struct usb_hcd *hcd, struct usb_host_endpoint *hep)
         if (!list_empty(&hep->urb_list))
                 msleep(3);
         if (!list_empty(&hep->urb_list))
-               WARN("ep %p not empty?\n", ep);
+               WARNING("ep %p not empty?\n", ep);
  
         kfree(ep);
         hep->hcpriv = NULL;
diff --git a/drivers/usb/host/sl811.h b/drivers/usb/host/sl811.h

index 7690d98e42a7b8ce25d96f43b127dd7025a46ad3..b6b8c1f233dd8caa62caebf5b44c92a8480a9b37 100644 (file)
--- a/drivers/usb/host/sl811.h
+++ b/drivers/usb/host/sl811.h
@@ -261,6 +261,6 @@ sl811_read_buf(struct sl811 *sl811, int addr, void *buf, size_t count)
  #endif
  
  #define ERR(stuff...)          printk(KERN_ERR "sl811: " stuff)
-#define WARN(stuff...)         printk(KERN_WARNING "sl811: " stuff)
+#define WARNING(stuff...)      printk(KERN_WARNING "sl811: " stuff)
  #define INFO(stuff...)         printk(KERN_INFO "sl811: " stuff)
  
diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c

index 054dedd2812718c1c1350acb058f983e1dda3331..b358c4e1cf212f114bbad89416f358c244311bc3 100644 (file)
--- a/drivers/usb/misc/usbtest.c
+++ b/drivers/usb/misc/usbtest.c
@@ -81,7 +81,7 @@ static struct usb_device *testdev_to_usbdev (struct usbtest_dev *test)
  
  #define ERROR(tdev, fmt, args...) \
         dev_err(&(tdev)->intf->dev , fmt , ## args)
-#define WARN(tdev, fmt, args...) \
+#define WARNING(tdev, fmt, args...) \
         dev_warn(&(tdev)->intf->dev , fmt , ## args)
  
  /*-------------------------------------------------------------------------*/
@@ -1946,7 +1946,7 @@ usbtest_probe (struct usb_interface *intf, const struct usb_device_id *id)
  
                         status = get_endpoints (dev, intf);
                         if (status < 0) {
-                               WARN(dev, "couldn't get endpoints, %d\n",
+                               WARNING(dev, "couldn't get endpoints, %d\n",
                                                 status);
                                 return status;
                         }
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c

index 7084e7e146c0b6fbc319bb9ceefcf419b7111a13..5b78fd0aff0a6d50306c75f80d99f52f97dbe99e 100644 (file)
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -71,13 +71,6 @@ static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env)
                               dev->id.device, dev->id.vendor);
  }
  
-static struct bus_type virtio_bus = {
-       .name  = "virtio",
-       .match = virtio_dev_match,
-       .dev_attrs = virtio_dev_attrs,
-       .uevent = virtio_uevent,
-};
-
  static void add_status(struct virtio_device *dev, unsigned status)
  {
         dev->config->set_status(dev, dev->config->get_status(dev) | status);
@@ -120,12 +113,16 @@ static int virtio_dev_probe(struct device *_d)
                         set_bit(f, dev->features);
         }
  
+       /* Transport features always preserved to pass to finalize_features. */
+       for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
+               if (device_features & (1 << i))
+                       set_bit(i, dev->features);
+
         err = drv->probe(dev);
         if (err)
                 add_status(dev, VIRTIO_CONFIG_S_FAILED);
         else {
-               /* They should never have set feature bits beyond 32 */
-               dev->config->set_features(dev, dev->features[0]);
+               dev->config->finalize_features(dev);
                 add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
         }
         return err;
@@ -147,13 +144,20 @@ static int virtio_dev_remove(struct device *_d)
         return 0;
  }
  
+static struct bus_type virtio_bus = {
+       .name  = "virtio",
+       .match = virtio_dev_match,
+       .dev_attrs = virtio_dev_attrs,
+       .uevent = virtio_uevent,
+       .probe = virtio_dev_probe,
+       .remove = virtio_dev_remove,
+};
+
  int register_virtio_driver(struct virtio_driver *driver)
  {
         /* Catch this early. */
         BUG_ON(driver->feature_table_size && !driver->feature_table);
         driver->driver.bus = &virtio_bus;
-       driver->driver.probe = virtio_dev_probe;
-       driver->driver.remove = virtio_dev_remove;
         return driver_register(&driver->driver);
  }
  EXPORT_SYMBOL_GPL(register_virtio_driver);
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c

index eae7236310e450ce1647a25a480eccad9147dec4..c7dc37c7cce91592326fff43187097ae4049ed33 100644 (file)
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -94,12 +94,17 @@ static u32 vp_get_features(struct virtio_device *vdev)
         return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
  }
  
-/* virtio config->set_features() implementation */
-static void vp_set_features(struct virtio_device *vdev, u32 features)
+/* virtio config->finalize_features() implementation */
+static void vp_finalize_features(struct virtio_device *vdev)
  {
         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
  
-       iowrite32(features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
+       /* Give virtio_ring a chance to accept features. */
+       vring_transport_features(vdev);
+
+       /* We only support 32 feature bits. */
+       BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1);
+       iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES);
  }
  
  /* virtio config->get() implementation */
@@ -297,7 +302,7 @@ static struct virtio_config_ops virtio_pci_config_ops = {
         .find_vq        = vp_find_vq,
         .del_vq         = vp_del_vq,
         .get_features   = vp_get_features,
-       .set_features   = vp_set_features,
+       .finalize_features = vp_finalize_features,
  };
  
  /* the PCI probing function */
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c

index 72bf8bc090142f02a9065c0ff0448299a5e9a2e7..6eb5303fed11b20613083ed960aa27cb8e2d4bd9 100644 (file)
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -18,6 +18,7 @@
   */
  #include <linux/virtio.h>
  #include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
  #include <linux/device.h>
  
  #ifdef DEBUG
@@ -87,8 +88,11 @@ static int vring_add_buf(struct virtqueue *_vq,
         if (vq->num_free < out + in) {
                 pr_debug("Can't add buf len %i - avail = %i\n",
                          out + in, vq->num_free);
-               /* We notify *even if* VRING_USED_F_NO_NOTIFY is set here. */
-               vq->notify(&vq->vq);
+               /* FIXME: for historical reasons, we force a notify here if
+                * there are outgoing parts to the buffer.  Presumably the
+                * host should service the ring ASAP. */
+               if (out)
+                       vq->notify(&vq->vq);
                 END_USE(vq);
                 return -ENOSPC;
         }
@@ -320,4 +324,19 @@ void vring_del_virtqueue(struct virtqueue *vq)
  }
  EXPORT_SYMBOL_GPL(vring_del_virtqueue);
  
+/* Manipulates transport-specific feature bits. */
+void vring_transport_features(struct virtio_device *vdev)
+{
+       unsigned int i;
+
+       for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
+               switch (i) {
+               default:
+                       /* We don't understand this bit. */
+                       clear_bit(i, vdev->features);
+               }
+       }
+}
+EXPORT_SYMBOL_GPL(vring_transport_features);
+
  MODULE_LICENSE("GPL");
diff --git a/fs/Kconfig b/fs/Kconfig

index 37db79a2ff9578e162fcfd8a346d2c6254a95c7f..97e3bdedb1e671500841e2034369dd83a3835b4b 100644 (file)
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -902,65 +902,7 @@ endif # BLOCK
  
  menu "Pseudo filesystems"
  
-config PROC_FS
-       bool "/proc file system support" if EMBEDDED
-       default y
-       help
-         This is a virtual file system providing information about the status
-         of the system. "Virtual" means that it doesn't take up any space on
-         your hard disk: the files are created on the fly by the kernel when
-         you try to access them. Also, you cannot read the files with older
-         version of the program less: you need to use more or cat.
-
-         It's totally cool; for example, "cat /proc/interrupts" gives
-         information about what the different IRQs are used for at the moment
-         (there is a small number of Interrupt ReQuest lines in your computer
-         that are used by the attached devices to gain the CPU's attention --
-         often a source of trouble if two devices are mistakenly configured
-         to use the same IRQ). The program procinfo to display some
-         information about your system gathered from the /proc file system.
-
-         Before you can use the /proc file system, it has to be mounted,
-         meaning it has to be given a location in the directory hierarchy.
-         That location should be /proc. A command such as "mount -t proc proc
-         /proc" or the equivalent line in /etc/fstab does the job.
-
-         The /proc file system is explained in the file
-         <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
-         ("man 5 proc").
-
-         This option will enlarge your kernel by about 67 KB. Several
-         programs depend on this, so everyone should say Y here.
-
-config PROC_KCORE
-       bool "/proc/kcore support" if !ARM
-       depends on PROC_FS && MMU
-
-config PROC_VMCORE
-        bool "/proc/vmcore support (EXPERIMENTAL)"
-        depends on PROC_FS && CRASH_DUMP
-       default y
-        help
-        Exports the dump image of crashed kernel in ELF format.
-
-config PROC_SYSCTL
-       bool "Sysctl support (/proc/sys)" if EMBEDDED
-       depends on PROC_FS
-       select SYSCTL
-       default y
-       ---help---
-         The sysctl interface provides a means of dynamically changing
-         certain kernel parameters and variables on the fly without requiring
-         a recompile of the kernel or reboot of the system.  The primary
-         interface is through /proc/sys.  If you say Y here a tree of
-         modifiable sysctl entries will be generated beneath the
-          /proc/sys directory. They are explained in the files
-         in <file:Documentation/sysctl/>.  Note that enabling this
-         option will enlarge the kernel by at least 8 KB.
-
-         As it is generally a good thing, you should say Y here unless
-         building a kernel for install/rescue disks or your system is very
-         limited in memory.
+source "fs/proc/Kconfig"
  
  config SYSFS
         bool "sysfs file system support" if EMBEDDED
@@ -2093,20 +2035,6 @@ config CODA_FS
           To compile the coda client support as a module, choose M here: the
           module will be called coda.
  
-config CODA_FS_OLD_API
-       bool "Use 96-bit Coda file identifiers"
-       depends on CODA_FS
-       help
-         A new kernel-userspace API had to be introduced for Coda v6.0
-         to support larger 128-bit file identifiers as needed by the
-         new realms implementation.
-
-         However this new API is not backward compatible with older
-         clients. If you really need to run the old Coda userspace
-         cache manager then say Y.
-
-         For most cases you probably want to say N.
-
  config AFS_FS
         tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
         depends on INET && EXPERIMENTAL
diff --git a/fs/aio.c b/fs/aio.c

index 0fb3117ddd93d8a517720a744cd415a4b914f63c..0051fd94b44e7e75bde7882b2d9c074a1d1f8188 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -586,7 +586,6 @@ static void use_mm(struct mm_struct *mm)
         struct task_struct *tsk = current;
  
         task_lock(tsk);
-       tsk->flags |= PF_BORROWED_MM;
         active_mm = tsk->active_mm;
         atomic_inc(&mm->mm_count);
         tsk->mm = mm;
@@ -610,7 +609,6 @@ static void unuse_mm(struct mm_struct *mm)
         struct task_struct *tsk = current;
  
         task_lock(tsk);
-       tsk->flags &= ~PF_BORROWED_MM;
         tsk->mm = NULL;
         /* active_mm is still 'mm' */
         enter_lazy_tlb(mm, tsk);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c

index 639d2d8b57106ac6e524c4ab7c940ea2fad895da..3b6ff854d98379b9af3dca3ea6889e79879575e7 100644 (file)
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -131,6 +131,15 @@ static int padzero(unsigned long elf_bss)
  #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
  #endif
  
+#ifndef ELF_BASE_PLATFORM
+/*
+ * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
+ * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
+ * will be copied to the user stack in the same manner as AT_PLATFORM.
+ */
+#define ELF_BASE_PLATFORM NULL
+#endif
+
  static int
  create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                 unsigned long load_addr, unsigned long interp_load_addr)
@@ -142,7 +151,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
         elf_addr_t __user *envp;
         elf_addr_t __user *sp;
         elf_addr_t __user *u_platform;
+       elf_addr_t __user *u_base_platform;
         const char *k_platform = ELF_PLATFORM;
+       const char *k_base_platform = ELF_BASE_PLATFORM;
         int items;
         elf_addr_t *elf_info;
         int ei_index = 0;
@@ -172,6 +183,19 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                         return -EFAULT;
         }
  
+       /*
+        * If this architecture has a "base" platform capability
+        * string, copy it to userspace.
+        */
+       u_base_platform = NULL;
+       if (k_base_platform) {
+               size_t len = strlen(k_base_platform) + 1;
+
+               u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
+               if (__copy_to_user(u_base_platform, k_base_platform, len))
+                       return -EFAULT;
+       }
+
         /* Create the ELF interpreter info */
         elf_info = (elf_addr_t *)current->mm->saved_auxv;
         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
@@ -209,6 +233,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                 NEW_AUX_ENT(AT_PLATFORM,
                             (elf_addr_t)(unsigned long)u_platform);
         }
+       if (k_base_platform) {
+               NEW_AUX_ENT(AT_BASE_PLATFORM,
+                           (elf_addr_t)(unsigned long)u_base_platform);
+       }
         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
         }
@@ -1478,7 +1506,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
         const struct user_regset_view *view = task_user_regset_view(dump_task);
         struct elf_thread_core_info *t;
         struct elf_prpsinfo *psinfo;
-       struct task_struct *g, *p;
+       struct core_thread *ct;
         unsigned int i;
  
         info->size = 0;
@@ -1517,31 +1545,26 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
         /*
          * Allocate a structure for each thread.
          */
-       rcu_read_lock();
-       do_each_thread(g, p)
-               if (p->mm == dump_task->mm) {
-                       t = kzalloc(offsetof(struct elf_thread_core_info,
-                                            notes[info->thread_notes]),
-                                   GFP_ATOMIC);
-                       if (unlikely(!t)) {
-                               rcu_read_unlock();
-                               return 0;
-                       }
-                       t->task = p;
-                       if (p == dump_task || !info->thread) {
-                               t->next = info->thread;
-                               info->thread = t;
-                       } else {
-                               /*
-                                * Make sure to keep the original task at
-                                * the head of the list.
-                                */
-                               t->next = info->thread->next;
-                               info->thread->next = t;
-                       }
+       for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
+               t = kzalloc(offsetof(struct elf_thread_core_info,
+                                    notes[info->thread_notes]),
+                           GFP_KERNEL);
+               if (unlikely(!t))
+                       return 0;
+
+               t->task = ct->task;
+               if (ct->task == dump_task || !info->thread) {
+                       t->next = info->thread;
+                       info->thread = t;
+               } else {
+                       /*
+                        * Make sure to keep the original task at
+                        * the head of the list.
+                        */
+                       t->next = info->thread->next;
+                       info->thread->next = t;
                 }
-       while_each_thread(g, p);
-       rcu_read_unlock();
+       }
  
         /*
          * Now fill in each thread's information.
@@ -1688,7 +1711,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
  {
  #define        NUM_NOTES       6
         struct list_head *t;
-       struct task_struct *g, *p;
  
         info->notes = NULL;
         info->prstatus = NULL;
@@ -1720,20 +1742,19 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
  
         info->thread_status_size = 0;
         if (signr) {
+               struct core_thread *ct;
                 struct elf_thread_status *ets;
-               rcu_read_lock();
-               do_each_thread(g, p)
-                       if (current->mm == p->mm && current != p) {
-                               ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
-                               if (!ets) {
-                                       rcu_read_unlock();
-                                       return 0;
-                               }
-                               ets->thread = p;
-                               list_add(&ets->list, &info->thread_list);
-                       }
-               while_each_thread(g, p);
-               rcu_read_unlock();
+
+               for (ct = current->mm->core_state->dumper.next;
+                                               ct; ct = ct->next) {
+                       ets = kzalloc(sizeof(*ets), GFP_KERNEL);
+                       if (!ets)
+                               return 0;
+
+                       ets->thread = ct->task;
+                       list_add(&ets->list, &info->thread_list);
+               }
+
                 list_for_each(t, &info->thread_list) {
                         int sz;
  
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c

index d051a32e6270ec31975da904476e1cefbaa9be1f..1b59b1edf26de890b8d4ebfd54912f670c0ba2c2 100644 (file)
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1573,7 +1573,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
         struct memelfnote *notes = NULL;
         struct elf_prstatus *prstatus = NULL;   /* NT_PRSTATUS */
         struct elf_prpsinfo *psinfo = NULL;     /* NT_PRPSINFO */
-       struct task_struct *g, *p;
         LIST_HEAD(thread_list);
         struct list_head *t;
         elf_fpregset_t *fpu = NULL;
@@ -1622,20 +1621,19 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
  #endif
  
         if (signr) {
+               struct core_thread *ct;
                 struct elf_thread_status *tmp;
-               rcu_read_lock();
-               do_each_thread(g,p)
-                       if (current->mm == p->mm && current != p) {
-                               tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
-                               if (!tmp) {
-                                       rcu_read_unlock();
-                                       goto cleanup;
-                               }
-                               tmp->thread = p;
-                               list_add(&tmp->list, &thread_list);
-                       }
-               while_each_thread(g,p);
-               rcu_read_unlock();
+
+               for (ct = current->mm->core_state->dumper.next;
+                                               ct; ct = ct->next) {
+                       tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+                       if (!tmp)
+                               goto cleanup;
+
+                       tmp->thread = ct->task;
+                       list_add(&tmp->list, &thread_list);
+               }
+
                 list_for_each(t, &thread_list) {
                         struct elf_thread_status *tmp;
                         int sz;
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c

index e1c854890f9400b9467419ce9e625084391d9853..bf4a3fd3c8e33a910f0ab7ae638a4b66d06f08e5 100644 (file)
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -28,11 +28,9 @@ int coda_fake_statfs;
  char * coda_f2s(struct CodaFid *f)
  {
         static char s[60];
-#ifdef CONFIG_CODA_FS_OLD_API
-       sprintf(s, "(%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2]);
-#else
+
         sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]);
-#endif
+
         return s;
  }
  
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c

index 40c36f7352a609a061cabdd1704e8074c0e0ee2e..0d9b80ec689ccc0741fee358d92f26b34f1bc4c9 100644 (file)
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -378,11 +378,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam");
  MODULE_DESCRIPTION("Coda Distributed File System VFS interface");
  MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR);
  MODULE_LICENSE("GPL");
-#ifdef CONFIG_CODA_FS_OLD_API
-MODULE_VERSION("5.3.21");
-#else
  MODULE_VERSION("6.6");
-#endif
  
  static int __init init_coda(void)
  {
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c

index 359e531094ddddb74a88de2dbb74e11dde27d528..ce432bca95d1f875db17233560d7d7f8cf2dbbfa 100644 (file)
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -52,12 +52,8 @@ static void *alloc_upcall(int opcode, int size)
          inp->ih.opcode = opcode;
         inp->ih.pid = current->pid;
         inp->ih.pgid = task_pgrp_nr(current);
-#ifdef CONFIG_CODA_FS_OLD_API
-       memset(&inp->ih.cred, 0, sizeof(struct coda_cred));
-       inp->ih.cred.cr_fsuid = current->fsuid;
-#else
         inp->ih.uid = current->fsuid;
-#endif
+
         return (void*)inp;
  }
  
@@ -166,20 +162,11 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
         union inputArgs *inp;
         union outputArgs *outp;
         int insize, outsize, error;
-#ifdef CONFIG_CODA_FS_OLD_API
-       struct coda_cred cred = { 0, };
-       cred.cr_fsuid = uid;
-#endif
         
         insize = SIZE(release);
         UPARG(CODA_CLOSE);
         
-#ifdef CONFIG_CODA_FS_OLD_API
-       memcpy(&(inp->ih.cred), &cred, sizeof(cred));
-#else
         inp->ih.uid = uid;
-#endif
-       
          inp->coda_close.VFid = *fid;
          inp->coda_close.flags = flags;
  
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c

index 18e2c548161d43661e10f99f7b8f412d0c0e523c..5235c67e7594141b289f008a77ee447de5faf8c3 100644 (file)
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -25,7 +25,6 @@
  #include <linux/slab.h>
  #include <linux/raid/md.h>
  #include <linux/kd.h>
-#include <linux/dirent.h>
  #include <linux/route.h>
  #include <linux/in6.h>
  #include <linux/ipv6_route.h>
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c

index 78878c5781cadc0d13846d8af6a993039d8b39e6..eba87ff3177b6070284ee3eea402fd34c610c657 100644 (file)
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -116,7 +116,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
         if (xop->callback == NULL)
                 wait_event(recv_wq, (op->done != 0));
         else {
-               rv = -EINPROGRESS;
+               rv = FILE_LOCK_DEFERRED;
                 goto out;
         }
  
diff --git a/fs/dquot.c b/fs/dquot.c

index 5ac77da19959f9f15792d16669d0b4110674cc3e..1346eebe74ce973e1b81edd788a58442089329af 100644 (file)
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -562,6 +562,8 @@ static struct shrinker dqcache_shrinker = {
   */
  static void dqput(struct dquot *dquot)
  {
+       int ret;
+
         if (!dquot)
                 return;
  #ifdef __DQUOT_PARANOIA
@@ -594,7 +596,19 @@ we_slept:
         if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) {
                 spin_unlock(&dq_list_lock);
                 /* Commit dquot before releasing */
-               dquot->dq_sb->dq_op->write_dquot(dquot);
+               ret = dquot->dq_sb->dq_op->write_dquot(dquot);
+               if (ret < 0) {
+                       printk(KERN_ERR "VFS: cannot write quota structure on "
+                               "device %s (error %d). Quota may get out of "
+                               "sync!\n", dquot->dq_sb->s_id, ret);
+                       /*
+                        * We clear dirty bit anyway, so that we avoid
+                        * infinite loop here
+                        */
+                       spin_lock(&dq_list_lock);
+                       clear_dquot_dirty(dquot);
+                       spin_unlock(&dq_list_lock);
+               }
                 goto we_slept;
         }
         /* Clear flag in case dquot was inactive (something bad happened) */
@@ -875,7 +889,10 @@ static void print_warning(struct dquot *dquot, const int warntype)
         char *msg = NULL;
         struct tty_struct *tty;
  
-       if (!need_print_warning(dquot))
+       if (warntype == QUOTA_NL_IHARDBELOW ||
+           warntype == QUOTA_NL_ISOFTBELOW ||
+           warntype == QUOTA_NL_BHARDBELOW ||
+           warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot))
                 return;
  
         mutex_lock(&tty_mutex);
@@ -1083,6 +1100,35 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
         return QUOTA_OK;
  }
  
+static int info_idq_free(struct dquot *dquot, ulong inodes)
+{
+       if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+           dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
+               return QUOTA_NL_NOWARN;
+
+       if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit)
+               return QUOTA_NL_ISOFTBELOW;
+       if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit &&
+           dquot->dq_dqb.dqb_curinodes - inodes < dquot->dq_dqb.dqb_ihardlimit)
+               return QUOTA_NL_IHARDBELOW;
+       return QUOTA_NL_NOWARN;
+}
+
+static int info_bdq_free(struct dquot *dquot, qsize_t space)
+{
+       if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+           toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
+               return QUOTA_NL_NOWARN;
+
+       if (toqb(dquot->dq_dqb.dqb_curspace - space) <=
+           dquot->dq_dqb.dqb_bsoftlimit)
+               return QUOTA_NL_BSOFTBELOW;
+       if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit &&
+           toqb(dquot->dq_dqb.dqb_curspace - space) <
+                                               dquot->dq_dqb.dqb_bhardlimit)
+               return QUOTA_NL_BHARDBELOW;
+       return QUOTA_NL_NOWARN;
+}
  /*
   *     Initialize quota pointers in inode
   *     Transaction must be started at entry
@@ -1139,6 +1185,28 @@ int dquot_drop(struct inode *inode)
         return 0;
  }
  
+/* Wrapper to remove references to quota structures from inode */
+void vfs_dq_drop(struct inode *inode)
+{
+       /* Here we can get arbitrary inode from clear_inode() so we have
+        * to be careful. OTOH we don't need locking as quota operations
+        * are allowed to change only at mount time */
+       if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
+           && inode->i_sb->dq_op->drop) {
+               int cnt;
+               /* Test before calling to rule out calls from proc and such
+                 * where we are not allowed to block. Note that this is
+                * actually reliable test even without the lock - the caller
+                * must assure that nobody can come after the DQUOT_DROP and
+                * add quota pointers back anyway */
+               for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+                       if (inode->i_dquot[cnt] != NODQUOT)
+                               break;
+               if (cnt < MAXQUOTAS)
+                       inode->i_sb->dq_op->drop(inode);
+       }
+}
+
  /*
   * Following four functions update i_blocks+i_bytes fields and
   * quota information (together with appropriate checks)
@@ -1248,6 +1316,7 @@ warn_put_all:
  int dquot_free_space(struct inode *inode, qsize_t number)
  {
         unsigned int cnt;
+       char warntype[MAXQUOTAS];
  
         /* First test before acquiring mutex - solves deadlocks when we
           * re-enter the quota code and are already holding the mutex */
@@ -1256,6 +1325,7 @@ out_sub:
                 inode_sub_bytes(inode, number);
                 return QUOTA_OK;
         }
+
         down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
         /* Now recheck reliably when holding dqptr_sem */
         if (IS_NOQUOTA(inode)) {
@@ -1266,6 +1336,7 @@ out_sub:
         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                 if (inode->i_dquot[cnt] == NODQUOT)
                         continue;
+               warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
                 dquot_decr_space(inode->i_dquot[cnt], number);
         }
         inode_sub_bytes(inode, number);
@@ -1274,6 +1345,7 @@ out_sub:
         for (cnt = 0; cnt < MAXQUOTAS; cnt++)
                 if (inode->i_dquot[cnt])
                         mark_dquot_dirty(inode->i_dquot[cnt]);
+       flush_warnings(inode->i_dquot, warntype);
         up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
         return QUOTA_OK;
  }
@@ -1284,11 +1356,13 @@ out_sub:
  int dquot_free_inode(const struct inode *inode, unsigned long number)
  {
         unsigned int cnt;
+       char warntype[MAXQUOTAS];
  
         /* First test before acquiring mutex - solves deadlocks when we
           * re-enter the quota code and are already holding the mutex */
         if (IS_NOQUOTA(inode))
                 return QUOTA_OK;
+
         down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
         /* Now recheck reliably when holding dqptr_sem */
         if (IS_NOQUOTA(inode)) {
@@ -1299,6 +1373,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                 if (inode->i_dquot[cnt] == NODQUOT)
                         continue;
+               warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
                 dquot_decr_inodes(inode->i_dquot[cnt], number);
         }
         spin_unlock(&dq_data_lock);
@@ -1306,6 +1381,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
         for (cnt = 0; cnt < MAXQUOTAS; cnt++)
                 if (inode->i_dquot[cnt])
                         mark_dquot_dirty(inode->i_dquot[cnt]);
+       flush_warnings(inode->i_dquot, warntype);
         up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
         return QUOTA_OK;
  }
@@ -1323,7 +1399,8 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
         struct dquot *transfer_to[MAXQUOTAS];
         int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid,
             chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid;
-       char warntype[MAXQUOTAS];
+       char warntype_to[MAXQUOTAS];
+       char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
  
         /* First test before acquiring mutex - solves deadlocks when we
           * re-enter the quota code and are already holding the mutex */
@@ -1332,7 +1409,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
         /* Clear the arrays */
         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                 transfer_to[cnt] = transfer_from[cnt] = NODQUOT;
-               warntype[cnt] = QUOTA_NL_NOWARN;
+               warntype_to[cnt] = QUOTA_NL_NOWARN;
         }
         down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
         /* Now recheck reliably when holding dqptr_sem */
@@ -1364,8 +1441,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
                 if (transfer_to[cnt] == NODQUOT)
                         continue;
                 transfer_from[cnt] = inode->i_dquot[cnt];
-               if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA ||
-                   check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA)
+               if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) ==
+                   NO_QUOTA || check_bdq(transfer_to[cnt], space, 0,
+                   warntype_to + cnt) == NO_QUOTA)
                         goto warn_put_all;
         }
  
@@ -1381,6 +1459,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
  
                 /* Due to IO error we might not have transfer_from[] structure */
                 if (transfer_from[cnt]) {
+                       warntype_from_inodes[cnt] =
+                               info_idq_free(transfer_from[cnt], 1);
+                       warntype_from_space[cnt] =
+                               info_bdq_free(transfer_from[cnt], space);
                         dquot_decr_inodes(transfer_from[cnt], 1);
                         dquot_decr_space(transfer_from[cnt], space);
                 }
@@ -1400,7 +1482,9 @@ warn_put_all:
                 if (transfer_to[cnt])
                         mark_dquot_dirty(transfer_to[cnt]);
         }
-       flush_warnings(transfer_to, warntype);
+       flush_warnings(transfer_to, warntype_to);
+       flush_warnings(transfer_from, warntype_from_inodes);
+       flush_warnings(transfer_from, warntype_from_space);
         
         for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
                 if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT)
@@ -1412,6 +1496,18 @@ warn_put_all:
         return ret;
  }
  
+/*
+ * Wrapper for transferring ownership of an inode.
+ *
+ * Does nothing unless some quota type is enabled on the inode's superblock
+ * and the inode is not marked IS_NOQUOTA. Otherwise it calls vfs_dq_init()
+ * on the inode and then the filesystem's dq_op->transfer() method with
+ * @iattr.
+ *
+ * Returns 1 if ->transfer() reported NO_QUOTA, and 0 in every other case
+ * (including when the quota handling was skipped altogether).
+ */
+int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
+{
+       if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
+               vfs_dq_init(inode);
+               if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
+                       return 1;
+       }
+       return 0;
+}
+
+
  /*
   * Write info of quota file to disk
   */
@@ -1752,6 +1848,22 @@ out:
         return error;
  }
  
+/*
+ * Wrapper to turn on quotas when remounting rw.
+ *
+ * Calls the superblock's s_qcop->quota_on() once for every quota type
+ * (0 .. MAXQUOTAS-1), passing 0 as format id, a NULL path and 1 as the
+ * last argument (NOTE(review): presumably the "remount" flag - confirm
+ * against the ->quota_on() prototype).
+ *
+ * A failure for one type does not stop the loop: the first negative error
+ * seen is remembered and returned once all types have been tried.
+ * Returns -ENOSYS if the superblock provides no quota_on operation, and 0
+ * if no call returned a negative value.
+ */
+int vfs_dq_quota_on_remount(struct super_block *sb)
+{
+       int cnt;
+       int ret = 0, err;
+
+       if (!sb->s_qcop || !sb->s_qcop->quota_on)
+               return -ENOSYS;
+       for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+               err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
+               if (err < 0 && !ret)
+                       ret = err;
+       }
+       return ret;
+}
+
  /* Generic routine for getting common part of quota structure */
  static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
  {
@@ -2087,8 +2199,11 @@ EXPORT_SYMBOL(dquot_release);
  EXPORT_SYMBOL(dquot_mark_dquot_dirty);
  EXPORT_SYMBOL(dquot_initialize);
  EXPORT_SYMBOL(dquot_drop);
+EXPORT_SYMBOL(vfs_dq_drop);
  EXPORT_SYMBOL(dquot_alloc_space);
  EXPORT_SYMBOL(dquot_alloc_inode);
  EXPORT_SYMBOL(dquot_free_space);
  EXPORT_SYMBOL(dquot_free_inode);
  EXPORT_SYMBOL(dquot_transfer);
+EXPORT_SYMBOL(vfs_dq_transfer);
+EXPORT_SYMBOL(vfs_dq_quota_on_remount);
diff --git a/fs/exec.c b/fs/exec.c

index 190ed1f927740fd1dc9a3437a6081dd213be4801..5e559013e303fbcb6f9666828cca450ab57de7f9 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -25,19 +25,18 @@
  #include <linux/slab.h>
  #include <linux/file.h>
  #include <linux/fdtable.h>
-#include <linux/mman.h>
+#include <linux/mm.h>
  #include <linux/stat.h>
  #include <linux/fcntl.h>
  #include <linux/smp_lock.h>
+#include <linux/swap.h>
  #include <linux/string.h>
  #include <linux/init.h>
-#include <linux/pagemap.h>
  #include <linux/highmem.h>
  #include <linux/spinlock.h>
  #include <linux/key.h>
  #include <linux/personality.h>
  #include <linux/binfmts.h>
-#include <linux/swap.h>
  #include <linux/utsname.h>
  #include <linux/pid_namespace.h>
  #include <linux/module.h>
@@ -47,7 +46,6 @@
  #include <linux/mount.h>
  #include <linux/security.h>
  #include <linux/syscalls.h>
-#include <linux/rmap.h>
  #include <linux/tsacct_kern.h>
  #include <linux/cn_proc.h>
  #include <linux/audit.h>
@@ -724,12 +722,10 @@ static int exec_mmap(struct mm_struct *mm)
                  * Make sure that if there is a core dump in progress
                  * for the old mm, we get out and die instead of going
                  * through with the exec.  We must hold mmap_sem around
-                * checking core_waiters and changing tsk->mm.  The
-                * core-inducing thread will increment core_waiters for
-                * each thread whose ->mm == old_mm.
+                * checking core_state and changing tsk->mm.
                  */
                 down_read(&old_mm->mmap_sem);
-               if (unlikely(old_mm->core_waiters)) {
+               if (unlikely(old_mm->core_state)) {
                         up_read(&old_mm->mmap_sem);
                         return -EINTR;
                 }
@@ -1328,6 +1324,7 @@ int do_execve(char * filename,
         if (retval < 0)
                 goto out;
  
+       current->flags &= ~PF_KTHREAD;
         retval = search_binary_handler(bprm,regs);
         if (retval >= 0) {
                 /* execve success */
@@ -1382,17 +1379,14 @@ EXPORT_SYMBOL(set_binfmt);
   * name into corename, which must have space for at least
   * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
   */
-static int format_corename(char *corename, const char *pattern, long signr)
+static int format_corename(char *corename, int nr_threads, long signr)
  {
-       const char *pat_ptr = pattern;
+       const char *pat_ptr = core_pattern;
+       int ispipe = (*pat_ptr == '|');
         char *out_ptr = corename;
         char *const out_end = corename + CORENAME_MAX_SIZE;
         int rc;
         int pid_in_pattern = 0;
-       int ispipe = 0;
-
-       if (*pattern == '|')
-               ispipe = 1;
  
         /* Repeat as long as we have more pattern to process and more output
            space */
@@ -1493,7 +1487,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
          * and core_uses_pid is set, then .%pid will be appended to
          * the filename. Do not do this for piped commands. */
         if (!ispipe && !pid_in_pattern
-            && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
+           && (core_uses_pid || nr_threads)) {
                 rc = snprintf(out_ptr, out_end - out_ptr,
                               ".%d", task_tgid_vnr(current));
                 if (rc > out_end - out_ptr)
@@ -1505,9 +1499,10 @@ out:
         return ispipe;
  }
  
-static void zap_process(struct task_struct *start)
+static int zap_process(struct task_struct *start)
  {
         struct task_struct *t;
+       int nr = 0;
  
         start->signal->flags = SIGNAL_GROUP_EXIT;
         start->signal->group_stop_count = 0;
@@ -1515,72 +1510,99 @@ static void zap_process(struct task_struct *start)
         t = start;
         do {
                 if (t != current && t->mm) {
-                       t->mm->core_waiters++;
                         sigaddset(&t->pending.signal, SIGKILL);
                         signal_wake_up(t, 1);
+                       nr++;
                 }
-       } while ((t = next_thread(t)) != start);
+       } while_each_thread(start, t);
+
+       return nr;
  }
  
  static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
-                               int exit_code)
+                               struct core_state *core_state, int exit_code)
  {
         struct task_struct *g, *p;
         unsigned long flags;
-       int err = -EAGAIN;
+       int nr = -EAGAIN;       /* result: number of zapped threads, or -EAGAIN if the group is already exiting */
  
         spin_lock_irq(&tsk->sighand->siglock);
         if (!signal_group_exit(tsk->signal)) {
+               mm->core_state = core_state;    /* set under siglock, before any thread is zapped */
                 tsk->signal->group_exit_code = exit_code;
-               zap_process(tsk);
-               err = 0;
+               nr = zap_process(tsk);  /* threads of tsk's own group that were sent SIGKILL */
         }
         spin_unlock_irq(&tsk->sighand->siglock);
-       if (err)
-               return err;
+       if (unlikely(nr < 0))
+               return nr;
  
-       if (atomic_read(&mm->mm_users) == mm->core_waiters + 1)
+       if (atomic_read(&mm->mm_users) == nr + 1)       /* NOTE(review): presumably "no task outside our group shares mm", so the scan below is skipped - confirm */
                 goto done;
-
+       /*
+        * We should find and kill all tasks which use this mm, and we should
+        * count them correctly into ->nr_threads. We don't take tasklist
+        * lock, but this is safe wrt:
+        *
+        * fork:
+        *      None of sub-threads can fork after zap_process(leader). All
+        *      processes which were created before this point should be
+        *      visible to zap_threads() because copy_process() adds the new
+        *      process to the tail of init_task.tasks list, and lock/unlock
+        *      of ->siglock provides a memory barrier.
+        *
+        * do_exit:
+        *      The caller holds mm->mmap_sem. This means that the task which
+        *      uses this mm can't pass exit_mm(), so it can't exit or clear
+        *      its ->mm.
+        *
+        * de_thread:
+        *      It does list_replace_rcu(&leader->tasks, &current->tasks),
+        *      we must see either old or new leader, this does not matter.
+        *      However, it can change p->sighand, so lock_task_sighand(p)
+        *      must be used. Since p->mm != NULL and we hold ->mmap_sem
+        *      it can't fail.
+        *
+        *      Note also that "g" can be the old leader with ->mm == NULL
+        *      and already unhashed and thus removed from ->thread_group.
+        *      This is OK, __unhash_process()->list_del_rcu() does not
+        *      clear the ->next pointer, we will find the new leader via
+        *      next_thread().
+        */
         rcu_read_lock();
         for_each_process(g) {
                 if (g == tsk->group_leader)
                         continue;
-
+               if (g->flags & PF_KTHREAD)      /* processes flagged PF_KTHREAD are never zapped */
+                       continue;
                 p = g;
                 do {
                         if (p->mm) {
-                               if (p->mm == mm) {
-                                       /*
-                                        * p->sighand can't disappear, but
-                                        * may be changed by de_thread()
-                                        */
+                               if (unlikely(p->mm == mm)) {
                                         lock_task_sighand(p, &flags);
-                                       zap_process(p);
+                                       nr += zap_process(p);   /* add that process's zapped threads to the total */
                                         unlock_task_sighand(p, &flags);
                                 }
                                 break;
                         }
-               } while ((p = next_thread(p)) != g);
+               } while_each_thread(g, p);
         }
         rcu_read_unlock();
  done:
-       return mm->core_waiters;
+       atomic_set(&core_state->nr_threads, nr);        /* record the final count in core_state */
+       return nr;
  }
  
-static int coredump_wait(int exit_code)
+static int coredump_wait(int exit_code, struct core_state *core_state)
  {
         struct task_struct *tsk = current;
         struct mm_struct *mm = tsk->mm;
-       struct completion startup_done;
         struct completion *vfork_done;
         int core_waiters;
  
-       init_completion(&mm->core_done);
-       init_completion(&startup_done);
-       mm->core_startup_done = &startup_done;
-
-       core_waiters = zap_threads(tsk, mm, exit_code);
+       init_completion(&core_state->startup);
+       core_state->dumper.task = tsk;
+       core_state->dumper.next = NULL;
+       core_waiters = zap_threads(tsk, mm, core_state, exit_code);
         up_write(&mm->mmap_sem);
  
         if (unlikely(core_waiters < 0))
@@ -1597,12 +1619,32 @@ static int coredump_wait(int exit_code)
         }
  
         if (core_waiters)
-               wait_for_completion(&startup_done);
+               wait_for_completion(&core_state->startup);
  fail:
-       BUG_ON(mm->core_waiters);
         return core_waiters;
  }
  
+static void coredump_finish(struct mm_struct *mm)
+{
+       struct core_thread *curr, *next;
+       struct task_struct *task;
+
+       next = mm->core_state->dumper.next;     /* start after the dumper's own entry */
+       while ((curr = next) != NULL) {
+               next = curr->next;      /* both fields are read before ->task is cleared below */
+               task = curr->task;
+               /*
+                * see exit_mm(), curr->task must not see
+                * ->task == NULL before we read ->next.
+                *
+                * NOTE(review): i.e. the barrier keeps the two loads above
+                * ahead of the store below; presumably *curr may become
+                * invalid once its owner observes ->task == NULL - confirm
+                * against exit_mm().
+                */
+               smp_mb();
+               curr->task = NULL;
+               wake_up_process(task);
+       }
+
+       mm->core_state = NULL;  /* exec_mmap()/do_coredump() test this field for a dump in progress */
+}
+
  /*
   * set_dumpable converts traditional three-value dumpable to two flags and
   * stores them into mm->flags.  It modifies lower two bits of mm->flags, but
@@ -1654,6 +1696,7 @@ int get_dumpable(struct mm_struct *mm)
  
  int do_coredump(long signr, int exit_code, struct pt_regs * regs)
  {
+       struct core_state core_state;
         char corename[CORENAME_MAX_SIZE + 1];
         struct mm_struct *mm = current->mm;
         struct linux_binfmt * binfmt;
@@ -1677,7 +1720,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
         /*
          * If another thread got here first, or we are not dumpable, bail out.
          */
-       if (mm->core_waiters || !get_dumpable(mm)) {
+       if (mm->core_state || !get_dumpable(mm)) {
                 up_write(&mm->mmap_sem);
                 goto fail;
         }
@@ -1692,7 +1735,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
                 current->fsuid = 0;     /* Dump root private */
         }
  
-       retval = coredump_wait(exit_code);
+       retval = coredump_wait(exit_code, &core_state);
         if (retval < 0)
                 goto fail;
  
@@ -1707,7 +1750,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
          * uses lock_kernel()
          */
         lock_kernel();
-       ispipe = format_corename(corename, core_pattern, signr);
+       ispipe = format_corename(corename, retval, signr);
         unlock_kernel();
         /*
          * Don't bother to check the RLIMIT_CORE value if core_pattern points
@@ -1786,7 +1829,7 @@ fail_unlock:
                 argv_free(helper_argv);
  
         current->fsuid = fsuid;
-       complete_all(&mm->core_done);
+       coredump_finish(mm);
  fail:
         return retval;
  }
diff --git a/fs/ext2/super.c b/fs/ext2/super.c

index ef50cbc792db94668b9d7deecc22c50076511fe4..31308a3b0b8b906d7f6c0434a55decda685c0cbc 100644 (file)
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
  #include <linux/seq_file.h>
  #include <linux/mount.h>
  #include <linux/log2.h>
+#include <linux/quotaops.h>
  #include <asm/uaccess.h>
  #include "ext2.h"
  #include "xattr.h"
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c

index eaa23d2d5213ae7dcfa70875ebddf40cc7774ce4..70c0dbdcdcb75447cb73602151c2166e0805bb57 100644 (file)
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -14,7 +14,7 @@ static size_t
  ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
                          const char *name, size_t name_len)
  {
-       const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
+       const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
         const size_t total_len = prefix_len + name_len + 1;
  
         if (list && total_len <= list_size) {
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c

index 83ee149f353db383d657e01a5feb8cd0fac82655..e8219f8eae9f293bff015a07c1dd11cccc00febf 100644 (file)
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -12,13 +12,11 @@
  #include <linux/ext2_fs.h>
  #include "xattr.h"
  
-#define XATTR_TRUSTED_PREFIX "trusted."
-
  static size_t
  ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
                         const char *name, size_t name_len)
  {
-       const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
+       const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
         const size_t total_len = prefix_len + name_len + 1;
  
         if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c

index f383e7c3a7b5bd18b8ca0bd43026c12ed689ce41..92495d28c62f0b43da553efba59e482d16d388bc 100644 (file)
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -11,13 +11,11 @@
  #include "ext2.h"
  #include "xattr.h"
  
-#define XATTR_USER_PREFIX "user."
-
  static size_t
  ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
                      const char *name, size_t name_len)
  {
-       const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
+       const size_t prefix_len = XATTR_USER_PREFIX_LEN;
         const size_t total_len = prefix_len + name_len + 1;
  
         if (!test_opt(inode->i_sb, XATTR_USER))
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c

index 8ca3bfd7242743dba5d05f5afd06e3cdb549718d..2eea96ec78ed218f4fbe14b8e4c1b169e1dfe1ac 100644 (file)
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -272,7 +272,7 @@ static void free_rb_tree_fname(struct rb_root *root)
  
         while (n) {
                 /* Do the node's children first */
-               if ((n)->rb_left) {
+               if (n->rb_left) {
                         n = n->rb_left;
                         continue;
                 }
@@ -301,24 +301,18 @@ static void free_rb_tree_fname(struct rb_root *root)
                         parent->rb_right = NULL;
                 n = parent;
         }
-       root->rb_node = NULL;
  }
  
  
-static struct dir_private_info *create_dir_info(loff_t pos)
+static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos) /* returns NULL if the allocation fails */
  {
         struct dir_private_info *p;
  
-       p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
+       p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);       /* zero-filled: replaces the explicit NULL/0 field stores removed below */
         if (!p)
                 return NULL;
-       p->root.rb_node = NULL;
-       p->curr_node = NULL;
-       p->extra_fname = NULL;
-       p->last_pos = 0;
         p->curr_hash = pos2maj_hash(pos);
         p->curr_minor_hash = pos2min_hash(pos);
-       p->next_hash = 0;
         return p;
  }
  
@@ -433,7 +427,7 @@ static int ext3_dx_readdir(struct file * filp,
         int     ret;
  
         if (!info) {
-               info = create_dir_info(filp->f_pos);
+               info = ext3_htree_create_dir_info(filp->f_pos);
                 if (!info)
                         return -ENOMEM;
                 filp->private_data = info;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c

index 77126821b2e9de219c0e1f36caea9c263e6f7c2f..47b678d73e7a53d28ef1d0d35eae758bd87e54d7 100644 (file)
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -669,6 +669,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
         if (IS_ERR(inode))
                 goto iget_failed;
  
+       /*
+        * If the orphan has i_nlink > 0 then it should be able to be
+        * truncated, otherwise it won't be removed from the orphan list
+        * during processing and an infinite loop will result.
+        */
+       if (inode->i_nlink && !ext3_can_truncate(inode))
+               goto bad_orphan;
+
         if (NEXT_ORPHAN(inode) > max_ino)
                 goto bad_orphan;
         brelse(bitmap_bh);
@@ -690,6 +698,7 @@ bad_orphan:
                 printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
                        NEXT_ORPHAN(inode));
                 printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
+               printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
                 /* Avoid freeing blocks if we got a bad deleted inode */
                 if (inode->i_nlink == 0)
                         inode->i_blocks = 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c

index 6ae4ecf3ce40b646da679ba6b2a1ecb6149e7e00..3bf07d70b914063b043342710833f5745456915e 100644 (file)
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2127,7 +2127,21 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
  
         if (this_bh) {
                 BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
-               ext3_journal_dirty_metadata(handle, this_bh);
+
+               /*
+                * The buffer head should have an attached journal head at this
+                * point. However, if the data is corrupted and an indirect
+                * block pointed to itself, it would have been detached when
+                * the block was cleared. Check for this instead of OOPSing.
+                */
+               if (bh2jh(this_bh))
+                       ext3_journal_dirty_metadata(handle, this_bh);
+               else
+                       ext3_error(inode->i_sb, "ext3_free_data",
+                                  "circular indirect block detected, "
+                                  "inode=%lu, block=%llu",
+                                  inode->i_ino,
+                                  (unsigned long long)this_bh->b_blocknr);
         }
  }
  
@@ -2253,6 +2267,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
         }
  }
  
+int ext3_can_truncate(struct inode *inode)     /* returns 1 if the inode may be truncated, 0 otherwise */
+{
+       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))    /* append-only or immutable: never */
+               return 0;
+       if (S_ISREG(inode->i_mode))
+               return 1;
+       if (S_ISDIR(inode->i_mode))
+               return 1;
+       if (S_ISLNK(inode->i_mode))     /* symlinks qualify only when they are not "fast" symlinks */
+               return !ext3_inode_is_fast_symlink(inode);
+       return 0;       /* every other file type */
+}
+
  /*
   * ext3_truncate()
   *
@@ -2297,12 +2324,7 @@ void ext3_truncate(struct inode *inode)
         unsigned blocksize = inode->i_sb->s_blocksize;
         struct page *page;
  
-       if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-           S_ISLNK(inode->i_mode)))
-               return;
-       if (ext3_inode_is_fast_symlink(inode))
-               return;
-       if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+       if (!ext3_can_truncate(inode))
                 return;
  
         /*
@@ -2513,6 +2535,16 @@ static int __ext3_get_inode_loc(struct inode *inode,
         }
         if (!buffer_uptodate(bh)) {
                 lock_buffer(bh);
+
+               /*
+                * If the buffer has the write error flag, we have failed
+                * to write out another inode in the same block.  In this
+                * case, we don't have to read the block because we may
+                * read the old inode data successfully.
+                */
+               if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
+                       set_buffer_uptodate(bh);
+
                 if (buffer_uptodate(bh)) {
                         /* someone brought it uptodate while we waited */
                         unlock_buffer(bh);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c

index 0b8cf80154f1a5c912a1b2d6cae18ccebea591c1..de13e919cd8106fac1012c162699cc5604da90f4 100644 (file)
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -240,13 +240,13 @@ static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
  {
         unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
                 EXT3_DIR_REC_LEN(2) - infosize;
-       return 0? 20: entry_space / sizeof(struct dx_entry);
+       return entry_space / sizeof(struct dx_entry);
  }
  
  static inline unsigned dx_node_limit (struct inode *dir)
  {
         unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
-       return 0? 22: entry_space / sizeof(struct dx_entry);
+       return entry_space / sizeof(struct dx_entry);   /* how many struct dx_entry fit in entry_space; the dead "0? 22:" arm is dropped */
  }
  
  /*
@@ -991,19 +991,21 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
                 de = (struct ext3_dir_entry_2 *) bh->b_data;
                 top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
                                        EXT3_DIR_REC_LEN(0));
-               for (; de < top; de = ext3_next_entry(de))
-               if (ext3_match (namelen, name, de)) {
-                       if (!ext3_check_dir_entry("ext3_find_entry",
-                                                 dir, de, bh,
-                                 (block<<EXT3_BLOCK_SIZE_BITS(sb))
-                                         +((char *)de - bh->b_data))) {
-                               brelse (bh);
+               for (; de < top; de = ext3_next_entry(de)) {
+                       int off = (block << EXT3_BLOCK_SIZE_BITS(sb))
+                                 + ((char *) de - bh->b_data);
+
+                       if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) {
+                               brelse(bh);
                                 *err = ERR_BAD_DX_DIR;
                                 goto errout;
                         }
-                       *res_dir = de;
-                       dx_release (frames);
-                       return bh;
+
+                       if (ext3_match(namelen, name, de)) {
+                               *res_dir = de;
+                               dx_release(frames);
+                               return bh;
+                       }
                 }
                 brelse (bh);
                 /* Check to see if we should continue to search */
diff --git a/fs/ext3/super.c b/fs/ext3/super.c

index 2845425077e857c85ac7413a9613786d0dd0ac25..615788c6843a5f8cb7c327ece40ea10f53bf5bb8 100644 (file)
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -842,7 +842,7 @@ static int parse_options (char *options, struct super_block *sb,
         int data_opt = 0;
         int option;
  #ifdef CONFIG_QUOTA
-       int qtype;
+       int qtype, qfmt;
         char *qname;
  #endif
  
@@ -1018,9 +1018,11 @@ static int parse_options (char *options, struct super_block *sb,
                 case Opt_grpjquota:
                         qtype = GRPQUOTA;
  set_qf_name:
-                       if (sb_any_quota_enabled(sb)) {
+                       if ((sb_any_quota_enabled(sb) ||
+                            sb_any_quota_suspended(sb)) &&
+                           !sbi->s_qf_names[qtype]) {
                                 printk(KERN_ERR
-                                       "EXT3-fs: Cannot change journalled "
+                                       "EXT3-fs: Cannot change journaled "
                                         "quota options when quota turned on.\n");
                                 return 0;
                         }
@@ -1056,9 +1058,11 @@ set_qf_name:
                 case Opt_offgrpjquota:
                         qtype = GRPQUOTA;
  clear_qf_name:
-                       if (sb_any_quota_enabled(sb)) {
+                       if ((sb_any_quota_enabled(sb) ||
+                            sb_any_quota_suspended(sb)) &&
+                           sbi->s_qf_names[qtype]) {
                                 printk(KERN_ERR "EXT3-fs: Cannot change "
-                                       "journalled quota options when "
+                                       "journaled quota options when "
                                         "quota turned on.\n");
                                 return 0;
                         }
@@ -1069,10 +1073,20 @@ clear_qf_name:
                         sbi->s_qf_names[qtype] = NULL;
                         break;
                 case Opt_jqfmt_vfsold:
-                       sbi->s_jquota_fmt = QFMT_VFS_OLD;
-                       break;
+                       qfmt = QFMT_VFS_OLD;
+                       goto set_qf_format;
                 case Opt_jqfmt_vfsv0:
-                       sbi->s_jquota_fmt = QFMT_VFS_V0;
+                       qfmt = QFMT_VFS_V0;
+set_qf_format:
+                       if ((sb_any_quota_enabled(sb) ||
+                            sb_any_quota_suspended(sb)) &&
+                           sbi->s_jquota_fmt != qfmt) {
+                               printk(KERN_ERR "EXT3-fs: Cannot change "
+                                       "journaled quota options when "
+                                       "quota turned on.\n");
+                               return 0;
+                       }
+                       sbi->s_jquota_fmt = qfmt;
                         break;
                 case Opt_quota:
                 case Opt_usrquota:
@@ -1084,7 +1098,8 @@ clear_qf_name:
                         set_opt(sbi->s_mount_opt, GRPQUOTA);
                         break;
                 case Opt_noquota:
-                       if (sb_any_quota_enabled(sb)) {
+                       if (sb_any_quota_enabled(sb) ||
+                           sb_any_quota_suspended(sb)) {
                                 printk(KERN_ERR "EXT3-fs: Cannot change quota "
                                         "options when quota turned on.\n");
                                 return 0;
@@ -1169,14 +1184,14 @@ clear_qf_name:
                 }
  
                 if (!sbi->s_jquota_fmt) {
-                       printk(KERN_ERR "EXT3-fs: journalled quota format "
+                       printk(KERN_ERR "EXT3-fs: journaled quota format "
                                         "not specified.\n");
                         return 0;
                 }
         } else {
                 if (sbi->s_jquota_fmt) {
-                       printk(KERN_ERR "EXT3-fs: journalled quota format "
-                                       "specified with no journalling "
+                       printk(KERN_ERR "EXT3-fs: journaled quota format "
+                                       "specified with no journaling "
                                         "enabled.\n");
                         return 0;
                 }
@@ -1370,7 +1385,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
                         int ret = ext3_quota_on_mount(sb, i);
                         if (ret < 0)
                                 printk(KERN_ERR
-                                       "EXT3-fs: Cannot turn on journalled "
+                                       "EXT3-fs: Cannot turn on journaled "
                                         "quota: error %d\n", ret);
                 }
         }
@@ -2712,7 +2727,7 @@ static int ext3_release_dquot(struct dquot *dquot)
  
  static int ext3_mark_dquot_dirty(struct dquot *dquot)
  {
-       /* Are we journalling quotas? */
+       /* Are we journaling quotas? */
         if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
             EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
                 dquot_mark_dquot_dirty(dquot);
@@ -2759,23 +2774,42 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
  
         if (!test_opt(sb, QUOTA))
                 return -EINVAL;
-       /* Not journalling quota or remount? */
-       if ((!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
-           !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
+       /* When remounting, no checks are needed and in fact, path is NULL */
+       if (remount)
                 return vfs_quota_on(sb, type, format_id, path, remount);
+
         err = path_lookup(path, LOOKUP_FOLLOW, &nd);
         if (err)
                 return err;
+
         /* Quotafile not on the same filesystem? */
         if (nd.path.mnt->mnt_sb != sb) {
                 path_put(&nd.path);
                 return -EXDEV;
         }
-       /* Quotafile not in fs root? */
-       if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
-               printk(KERN_WARNING
-                       "EXT3-fs: Quota file not on filesystem root. "
-                       "Journalled quota will not work.\n");
+       /* Journaling quota? */
+       if (EXT3_SB(sb)->s_qf_names[type]) {
+               /* Quotafile not of fs root? */
+               if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
+                       printk(KERN_WARNING
+                               "EXT3-fs: Quota file not on filesystem root. "
+                               "Journaled quota will not work.\n");
+       }
+
+       /*
+        * When we journal data on quota file, we have to flush journal to see
+        * all updates to the file when we bypass pagecache...
+        */
+       if (ext3_should_journal_data(nd.path.dentry->d_inode)) {
+               /*
+                * We don't need to lock updates but journal_flush() could
+                * otherwise be livelocked...
+                */
+               journal_lock_updates(EXT3_SB(sb)->s_journal);
+               journal_flush(EXT3_SB(sb)->s_journal);
+               journal_unlock_updates(EXT3_SB(sb)->s_journal);
+       }
+
         path_put(&nd.path);
         return vfs_quota_on(sb, type, format_id, path, remount);
  }
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c

index 821efaf2b94e55b7556903d2835493c5ec5a9252..37b81097bdf2a147b6bd2e7720e9ae8efe8170a7 100644 (file)
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -15,7 +15,7 @@ static size_t
  ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
                          const char *name, size_t name_len)
  {
-       const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
+       const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
         const size_t total_len = prefix_len + name_len + 1;
  
  
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c

index 0327497a55ce0a58baa9ce2d855cd7ecad762625..c7c41a410c4bfed2761ade463b9c34957f4bf5f5 100644 (file)
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -13,13 +13,11 @@
  #include <linux/ext3_fs.h>
  #include "xattr.h"
  
-#define XATTR_TRUSTED_PREFIX "trusted."
-
  static size_t
  ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
                         const char *name, size_t name_len)
  {
-       const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
+       const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
         const size_t total_len = prefix_len + name_len + 1;
  
         if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c

index 1abd8f92c4402a86a992e10e8e3261fd635eea10..430fe63b31b397d37370758ac119eeb2139469bd 100644 (file)
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -12,13 +12,11 @@
  #include <linux/ext3_fs.h>
  #include "xattr.h"
  
-#define XATTR_USER_PREFIX "user."
-
  static size_t
  ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
                      const char *name, size_t name_len)
  {
-       const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
+       const size_t prefix_len = XATTR_USER_PREFIX_LEN;
         const size_t total_len = prefix_len + name_len + 1;
  
         if (!test_opt(inode->i_sb, XATTR_USER))
diff --git a/fs/fat/dir.c b/fs/fat/dir.c

index 34541d06e6263b1817d96ee4d640f2cc8aa35eb7..cd4a0162e10d6dea7507507cf25a13fd93d93f04 100644 (file)
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -17,7 +17,6 @@
  #include <linux/slab.h>
  #include <linux/time.h>
  #include <linux/msdos_fs.h>
-#include <linux/dirent.h>
  #include <linux/smp_lock.h>
  #include <linux/buffer_head.h>
  #include <linux/compat.h>
@@ -124,10 +123,11 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
   * but ignore that right now.
   * Ahem... Stack smashing in ring 0 isn't fun. Fixed.
   */
-static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len,
+static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
                        int uni_xlate, struct nls_table *nls)
  {
-       wchar_t *ip, ec;
+       const wchar_t *ip;
+       wchar_t ec;
         unsigned char *op, nc;
         int charlen;
         int k;
@@ -167,6 +167,16 @@ static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len,
         return (op - ascii);
  }
  
+static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
+                               unsigned char *buf, int size)
+{
+       if (sbi->options.utf8)
+               return utf8_wcstombs(buf, uni, size);
+       else
+               return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
+                                  sbi->nls_io);
+}
+
  static inline int
  fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni)
  {
@@ -227,6 +237,19 @@ fat_shortname2uni(struct nls_table *nls, unsigned char *buf, int buf_size,
         return len;
  }
  
+static inline int fat_name_match(struct msdos_sb_info *sbi,
+                                const unsigned char *a, int a_len,
+                                const unsigned char *b, int b_len)
+{
+       if (a_len != b_len)
+               return 0;
+
+       if (sbi->options.name_check != 's')
+               return !nls_strnicmp(sbi->nls_io, a, b, a_len);
+       else
+               return !memcmp(a, b, a_len);
+}
+
  enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, };
  
  /**
@@ -301,6 +324,19 @@ parse_long:
         return 0;
  }
  
+/*
+ * Maximum buffer size of short name.
+ * [(MSDOS_NAME + '.') * max one char + nul]
+ * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
+ */
+#define FAT_MAX_SHORT_SIZE     ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
+/*
+ * Maximum buffer size of unicode chars from slots.
+ * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
+ */
+#define FAT_MAX_UNI_CHARS      ((MSDOS_SLOTS - 1) * 13 + 1)
+#define FAT_MAX_UNI_SIZE       (FAT_MAX_UNI_CHARS * sizeof(wchar_t))
+
  /*
   * Return values: negative -> error, 0 -> not found, positive -> found,
   * value is the total amount of slots, including the shortname entry.
@@ -312,29 +348,20 @@ int fat_search_long(struct inode *inode, const unsigned char *name,
         struct msdos_sb_info *sbi = MSDOS_SB(sb);
         struct buffer_head *bh = NULL;
         struct msdos_dir_entry *de;
-       struct nls_table *nls_io = sbi->nls_io;
         struct nls_table *nls_disk = sbi->nls_disk;
-       wchar_t bufuname[14];
         unsigned char nr_slots;
-       int xlate_len;
+       wchar_t bufuname[14];
         wchar_t *unicode = NULL;
         unsigned char work[MSDOS_NAME];
-       unsigned char *bufname = NULL;
-       int uni_xlate = sbi->options.unicode_xlate;
-       int utf8 = sbi->options.utf8;
-       int anycase = (sbi->options.name_check != 's');
+       unsigned char bufname[FAT_MAX_SHORT_SIZE];
         unsigned short opt_shortname = sbi->options.shortname;
         loff_t cpos = 0;
-       int chl, i, j, last_u, err;
-
-       bufname = __getname();
-       if (!bufname)
-               return -ENOMEM;
+       int chl, i, j, last_u, err, len;
  
         err = -ENOENT;
-       while(1) {
+       while (1) {
                 if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
-                       goto EODir;
+                       goto end_of_dir;
  parse_record:
                 nr_slots = 0;
                 if (de->name[0] == DELETED_FLAG)
@@ -353,7 +380,7 @@ parse_record:
                         else if (status == PARSE_NOT_LONGNAME)
                                 goto parse_record;
                         else if (status == PARSE_EOF)
-                               goto EODir;
+                               goto end_of_dir;
                 }
  
                 memcpy(work, de->name, sizeof(de->name));
@@ -394,30 +421,24 @@ parse_record:
                 if (!last_u)
                         continue;
  
+               /* Compare shortname */
                 bufuname[last_u] = 0x0000;
-               xlate_len = utf8
-                       ?utf8_wcstombs(bufname, bufuname, PATH_MAX)
-                       :uni16_to_x8(bufname, bufuname, PATH_MAX, uni_xlate, nls_io);
-               if (xlate_len == name_len)
-                       if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
-                           (anycase && !nls_strnicmp(nls_io, name, bufname,
-                                                               xlate_len)))
-                               goto Found;
+               len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
+               if (fat_name_match(sbi, name, name_len, bufname, len))
+                       goto found;
  
                 if (nr_slots) {
-                       xlate_len = utf8
-                               ?utf8_wcstombs(bufname, unicode, PATH_MAX)
-                               :uni16_to_x8(bufname, unicode, PATH_MAX, uni_xlate, nls_io);
-                       if (xlate_len != name_len)
-                               continue;
-                       if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
-                           (anycase && !nls_strnicmp(nls_io, name, bufname,
-                                                               xlate_len)))
-                               goto Found;
+                       void *longname = unicode + FAT_MAX_UNI_CHARS;
+                       int size = PATH_MAX - FAT_MAX_UNI_SIZE;
+
+                       /* Compare longname */
+                       len = fat_uni_to_x8(sbi, unicode, longname, size);
+                       if (fat_name_match(sbi, name, name_len, longname, len))
+                               goto found;
                 }
         }
  
-Found:
+found:
         nr_slots++;     /* include the de */
         sinfo->slot_off = cpos - nr_slots * sizeof(*de);
         sinfo->nr_slots = nr_slots;
@@ -425,9 +446,7 @@ Found:
         sinfo->bh = bh;
         sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de);
         err = 0;
-EODir:
-       if (bufname)
-               __putname(bufname);
+end_of_dir:
         if (unicode)
                 __putname(unicode);
  
@@ -453,23 +472,20 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
         struct msdos_sb_info *sbi = MSDOS_SB(sb);
         struct buffer_head *bh;
         struct msdos_dir_entry *de;
-       struct nls_table *nls_io = sbi->nls_io;
         struct nls_table *nls_disk = sbi->nls_disk;
-       unsigned char long_slots;
-       const char *fill_name;
-       int fill_len;
+       unsigned char nr_slots;
         wchar_t bufuname[14];
         wchar_t *unicode = NULL;
-       unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname;
-       unsigned long lpos, dummy, *furrfu = &lpos;
-       int uni_xlate = sbi->options.unicode_xlate;
+       unsigned char c, work[MSDOS_NAME];
+       unsigned char bufname[FAT_MAX_SHORT_SIZE], *ptname = bufname;
+       unsigned short opt_shortname = sbi->options.shortname;
         int isvfat = sbi->options.isvfat;
-       int utf8 = sbi->options.utf8;
         int nocase = sbi->options.nocase;
-       unsigned short opt_shortname = sbi->options.shortname;
+       const char *fill_name = NULL;
         unsigned long inum;
-       int chi, chl, i, i2, j, last, last_u, dotoffset = 0;
+       unsigned long lpos, dummy, *furrfu = &lpos;
         loff_t cpos;
+       int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len = 0;
         int ret = 0;
  
         lock_super(sb);
@@ -489,43 +505,58 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
                         cpos = 0;
                 }
         }
-       if (cpos & (sizeof(struct msdos_dir_entry)-1)) {
+       if (cpos & (sizeof(struct msdos_dir_entry) - 1)) {
                 ret = -ENOENT;
                 goto out;
         }
  
         bh = NULL;
-GetNew:
+get_new:
         if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
-               goto EODir;
+               goto end_of_dir;
  parse_record:
-       long_slots = 0;
-       /* Check for long filename entry */
-       if (isvfat) {
+       nr_slots = 0;
+       /*
+        * Check for long filename entry, but if short_only, we don't
+        * need to parse long filename.
+        */
+       if (isvfat && !short_only) {
                 if (de->name[0] == DELETED_FLAG)
-                       goto RecEnd;
+                       goto record_end;
                 if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME))
-                       goto RecEnd;
+                       goto record_end;
                 if (de->attr != ATTR_EXT && IS_FREE(de->name))
-                       goto RecEnd;
+                       goto record_end;
         } else {
                 if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name))
-                       goto RecEnd;
+                       goto record_end;
         }
  
         if (isvfat && de->attr == ATTR_EXT) {
                 int status = fat_parse_long(inode, &cpos, &bh, &de,
-                                           &unicode, &long_slots);
+                                           &unicode, &nr_slots);
                 if (status < 0) {
                         filp->f_pos = cpos;
                         ret = status;
                         goto out;
                 } else if (status == PARSE_INVALID)
-                       goto RecEnd;
+                       goto record_end;
                 else if (status == PARSE_NOT_LONGNAME)
                         goto parse_record;
                 else if (status == PARSE_EOF)
-                       goto EODir;
+                       goto end_of_dir;
+
+               if (nr_slots) {
+                       void *longname = unicode + FAT_MAX_UNI_CHARS;
+                       int size = PATH_MAX - FAT_MAX_UNI_SIZE;
+                       int len = fat_uni_to_x8(sbi, unicode, longname, size);
+
+                       fill_name = longname;
+                       fill_len = len;
+                       /* !both && !short_only, so we don't need shortname. */
+                       if (!both)
+                               goto start_filldir;
+               }
         }
  
         if (sbi->options.dotsOK) {
@@ -587,12 +618,32 @@ parse_record:
                 }
         }
         if (!last)
-               goto RecEnd;
+               goto record_end;
  
         i = last + dotoffset;
         j = last_u;
  
-       lpos = cpos - (long_slots+1)*sizeof(struct msdos_dir_entry);
+       if (isvfat) {
+               bufuname[j] = 0x0000;
+               i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
+       }
+       if (nr_slots) {
+               /* hack for fat_ioctl_filldir() */
+               struct fat_ioctl_filldir_callback *p = dirent;
+
+               p->longname = fill_name;
+               p->long_len = fill_len;
+               p->shortname = bufname;
+               p->short_len = i;
+               fill_name = NULL;
+               fill_len = 0;
+       } else {
+               fill_name = bufname;
+               fill_len = i;
+       }
+
+start_filldir:
+       lpos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
         if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME))
                 inum = inode->i_ino;
         else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
@@ -607,49 +658,17 @@ parse_record:
                         inum = iunique(sb, MSDOS_ROOT_INO);
         }
  
-       if (isvfat) {
-               bufuname[j] = 0x0000;
-               i = utf8 ? utf8_wcstombs(bufname, bufuname, sizeof(bufname))
-                        : uni16_to_x8(bufname, bufuname, sizeof(bufname), uni_xlate, nls_io);
-       }
-
-       fill_name = bufname;
-       fill_len = i;
-       if (!short_only && long_slots) {
-               /* convert the unicode long name. 261 is maximum size
-                * of unicode buffer. (13 * slots + nul) */
-               void *longname = unicode + 261;
-               int buf_size = PATH_MAX - (261 * sizeof(unicode[0]));
-               int long_len = utf8
-                       ? utf8_wcstombs(longname, unicode, buf_size)
-                       : uni16_to_x8(longname, unicode, buf_size, uni_xlate, nls_io);
-
-               if (!both) {
-                       fill_name = longname;
-                       fill_len = long_len;
-               } else {
-                       /* hack for fat_ioctl_filldir() */
-                       struct fat_ioctl_filldir_callback *p = dirent;
-
-                       p->longname = longname;
-                       p->long_len = long_len;
-                       p->shortname = bufname;
-                       p->short_len = i;
-                       fill_name = NULL;
-                       fill_len = 0;
-               }
-       }
         if (filldir(dirent, fill_name, fill_len, *furrfu, inum,
                     (de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0)
-               goto FillFailed;
+               goto fill_failed;
  
-RecEnd:
+record_end:
         furrfu = &lpos;
         filp->f_pos = cpos;
-       goto GetNew;
-EODir:
+       goto get_new;
+end_of_dir:
         filp->f_pos = cpos;
-FillFailed:
+fill_failed:
         brelse(bh);
         if (unicode)
                 __putname(unicode);
@@ -715,7 +734,7 @@ efault:                                                                        \
         return -EFAULT;                                                    \
  }
  
-FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, dirent)
+FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent)
  
  static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
                              void __user *dirent, filldir_t filldir,
@@ -741,7 +760,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
  static int fat_dir_ioctl(struct inode *inode, struct file *filp,
                          unsigned int cmd, unsigned long arg)
  {
-       struct dirent __user *d1 = (struct dirent __user *)arg;
+       struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg;
         int short_only, both;
  
         switch (cmd) {
@@ -757,7 +776,7 @@ static int fat_dir_ioctl(struct inode *inode, struct file *filp,
                 return fat_generic_ioctl(inode, filp, cmd, arg);
         }
  
-       if (!access_ok(VERIFY_WRITE, d1, sizeof(struct dirent[2])))
+       if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
                 return -EFAULT;
         /*
          * Yes, we don't need this put_user() absolutely. However old
@@ -1082,7 +1101,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
                 goto error_free;
         }
  
-       fat_date_unix2dos(ts->tv_sec, &time, &date);
+       fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
  
         de = (struct msdos_dir_entry *)bhs[0]->b_data;
         /* filling the new directory slots ("." and ".." entries) */
diff --git a/fs/fat/inode.c b/fs/fat/inode.c

index 46a4508ffd2eba5cbf10be8122d60f721883f934..23676f9d79ce294d4bbe35931f44044fce709606 100644 (file)
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -382,17 +382,20 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
         inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
                            & ~((loff_t)sbi->cluster_size - 1)) >> 9;
         inode->i_mtime.tv_sec =
-               date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date));
+               date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date),
+                             sbi->options.tz_utc);
         inode->i_mtime.tv_nsec = 0;
         if (sbi->options.isvfat) {
                 int secs = de->ctime_cs / 100;
                 int csecs = de->ctime_cs % 100;
                 inode->i_ctime.tv_sec  =
                         date_dos2unix(le16_to_cpu(de->ctime),
-                                     le16_to_cpu(de->cdate)) + secs;
+                                     le16_to_cpu(de->cdate),
+                                     sbi->options.tz_utc) + secs;
                 inode->i_ctime.tv_nsec = csecs * 10000000;
                 inode->i_atime.tv_sec =
-                       date_dos2unix(0, le16_to_cpu(de->adate));
+                       date_dos2unix(0, le16_to_cpu(de->adate),
+                                     sbi->options.tz_utc);
                 inode->i_atime.tv_nsec = 0;
         } else
                 inode->i_ctime = inode->i_atime = inode->i_mtime;
@@ -591,11 +594,14 @@ retry:
         raw_entry->attr = fat_attr(inode);
         raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
         raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
-       fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date);
+       fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time,
+                         &raw_entry->date, sbi->options.tz_utc);
         if (sbi->options.isvfat) {
                 __le16 atime;
-               fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate);
-               fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate);
+               fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime,
+                                 &raw_entry->cdate, sbi->options.tz_utc);
+               fat_date_unix2dos(inode->i_atime.tv_sec, &atime,
+                                 &raw_entry->adate, sbi->options.tz_utc);
                 raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
                         inode->i_ctime.tv_nsec / 10000000;
         }
@@ -836,6 +842,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
         }
         if (sbi->options.flush)
                 seq_puts(m, ",flush");
+       if (opts->tz_utc)
+               seq_puts(m, ",tz=UTC");
  
         return 0;
  }
@@ -848,7 +856,7 @@ enum {
         Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
         Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
         Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
-       Opt_obsolate, Opt_flush, Opt_err,
+       Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err,
  };
  
  static match_table_t fat_tokens = {
@@ -883,6 +891,7 @@ static match_table_t fat_tokens = {
         {Opt_obsolate, "cvf_options=%100s"},
         {Opt_obsolate, "posix"},
         {Opt_flush, "flush"},
+       {Opt_tz_utc, "tz=UTC"},
         {Opt_err, NULL},
  };
  static match_table_t msdos_tokens = {
@@ -947,10 +956,11 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
         opts->utf8 = opts->unicode_xlate = 0;
         opts->numtail = 1;
         opts->usefree = opts->nocase = 0;
+       opts->tz_utc = 0;
         *debug = 0;
  
         if (!options)
-               return 0;
+               goto out;
  
         while ((p = strsep(&options, ",")) != NULL) {
                 int token;
@@ -1036,6 +1046,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
                 case Opt_flush:
                         opts->flush = 1;
                         break;
+               case Opt_tz_utc:
+                       opts->tz_utc = 1;
+                       break;
  
                 /* msdos specific */
                 case Opt_dots:
@@ -1104,10 +1117,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
                         return -EINVAL;
                 }
         }
+
+out:
         /* UTF-8 doesn't provide FAT semantics */
         if (!strcmp(opts->iocharset, "utf8")) {
                 printk(KERN_ERR "FAT: utf8 is not a recommended IO charset"
-                      " for FAT filesystems, filesystem will be case sensitive!\n");
+                      " for FAT filesystems, filesystem will be "
+                      "case sensitive!\n");
         }
  
         /* If user doesn't specify allow_utime, it's initialized from dmask. */
diff --git a/fs/fat/misc.c b/fs/fat/misc.c

index 61f23511eacf41ec34649a84f1cc7e601ff61a68..79fb98ad36d4d2e2913c26bb5ab5b56358594eea 100644 (file)
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -142,7 +142,7 @@ static int day_n[] = {
  };
  
  /* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */
-int date_dos2unix(unsigned short time, unsigned short date)
+int date_dos2unix(unsigned short time, unsigned short date, int tz_utc)
  {
         int month, year, secs;
  
@@ -156,16 +156,18 @@ int date_dos2unix(unsigned short time, unsigned short date)
             ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 &&
             month < 2 ? 1 : 0)+3653);
                         /* days since 1.1.70 plus 80's leap day */
-       secs += sys_tz.tz_minuteswest*60;
+       if (!tz_utc)
+               secs += sys_tz.tz_minuteswest*60;
         return secs;
  }
  
  /* Convert linear UNIX date to a MS-DOS time/date pair. */
-void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date)
+void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc)
  {
         int day, year, nl_day, month;
  
-       unix_date -= sys_tz.tz_minuteswest*60;
+       if (!tz_utc)
+               unix_date -= sys_tz.tz_minuteswest*60;
  
         /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
         if (unix_date < 315532800)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c

index 2060bf06b90625cf920aa08f87da07bd286a193f..51d0035ff07e3a59037324ef256eb6b849163deb 100644 (file)
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -97,7 +97,7 @@ void fuse_invalidate_attr(struct inode *inode)
   * timeout is unknown (unlink, rmdir, rename and in some cases
   * lookup)
   */
-static void fuse_invalidate_entry_cache(struct dentry *entry)
+void fuse_invalidate_entry_cache(struct dentry *entry)
  {
         fuse_dentry_settime(entry, 0);
  }
@@ -112,18 +112,16 @@ static void fuse_invalidate_entry(struct dentry *entry)
         fuse_invalidate_entry_cache(entry);
  }
  
-static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
-                            struct dentry *entry,
+static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req,
+                            u64 nodeid, struct qstr *name,
                              struct fuse_entry_out *outarg)
  {
-       struct fuse_conn *fc = get_fuse_conn(dir);
-
         memset(outarg, 0, sizeof(struct fuse_entry_out));
         req->in.h.opcode = FUSE_LOOKUP;
-       req->in.h.nodeid = get_node_id(dir);
+       req->in.h.nodeid = nodeid;
         req->in.numargs = 1;
-       req->in.args[0].size = entry->d_name.len + 1;
-       req->in.args[0].value = entry->d_name.name;
+       req->in.args[0].size = name->len + 1;
+       req->in.args[0].value = name->name;
         req->out.numargs = 1;
         if (fc->minor < 9)
                 req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
@@ -189,7 +187,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
                 attr_version = fuse_get_attr_version(fc);
  
                 parent = dget_parent(entry);
-               fuse_lookup_init(req, parent->d_inode, entry, &outarg);
+               fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
+                                &entry->d_name, &outarg);
                 request_send(fc, req);
                 dput(parent);
                 err = req->out.h.error;
@@ -225,7 +224,7 @@ static int invalid_nodeid(u64 nodeid)
         return !nodeid || nodeid == FUSE_ROOT_ID;
  }
  
-static struct dentry_operations fuse_dentry_operations = {
+struct dentry_operations fuse_dentry_operations = {
         .d_revalidate   = fuse_dentry_revalidate,
  };
  
@@ -239,85 +238,127 @@ int fuse_valid_type(int m)
   * Add a directory inode to a dentry, ensuring that no other dentry
   * refers to this inode.  Called with fc->inst_mutex.
   */
-static int fuse_d_add_directory(struct dentry *entry, struct inode *inode)
+static struct dentry *fuse_d_add_directory(struct dentry *entry,
+                                          struct inode *inode)
  {
         struct dentry *alias = d_find_alias(inode);
-       if (alias) {
+       if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
                 /* This tries to shrink the subtree below alias */
                 fuse_invalidate_entry(alias);
                 dput(alias);
                 if (!list_empty(&inode->i_dentry))
-                       return -EBUSY;
+                       return ERR_PTR(-EBUSY);
+       } else {
+               dput(alias);
         }
-       d_add(entry, inode);
-       return 0;
+       return d_splice_alias(inode, entry);
  }
  
-static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
-                                 struct nameidata *nd)
+int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
+                    struct fuse_entry_out *outarg, struct inode **inode)
  {
-       int err;
-       struct fuse_entry_out outarg;
-       struct inode *inode = NULL;
-       struct fuse_conn *fc = get_fuse_conn(dir);
+       struct fuse_conn *fc = get_fuse_conn_super(sb);
         struct fuse_req *req;
         struct fuse_req *forget_req;
         u64 attr_version;
+       int err;
  
-       if (entry->d_name.len > FUSE_NAME_MAX)
-               return ERR_PTR(-ENAMETOOLONG);
+       *inode = NULL;
+       err = -ENAMETOOLONG;
+       if (name->len > FUSE_NAME_MAX)
+               goto out;
  
         req = fuse_get_req(fc);
+       err = PTR_ERR(req);
         if (IS_ERR(req))
-               return ERR_CAST(req);
+               goto out;
  
         forget_req = fuse_get_req(fc);
+       err = PTR_ERR(forget_req);
         if (IS_ERR(forget_req)) {
                 fuse_put_request(fc, req);
-               return ERR_CAST(forget_req);
+               goto out;
         }
  
         attr_version = fuse_get_attr_version(fc);
  
-       fuse_lookup_init(req, dir, entry, &outarg);
+       fuse_lookup_init(fc, req, nodeid, name, outarg);
         request_send(fc, req);
         err = req->out.h.error;
         fuse_put_request(fc, req);
         /* Zero nodeid is same as -ENOENT, but with valid timeout */
-       if (!err && outarg.nodeid &&
-           (invalid_nodeid(outarg.nodeid) ||
-            !fuse_valid_type(outarg.attr.mode)))
-               err = -EIO;
-       if (!err && outarg.nodeid) {
-               inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
-                                 &outarg.attr, entry_attr_timeout(&outarg),
-                                 attr_version);
-               if (!inode) {
-                       fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
-                       return ERR_PTR(-ENOMEM);
-               }
+       if (err || !outarg->nodeid)
+               goto out_put_forget;
+
+       err = -EIO;
+       if (!outarg->nodeid)
+               goto out_put_forget;
+       if (!fuse_valid_type(outarg->attr.mode))
+               goto out_put_forget;
+
+       *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
+                          &outarg->attr, entry_attr_timeout(outarg),
+                          attr_version);
+       err = -ENOMEM;
+       if (!*inode) {
+               fuse_send_forget(fc, forget_req, outarg->nodeid, 1);
+               goto out;
         }
+       err = 0;
+
+ out_put_forget:
         fuse_put_request(fc, forget_req);
-       if (err && err != -ENOENT)
-               return ERR_PTR(err);
+ out:
+       return err;
+}
+
+static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
+                                 struct nameidata *nd)
+{
+       int err;
+       struct fuse_entry_out outarg;
+       struct inode *inode;
+       struct dentry *newent;
+       struct fuse_conn *fc = get_fuse_conn(dir);
+       bool outarg_valid = true;
+
+       err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
+                              &outarg, &inode);
+       if (err == -ENOENT) {
+               outarg_valid = false;
+               err = 0;
+       }
+       if (err)
+               goto out_err;
+
+       err = -EIO;
+       if (inode && get_node_id(inode) == FUSE_ROOT_ID)
+               goto out_iput;
  
         if (inode && S_ISDIR(inode->i_mode)) {
                 mutex_lock(&fc->inst_mutex);
-               err = fuse_d_add_directory(entry, inode);
+               newent = fuse_d_add_directory(entry, inode);
                 mutex_unlock(&fc->inst_mutex);
-               if (err) {
-                       iput(inode);
-                       return ERR_PTR(err);
-               }
-       } else
-               d_add(entry, inode);
+               err = PTR_ERR(newent);
+               if (IS_ERR(newent))
+                       goto out_iput;
+       } else {
+               newent = d_splice_alias(inode, entry);
+       }
  
+       entry = newent ? newent : entry;
         entry->d_op = &fuse_dentry_operations;
-       if (!err)
+       if (outarg_valid)
                 fuse_change_entry_timeout(entry, &outarg);
         else
                 fuse_invalidate_entry_cache(entry);
-       return NULL;
+
+       return newent;
+
+ out_iput:
+       iput(inode);
+ out_err:
+       return ERR_PTR(err);
  }
  
  /*
diff --git a/fs/fuse/file.c b/fs/fuse/file.c

index 8092f0d9fd1fa590ac87f7f0ec7a79331760acb6..67ff2c6a8f6309bc09746a65d29b8d55f0a90bb5 100644 (file)
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1341,6 +1341,11 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
         pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
         int err;
  
+       if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
+               /* NLM needs asynchronous locks, which we don't support yet */
+               return -ENOLCK;
+       }
+
         /* Unlock on close is handled by the flush method */
         if (fl->fl_flags & FL_CLOSE)
                 return 0;
@@ -1365,7 +1370,9 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
         struct fuse_conn *fc = get_fuse_conn(inode);
         int err;
  
-       if (cmd == F_GETLK) {
+       if (cmd == F_CANCELLK) {
+               err = 0;
+       } else if (cmd == F_GETLK) {
                 if (fc->no_lock) {
                         posix_test_lock(file, fl);
                         err = 0;
@@ -1373,7 +1380,7 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
                         err = fuse_getlk(file, fl);
         } else {
                 if (fc->no_lock)
-                       err = posix_lock_file_wait(file, fl);
+                       err = posix_lock_file(file, fl, NULL);
                 else
                         err = fuse_setlk(file, fl, 0);
         }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h

index bae948657c4fdfd268e86912c6b4fa6c1f944851..3a876076bdd1392c93b6a838af53eef104f8461d 100644 (file)
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -363,6 +363,9 @@ struct fuse_conn {
         /** Do not send separate SETATTR request before open(O_TRUNC)  */
         unsigned atomic_o_trunc : 1;
  
+       /** Filesystem supports NFS exporting.  Only set in INIT */
+       unsigned export_support : 1;
+
         /*
          * The following bitfields are only for optimization purposes
          * and hence races in setting them will not cause malfunction
@@ -464,6 +467,8 @@ static inline u64 get_node_id(struct inode *inode)
  /** Device operations */
  extern const struct file_operations fuse_dev_operations;
  
+extern struct dentry_operations fuse_dentry_operations;
+
  /**
   * Get a filled in inode
   */
@@ -471,6 +476,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
                         int generation, struct fuse_attr *attr,
                         u64 attr_valid, u64 attr_version);
  
+int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
+                    struct fuse_entry_out *outarg, struct inode **inode);
+
  /**
   * Send FORGET command
   */
@@ -604,6 +612,8 @@ void fuse_abort_conn(struct fuse_conn *fc);
   */
  void fuse_invalidate_attr(struct inode *inode);
  
+void fuse_invalidate_entry_cache(struct dentry *entry);
+
  /**
   * Acquire reference to fuse_conn
   */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c

index 3141690558c8aa6029128f7c2d8a5b9fc742696d..7d2f7d6e22e21e0e3ad630bc21497dcf097a19f1 100644 (file)
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -18,6 +18,7 @@
  #include <linux/statfs.h>
  #include <linux/random.h>
  #include <linux/sched.h>
+#include <linux/exportfs.h>
  
  MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
  MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -552,6 +553,174 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
         return fuse_iget(sb, 1, 0, &attr, 0, 0);
  }
  
+struct fuse_inode_handle
+{
+       u64 nodeid;
+       u32 generation;
+};
+
+static struct dentry *fuse_get_dentry(struct super_block *sb,
+                                     struct fuse_inode_handle *handle)
+{
+       struct fuse_conn *fc = get_fuse_conn_super(sb);
+       struct inode *inode;
+       struct dentry *entry;
+       int err = -ESTALE;
+
+       if (handle->nodeid == 0)
+               goto out_err;
+
+       inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
+       if (!inode) {
+               struct fuse_entry_out outarg;
+               struct qstr name;
+
+               if (!fc->export_support)
+                       goto out_err;
+
+               name.len = 1;
+               name.name = ".";
+               err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
+                                      &inode);
+               if (err && err != -ENOENT)
+                       goto out_err;
+               if (err || !inode) {
+                       err = -ESTALE;
+                       goto out_err;
+               }
+               err = -EIO;
+               if (get_node_id(inode) != handle->nodeid)
+                       goto out_iput;
+       }
+       err = -ESTALE;
+       if (inode->i_generation != handle->generation)
+               goto out_iput;
+
+       entry = d_alloc_anon(inode);
+       err = -ENOMEM;
+       if (!entry)
+               goto out_iput;
+
+       if (get_node_id(inode) != FUSE_ROOT_ID) {
+               entry->d_op = &fuse_dentry_operations;
+               fuse_invalidate_entry_cache(entry);
+       }
+
+       return entry;
+
+ out_iput:
+       iput(inode);
+ out_err:
+       return ERR_PTR(err);
+}
+
+static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
+                          int connectable)
+{
+       struct inode *inode = dentry->d_inode;
+       bool encode_parent = connectable && !S_ISDIR(inode->i_mode);
+       int len = encode_parent ? 6 : 3;
+       u64 nodeid;
+       u32 generation;
+
+       if (*max_len < len)
+               return  255;
+
+       nodeid = get_fuse_inode(inode)->nodeid;
+       generation = inode->i_generation;
+
+       fh[0] = (u32)(nodeid >> 32);
+       fh[1] = (u32)(nodeid & 0xffffffff);
+       fh[2] = generation;
+
+       if (encode_parent) {
+               struct inode *parent;
+
+               spin_lock(&dentry->d_lock);
+               parent = dentry->d_parent->d_inode;
+               nodeid = get_fuse_inode(parent)->nodeid;
+               generation = parent->i_generation;
+               spin_unlock(&dentry->d_lock);
+
+               fh[3] = (u32)(nodeid >> 32);
+               fh[4] = (u32)(nodeid & 0xffffffff);
+               fh[5] = generation;
+       }
+
+       *max_len = len;
+       return encode_parent ? 0x82 : 0x81;
+}
+
+static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
+               struct fid *fid, int fh_len, int fh_type)
+{
+       struct fuse_inode_handle handle;
+
+       if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
+               return NULL;
+
+       handle.nodeid = (u64) fid->raw[0] << 32;
+       handle.nodeid |= (u64) fid->raw[1];
+       handle.generation = fid->raw[2];
+       return fuse_get_dentry(sb, &handle);
+}
+
+static struct dentry *fuse_fh_to_parent(struct super_block *sb,
+               struct fid *fid, int fh_len, int fh_type)
+{
+       struct fuse_inode_handle parent;
+
+       if (fh_type != 0x82 || fh_len < 6)
+               return NULL;
+
+       parent.nodeid = (u64) fid->raw[3] << 32;
+       parent.nodeid |= (u64) fid->raw[4];
+       parent.generation = fid->raw[5];
+       return fuse_get_dentry(sb, &parent);
+}
+
+static struct dentry *fuse_get_parent(struct dentry *child)
+{
+       struct inode *child_inode = child->d_inode;
+       struct fuse_conn *fc = get_fuse_conn(child_inode);
+       struct inode *inode;
+       struct dentry *parent;
+       struct fuse_entry_out outarg;
+       struct qstr name;
+       int err;
+
+       if (!fc->export_support)
+               return ERR_PTR(-ESTALE);
+
+       name.len = 2;
+       name.name = "..";
+       err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
+                              &name, &outarg, &inode);
+       if (err && err != -ENOENT)
+               return ERR_PTR(err);
+       if (err || !inode)
+               return ERR_PTR(-ESTALE);
+
+       parent = d_alloc_anon(inode);
+       if (!parent) {
+               iput(inode);
+               return ERR_PTR(-ENOMEM);
+       }
+       if (get_node_id(inode) != FUSE_ROOT_ID) {
+               parent->d_op = &fuse_dentry_operations;
+               fuse_invalidate_entry_cache(parent);
+       }
+
+       return parent;
+}
+
+static const struct export_operations fuse_export_operations = {
+       .fh_to_dentry   = fuse_fh_to_dentry,
+       .fh_to_parent   = fuse_fh_to_parent,
+       .encode_fh      = fuse_encode_fh,
+       .get_parent     = fuse_get_parent,
+};
+
  static const struct super_operations fuse_super_operations = {
         .alloc_inode    = fuse_alloc_inode,
         .destroy_inode  = fuse_destroy_inode,
@@ -581,6 +750,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                                 fc->no_lock = 1;
                         if (arg->flags & FUSE_ATOMIC_O_TRUNC)
                                 fc->atomic_o_trunc = 1;
+                       if (arg->minor >= 9) {
+                               /* LOOKUP has dependency on proto version */
+                               if (arg->flags & FUSE_EXPORT_SUPPORT)
+                                       fc->export_support = 1;
+                       }
                         if (arg->flags & FUSE_BIG_WRITES)
                                 fc->big_writes = 1;
                 } else {
@@ -607,7 +781,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
         arg->minor = FUSE_KERNEL_MINOR_VERSION;
         arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
         arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
-               FUSE_BIG_WRITES;
+               FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES;
         req->in.h.opcode = FUSE_INIT;
         req->in.numargs = 1;
         req->in.args[0].size = sizeof(*arg);
@@ -652,6 +826,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
         sb->s_magic = FUSE_SUPER_MAGIC;
         sb->s_op = &fuse_super_operations;
         sb->s_maxbytes = MAX_LFS_FILESIZE;
+       sb->s_export_op = &fuse_export_operations;
  
         file = fget(d.fd);
         if (!file)
diff --git a/fs/hfs/bitmap.c b/fs/hfs/bitmap.c

index 24e75798ddf014c747d9537dfe475b0e5326c884..c6e97366e8ac3f362d0f08d63d0234d837e14103 100644 (file)
--- a/fs/hfs/bitmap.c
+++ b/fs/hfs/bitmap.c
@@ -145,7 +145,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits)
         if (!*num_bits)
                 return 0;
  
-       down(&HFS_SB(sb)->bitmap_lock);
+       mutex_lock(&HFS_SB(sb)->bitmap_lock);
         bitmap = HFS_SB(sb)->bitmap;
  
         pos = hfs_find_set_zero_bits(bitmap, HFS_SB(sb)->fs_ablocks, goal, num_bits);
@@ -162,7 +162,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits)
         HFS_SB(sb)->free_ablocks -= *num_bits;
         hfs_bitmap_dirty(sb);
  out:
-       up(&HFS_SB(sb)->bitmap_lock);
+       mutex_unlock(&HFS_SB(sb)->bitmap_lock);
         return pos;
  }
  
@@ -205,7 +205,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count)
         if ((start + count) > HFS_SB(sb)->fs_ablocks)
                 return -2;
  
-       down(&HFS_SB(sb)->bitmap_lock);
+       mutex_lock(&HFS_SB(sb)->bitmap_lock);
         /* bitmap is always on a 32-bit boundary */
         curr = HFS_SB(sb)->bitmap + (start / 32);
         len = count;
@@ -236,7 +236,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count)
         }
  out:
         HFS_SB(sb)->free_ablocks += len;
-       up(&HFS_SB(sb)->bitmap_lock);
+       mutex_unlock(&HFS_SB(sb)->bitmap_lock);
         hfs_bitmap_dirty(sb);
  
         return 0;
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c

index f6621a785202b9f3d26f562bd7402cfbedb589a9..9b9d6395bad36127b04a67b8048d149e2bc89d3a 100644 (file)
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -40,7 +40,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
         {
         struct hfs_mdb *mdb = HFS_SB(sb)->mdb;
         HFS_I(tree->inode)->flags = 0;
-       init_MUTEX(&HFS_I(tree->inode)->extents_lock);
+       mutex_init(&HFS_I(tree->inode)->extents_lock);
         switch (id) {
         case HFS_EXT_CNID:
                 hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize,
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c

index c176f67ba0a5488c116d283b444dc66f0844fa6d..2c16316d291794a875b5afc36908829cfe1f5ce5 100644 (file)
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -343,16 +343,16 @@ int hfs_get_block(struct inode *inode, sector_t block,
                 goto done;
         }
  
-       down(&HFS_I(inode)->extents_lock);
+       mutex_lock(&HFS_I(inode)->extents_lock);
         res = hfs_ext_read_extent(inode, ablock);
         if (!res)
                 dblock = hfs_ext_find_block(HFS_I(inode)->cached_extents,
                                             ablock - HFS_I(inode)->cached_start);
         else {
-               up(&HFS_I(inode)->extents_lock);
+               mutex_unlock(&HFS_I(inode)->extents_lock);
                 return -EIO;
         }
-       up(&HFS_I(inode)->extents_lock);
+       mutex_unlock(&HFS_I(inode)->extents_lock);
  
  done:
         map_bh(bh_result, sb, HFS_SB(sb)->fs_start +
@@ -375,7 +375,7 @@ int hfs_extend_file(struct inode *inode)
         u32 start, len, goal;
         int res;
  
-       down(&HFS_I(inode)->extents_lock);
+       mutex_lock(&HFS_I(inode)->extents_lock);
         if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks)
                 goal = hfs_ext_lastblock(HFS_I(inode)->first_extents);
         else {
@@ -425,7 +425,7 @@ int hfs_extend_file(struct inode *inode)
                         goto insert_extent;
         }
  out:
-       up(&HFS_I(inode)->extents_lock);
+       mutex_unlock(&HFS_I(inode)->extents_lock);
         if (!res) {
                 HFS_I(inode)->alloc_blocks += len;
                 mark_inode_dirty(inode);
@@ -487,7 +487,7 @@ void hfs_file_truncate(struct inode *inode)
         if (blk_cnt == alloc_cnt)
                 goto out;
  
-       down(&HFS_I(inode)->extents_lock);
+       mutex_lock(&HFS_I(inode)->extents_lock);
         hfs_find_init(HFS_SB(sb)->ext_tree, &fd);
         while (1) {
                 if (alloc_cnt == HFS_I(inode)->first_blocks) {
@@ -514,7 +514,7 @@ void hfs_file_truncate(struct inode *inode)
                 hfs_brec_remove(&fd);
         }
         hfs_find_exit(&fd);
-       up(&HFS_I(inode)->extents_lock);
+       mutex_unlock(&HFS_I(inode)->extents_lock);
  
         HFS_I(inode)->alloc_blocks = blk_cnt;
  out:
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h

index 147374b6f67525871efd8f7abb8320bd838a3be4..9955232fdf8c977abae64e327ca250468ee74654 100644 (file)
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -11,6 +11,7 @@
  
  #include <linux/slab.h>
  #include <linux/types.h>
+#include <linux/mutex.h>
  #include <linux/buffer_head.h>
  #include <linux/fs.h>
  
@@ -53,7 +54,7 @@ struct hfs_inode_info {
         struct list_head open_dir_list;
         struct inode *rsrc_inode;
  
-       struct semaphore extents_lock;
+       struct mutex extents_lock;
  
         u16 alloc_blocks, clump_blocks;
         sector_t fs_blocks;
@@ -139,7 +140,7 @@ struct hfs_sb_info {
  
         struct nls_table *nls_io, *nls_disk;
  
-       struct semaphore bitmap_lock;
+       struct mutex bitmap_lock;
  
         unsigned long flags;
  
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c

index 97f8446c4ff415e8646a868ecce82d6ef77c20eb..dc4ec640e875974f0588ee60f3e478a4f0e4f7c6 100644 (file)
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -150,7 +150,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
         if (!inode)
                 return NULL;
  
-       init_MUTEX(&HFS_I(inode)->extents_lock);
+       mutex_init(&HFS_I(inode)->extents_lock);
         INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
         hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
         inode->i_ino = HFS_SB(sb)->next_id++;
@@ -281,7 +281,7 @@ static int hfs_read_inode(struct inode *inode, void *data)
  
         HFS_I(inode)->flags = 0;
         HFS_I(inode)->rsrc_inode = NULL;
-       init_MUTEX(&HFS_I(inode)->extents_lock);
+       mutex_init(&HFS_I(inode)->extents_lock);
         INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
  
         /* Initialize the inode */
diff --git a/fs/hfs/super.c b/fs/hfs/super.c

index 8cf67974adf60ad79f1b88da04ba4d4fb71f3b68..ac2ec5ef66e4a244461b2c8dd3f027626f943ae6 100644 (file)
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -372,7 +372,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
  
         sb->s_op = &hfs_super_operations;
         sb->s_flags |= MS_NODIRATIME;
-       init_MUTEX(&sbi->bitmap_lock);
+       mutex_init(&sbi->bitmap_lock);
  
         res = hfs_mdb_get(sb);
         if (res) {
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c

index 12e899cd78866543edb5bfe6510ce113ced5d721..fec8f61227ffb28792a7b94f30417f003d786882 100644 (file)
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -199,16 +199,16 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
                 goto done;
         }
  
-       down(&HFSPLUS_I(inode).extents_lock);
+       mutex_lock(&HFSPLUS_I(inode).extents_lock);
         res = hfsplus_ext_read_extent(inode, ablock);
         if (!res) {
                 dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock -
                                              HFSPLUS_I(inode).cached_start);
         } else {
-               up(&HFSPLUS_I(inode).extents_lock);
+               mutex_unlock(&HFSPLUS_I(inode).extents_lock);
                 return -EIO;
         }
-       up(&HFSPLUS_I(inode).extents_lock);
+       mutex_unlock(&HFSPLUS_I(inode).extents_lock);
  
  done:
         dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock);
@@ -355,7 +355,7 @@ int hfsplus_file_extend(struct inode *inode)
                 return -ENOSPC;
         }
  
-       down(&HFSPLUS_I(inode).extents_lock);
+       mutex_lock(&HFSPLUS_I(inode).extents_lock);
         if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks)
                 goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents);
         else {
@@ -408,7 +408,7 @@ int hfsplus_file_extend(struct inode *inode)
                         goto insert_extent;
         }
  out:
-       up(&HFSPLUS_I(inode).extents_lock);
+       mutex_unlock(&HFSPLUS_I(inode).extents_lock);
         if (!res) {
                 HFSPLUS_I(inode).alloc_blocks += len;
                 mark_inode_dirty(inode);
@@ -465,7 +465,7 @@ void hfsplus_file_truncate(struct inode *inode)
         if (blk_cnt == alloc_cnt)
                 goto out;
  
-       down(&HFSPLUS_I(inode).extents_lock);
+       mutex_lock(&HFSPLUS_I(inode).extents_lock);
         hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd);
         while (1) {
                 if (alloc_cnt == HFSPLUS_I(inode).first_blocks) {
@@ -492,7 +492,7 @@ void hfsplus_file_truncate(struct inode *inode)
                 hfs_brec_remove(&fd);
         }
         hfs_find_exit(&fd);
-       up(&HFSPLUS_I(inode).extents_lock);
+       mutex_unlock(&HFSPLUS_I(inode).extents_lock);
  
         HFSPLUS_I(inode).alloc_blocks = blk_cnt;
  out:
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h

index 9e59537b43d56ef0ca178acce4ec900b7d50fcff..f027a905225fc5f7a5c2dfe8b5d1e0764ec96c80 100644 (file)
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -11,6 +11,7 @@
  #define _LINUX_HFSPLUS_FS_H
  
  #include <linux/fs.h>
+#include <linux/mutex.h>
  #include <linux/buffer_head.h>
  #include "hfsplus_raw.h"
  
@@ -154,7 +155,7 @@ struct hfsplus_sb_info {
  
  
  struct hfsplus_inode_info {
-       struct semaphore extents_lock;
+       struct mutex extents_lock;
         u32 clump_blocks, alloc_blocks;
         sector_t fs_blocks;
         /* Allocation extents from catalog record or volume header */
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c

index 67e1c8b467c496565359331e79ca7c706166a30d..cc3b5e24339b2089bf684cdafc7ebcee38ab928d 100644 (file)
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -163,7 +163,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
  
         inode->i_ino = dir->i_ino;
         INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
-       init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+       mutex_init(&HFSPLUS_I(inode).extents_lock);
         HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC;
  
         hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
@@ -316,7 +316,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
         inode->i_nlink = 1;
         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
         INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
-       init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+       mutex_init(&HFSPLUS_I(inode).extents_lock);
         atomic_set(&HFSPLUS_I(inode).opencnt, 0);
         HFSPLUS_I(inode).flags = 0;
         memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec));
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c

index ce97a54518d81d8b904c7a01c336133516547eb3..3859118531c7c3b64b3726cc85894d0ce721c12c 100644 (file)
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -34,7 +34,7 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino)
                 return inode;
  
         INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
-       init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+       mutex_init(&HFSPLUS_I(inode).extents_lock);
         HFSPLUS_I(inode).flags = 0;
         HFSPLUS_I(inode).rsrc_inode = NULL;
         atomic_set(&HFSPLUS_I(inode).opencnt, 0);
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c

index 6bd48f0a7047d7560f06492dfe57e53141be6a17..c2fb2dd0131f36cf2bcb3e1ad342a2c64cbd8422 100644 (file)
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -209,6 +209,11 @@ repeat:
  
         while (rs.len > 2) { /* There may be one byte for padding somewhere */
                 rr = (struct rock_ridge *)rs.chr;
+               /*
+                * Ignore rock ridge info if rr->len is out of range, but
+                * don't return -EIO because that would make the file
+                * invisible.
+                */
                 if (rr->len < 3)
                         goto out;       /* Something got screwed up here */
                 sig = isonum_721(rs.chr);
@@ -216,8 +221,12 @@ repeat:
                         goto eio;
                 rs.chr += rr->len;
                 rs.len -= rr->len;
+               /*
+                * As above, just ignore the rock ridge info if rr->len
+                * is bogus.
+                */
                 if (rs.len < 0)
-                       goto eio;       /* corrupted isofs */
+                       goto out;       /* Something got screwed up here */
  
                 switch (sig) {
                 case SIG('R', 'R'):
@@ -307,6 +316,11 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
  repeat:
         while (rs.len > 2) { /* There may be one byte for padding somewhere */
                 rr = (struct rock_ridge *)rs.chr;
+               /*
+                * Ignore rock ridge info if rr->len is out of range, but
+                * don't return -EIO because that would make the file
+                * invisible.
+                */
                 if (rr->len < 3)
                         goto out;       /* Something got screwed up here */
                 sig = isonum_721(rs.chr);
@@ -314,8 +328,12 @@ repeat:
                         goto eio;
                 rs.chr += rr->len;
                 rs.len -= rr->len;
+               /*
+                * As above, just ignore the rock ridge info if rr->len
+                * is bogus.
+                */
                 if (rs.len < 0)
-                       goto eio;       /* corrupted isofs */
+                       goto out;       /* Something got screwed up here */
  
                 switch (sig) {
  #ifndef CONFIG_ZISOFS          /* No flag for SF or ZF */
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c

index 5a8ca61498caf36cbb818fddc00eafbca2642f03..2eccbfaa1d48186934de93b8391e360fc77ede6d 100644 (file)
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -36,7 +36,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
  
  /*
   * When an ext3-ordered file is truncated, it is possible that many pages are
- * not sucessfully freed, because they are attached to a committing transaction.
+ * not successfully freed, because they are attached to a committing transaction.
   * After the transaction commits, these pages are left on the LRU, with no
   * ->mapping, and with attached buffers.  These pages are trivially reclaimable
   * by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -45,8 +45,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
   * So here, we have a buffer which has just come off the forget list.  Look to
   * see if we can strip all buffers from the backing page.
   *
- * Called under lock_journal(), and possibly under journal_datalist_lock.  The
- * caller provided us with a ref against the buffer, and we drop that here.
+ * Called under journal->j_list_lock.  The caller provided us with a ref
+ * against the buffer, and we drop that here.
   */
  static void release_buffer_page(struct buffer_head *bh)
  {
@@ -77,6 +77,19 @@ nope:
         __brelse(bh);
  }
  
+/*
+ * Decrement reference counter for data buffer. If it has been marked
+ * 'BH_Freed', release it and the page to which it belongs if possible.
+ */
+static void release_data_buffer(struct buffer_head *bh)
+{
+       if (buffer_freed(bh)) {
+               clear_buffer_freed(bh);
+               release_buffer_page(bh);
+       } else
+               put_bh(bh);
+}
+
  /*
   * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
   * held.  For ranking reasons we must trylock.  If we lose, schedule away and
@@ -172,7 +185,7 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
  /*
   *  Submit all the data buffers to disk
   */
-static void journal_submit_data_buffers(journal_t *journal,
+static int journal_submit_data_buffers(journal_t *journal,
                                 transaction_t *commit_transaction)
  {
         struct journal_head *jh;
@@ -180,6 +193,7 @@ static void journal_submit_data_buffers(journal_t *journal,
         int locked;
         int bufs = 0;
         struct buffer_head **wbuf = journal->j_wbuf;
+       int err = 0;
  
         /*
          * Whenever we unlock the journal and sleep, things can get added
@@ -231,7 +245,7 @@ write_out_data:
                         if (locked)
                                 unlock_buffer(bh);
                         BUFFER_TRACE(bh, "already cleaned up");
-                       put_bh(bh);
+                       release_data_buffer(bh);
                         continue;
                 }
                 if (locked && test_clear_buffer_dirty(bh)) {
@@ -253,15 +267,17 @@ write_out_data:
                         put_bh(bh);
                 } else {
                         BUFFER_TRACE(bh, "writeout complete: unfile");
+                       if (unlikely(!buffer_uptodate(bh)))
+                               err = -EIO;
                         __journal_unfile_buffer(jh);
                         jbd_unlock_bh_state(bh);
                         if (locked)
                                 unlock_buffer(bh);
                         journal_remove_journal_head(bh);
-                       /* Once for our safety reference, once for
+                       /* One for our safety reference, other for
                          * journal_remove_journal_head() */
                         put_bh(bh);
-                       put_bh(bh);
+                       release_data_buffer(bh);
                 }
  
                 if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
@@ -271,6 +287,8 @@ write_out_data:
         }
         spin_unlock(&journal->j_list_lock);
         journal_do_submit_data(wbuf, bufs);
+
+       return err;
  }
  
  /*
@@ -410,8 +428,7 @@ void journal_commit_transaction(journal_t *journal)
          * Now start flushing things to disk, in the order they appear
          * on the transaction lists.  Data blocks go first.
          */
-       err = 0;
-       journal_submit_data_buffers(journal, commit_transaction);
+       err = journal_submit_data_buffers(journal, commit_transaction);
  
         /*
          * Wait for all previously submitted IO to complete.
@@ -426,10 +443,21 @@ void journal_commit_transaction(journal_t *journal)
                 if (buffer_locked(bh)) {
                         spin_unlock(&journal->j_list_lock);
                         wait_on_buffer(bh);
-                       if (unlikely(!buffer_uptodate(bh)))
-                               err = -EIO;
                         spin_lock(&journal->j_list_lock);
                 }
+               if (unlikely(!buffer_uptodate(bh))) {
+                       if (TestSetPageLocked(bh->b_page)) {
+                               spin_unlock(&journal->j_list_lock);
+                               lock_page(bh->b_page);
+                               spin_lock(&journal->j_list_lock);
+                       }
+                       if (bh->b_page->mapping)
+                               set_bit(AS_EIO, &bh->b_page->mapping->flags);
+
+                       unlock_page(bh->b_page);
+                       SetPageError(bh->b_page);
+                       err = -EIO;
+               }
                 if (!inverted_lock(journal, bh)) {
                         put_bh(bh);
                         spin_lock(&journal->j_list_lock);
@@ -443,17 +471,21 @@ void journal_commit_transaction(journal_t *journal)
                 } else {
                         jbd_unlock_bh_state(bh);
                 }
-               put_bh(bh);
+               release_data_buffer(bh);
                 cond_resched_lock(&journal->j_list_lock);
         }
         spin_unlock(&journal->j_list_lock);
  
-       if (err)
-               journal_abort(journal, err);
+       if (err) {
+               char b[BDEVNAME_SIZE];
  
-       journal_write_revoke_records(journal, commit_transaction);
+               printk(KERN_WARNING
+                       "JBD: Detected IO errors while flushing file data "
+                       "on %s\n", bdevname(journal->j_fs_dev, b));
+               err = 0;
+       }
  
-       jbd_debug(3, "JBD: commit phase 2\n");
+       journal_write_revoke_records(journal, commit_transaction);
  
         /*
          * If we found any dirty or locked buffers, then we should have
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c

index b99c3b3654c49ee33f86bc8883319a55f11a9711..aa7143a8349bdd0fcb841e70a252d8d9047777bf 100644 (file)
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(journal_set_features);
  EXPORT_SYMBOL(journal_create);
  EXPORT_SYMBOL(journal_load);
  EXPORT_SYMBOL(journal_destroy);
-EXPORT_SYMBOL(journal_update_superblock);
  EXPORT_SYMBOL(journal_abort);
  EXPORT_SYMBOL(journal_errno);
  EXPORT_SYMBOL(journal_ack_err);
@@ -1636,9 +1635,10 @@ static int journal_init_journal_head_cache(void)
  
  static void journal_destroy_journal_head_cache(void)
  {
-       J_ASSERT(journal_head_cache != NULL);
-       kmem_cache_destroy(journal_head_cache);
-       journal_head_cache = NULL;
+       if (journal_head_cache) {
+               kmem_cache_destroy(journal_head_cache);
+               journal_head_cache = NULL;
+       }
  }
  
  /*
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c

index 1bb43e987f4b4ff4f0e37a2d578d34c1642cf6c0..c7bd649bbbdcfe032c46c52ed235bfda7a5e40e9 100644 (file)
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -166,138 +166,123 @@ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
         return NULL;
  }
  
+void journal_destroy_revoke_caches(void)
+{
+       if (revoke_record_cache) {
+               kmem_cache_destroy(revoke_record_cache);
+               revoke_record_cache = NULL;
+       }
+       if (revoke_table_cache) {
+               kmem_cache_destroy(revoke_table_cache);
+               revoke_table_cache = NULL;
+       }
+}
+
  int __init journal_init_revoke_caches(void)
  {
+       J_ASSERT(!revoke_record_cache);
+       J_ASSERT(!revoke_table_cache);
+
         revoke_record_cache = kmem_cache_create("revoke_record",
                                            sizeof(struct jbd_revoke_record_s),
                                            0,
                                            SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
                                            NULL);
         if (!revoke_record_cache)
-               return -ENOMEM;
+               goto record_cache_failure;
  
         revoke_table_cache = kmem_cache_create("revoke_table",
                                            sizeof(struct jbd_revoke_table_s),
                                            0, SLAB_TEMPORARY, NULL);
-       if (!revoke_table_cache) {
-               kmem_cache_destroy(revoke_record_cache);
-               revoke_record_cache = NULL;
-               return -ENOMEM;
-       }
+       if (!revoke_table_cache)
+               goto table_cache_failure;
+
         return 0;
-}
  
-void journal_destroy_revoke_caches(void)
-{
-       kmem_cache_destroy(revoke_record_cache);
-       revoke_record_cache = NULL;
-       kmem_cache_destroy(revoke_table_cache);
-       revoke_table_cache = NULL;
+table_cache_failure:
+       journal_destroy_revoke_caches();
+record_cache_failure:
+       return -ENOMEM;
  }
  
-/* Initialise the revoke table for a given journal to a given size. */
-
-int journal_init_revoke(journal_t *journal, int hash_size)
+static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
  {
-       int shift, tmp;
+       int shift = 0;
+       int tmp = hash_size;
+       struct jbd_revoke_table_s *table;
  
-       J_ASSERT (journal->j_revoke_table[0] == NULL);
+       table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+       if (!table)
+               goto out;
  
-       shift = 0;
-       tmp = hash_size;
         while((tmp >>= 1UL) != 0UL)
                 shift++;
  
-       journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
-       if (!journal->j_revoke_table[0])
-               return -ENOMEM;
-       journal->j_revoke = journal->j_revoke_table[0];
-
-       /* Check that the hash_size is a power of two */
-       J_ASSERT(is_power_of_2(hash_size));
-
-       journal->j_revoke->hash_size = hash_size;
-
-       journal->j_revoke->hash_shift = shift;
-
-       journal->j_revoke->hash_table =
+       table->hash_size = hash_size;
+       table->hash_shift = shift;
+       table->hash_table =
                 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
-       if (!journal->j_revoke->hash_table) {
-               kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
-               journal->j_revoke = NULL;
-               return -ENOMEM;
+       if (!table->hash_table) {
+               kmem_cache_free(revoke_table_cache, table);
+               table = NULL;
+               goto out;
         }
  
         for (tmp = 0; tmp < hash_size; tmp++)
-               INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+               INIT_LIST_HEAD(&table->hash_table[tmp]);
  
-       journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
-       if (!journal->j_revoke_table[1]) {
-               kfree(journal->j_revoke_table[0]->hash_table);
-               kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
-               return -ENOMEM;
+out:
+       return table;
+}
+
+static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table)
+{
+       int i;
+       struct list_head *hash_list;
+
+       for (i = 0; i < table->hash_size; i++) {
+               hash_list = &table->hash_table[i];
+               J_ASSERT(list_empty(hash_list));
         }
  
-       journal->j_revoke = journal->j_revoke_table[1];
+       kfree(table->hash_table);
+       kmem_cache_free(revoke_table_cache, table);
+}
  
-       /* Check that the hash_size is a power of two */
+/* Initialise the revoke table for a given journal to a given size. */
+int journal_init_revoke(journal_t *journal, int hash_size)
+{
+       J_ASSERT(journal->j_revoke_table[0] == NULL);
         J_ASSERT(is_power_of_2(hash_size));
  
-       journal->j_revoke->hash_size = hash_size;
+       journal->j_revoke_table[0] = journal_init_revoke_table(hash_size);
+       if (!journal->j_revoke_table[0])
+               goto fail0;
  
-       journal->j_revoke->hash_shift = shift;
+       journal->j_revoke_table[1] = journal_init_revoke_table(hash_size);
+       if (!journal->j_revoke_table[1])
+               goto fail1;
  
-       journal->j_revoke->hash_table =
-               kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
-       if (!journal->j_revoke->hash_table) {
-               kfree(journal->j_revoke_table[0]->hash_table);
-               kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
-               kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
-               journal->j_revoke = NULL;
-               return -ENOMEM;
-       }
-
-       for (tmp = 0; tmp < hash_size; tmp++)
-               INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+       journal->j_revoke = journal->j_revoke_table[1];
  
         spin_lock_init(&journal->j_revoke_lock);
  
         return 0;
-}
  
-/* Destoy a journal's revoke table.  The table must already be empty! */
+fail1:
+       journal_destroy_revoke_table(journal->j_revoke_table[0]);
+fail0:
+       return -ENOMEM;
+}
  
+/* Destroy a journal's revoke table.  The table must already be empty! */
  void journal_destroy_revoke(journal_t *journal)
  {
-       struct jbd_revoke_table_s *table;
-       struct list_head *hash_list;
-       int i;
-
-       table = journal->j_revoke_table[0];
-       if (!table)
-               return;
-
-       for (i=0; i<table->hash_size; i++) {
-               hash_list = &table->hash_table[i];
-               J_ASSERT (list_empty(hash_list));
-       }
-
-       kfree(table->hash_table);
-       kmem_cache_free(revoke_table_cache, table);
-       journal->j_revoke = NULL;
-
-       table = journal->j_revoke_table[1];
-       if (!table)
-               return;
-
-       for (i=0; i<table->hash_size; i++) {
-               hash_list = &table->hash_table[i];
-               J_ASSERT (list_empty(hash_list));
-       }
-
-       kfree(table->hash_table);
-       kmem_cache_free(revoke_table_cache, table);
         journal->j_revoke = NULL;
+       if (journal->j_revoke_table[0])
+               journal_destroy_revoke_table(journal->j_revoke_table[0]);
+       if (journal->j_revoke_table[1])
+               journal_destroy_revoke_table(journal->j_revoke_table[1]);
  }
  
  
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c

index 67ff2024c23c7d21950407ce8dfea088a090f1c4..8dee32007500e3200451b9334b849f2362611cc7 100644 (file)
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1648,12 +1648,42 @@ out:
         return;
  }
  
+/*
+ * journal_try_to_free_buffers() could race with journal_commit_transaction()
+ * The latter might still hold a count on buffers when inspecting
+ * them on t_syncdata_list or t_locked_list.
+ *
+ * journal_try_to_free_buffers() will call this function to
+ * wait for the current transaction to finish syncing data buffers, before
+ * trying to free that buffer.
+ *
+ * Takes journal->j_state_lock itself, so call it without that lock held.
+ */
+static void journal_wait_for_transaction_sync_data(journal_t *journal)
+{
+       transaction_t *transaction = NULL;
+       tid_t tid;
+
+       spin_lock(&journal->j_state_lock);
+       transaction = journal->j_committing_transaction;
+
+       if (!transaction) {
+               spin_unlock(&journal->j_state_lock);
+               return;
+       }
+
+       tid = transaction->t_tid;
+       spin_unlock(&journal->j_state_lock);
+       log_wait_commit(journal, tid);
+}
  
  /**
   * int journal_try_to_free_buffers() - try to free page buffers.
   * @journal: journal for operation
   * @page: to try and free
- * @unused_gfp_mask: unused
+ * @gfp_mask: we use the mask to detect how hard we should try to release
+ * buffers. If __GFP_WAIT and __GFP_FS are set, we wait for commit code to
+ * release the buffers.
   *
   *
   * For all the buffers on this page,
@@ -1682,9 +1712,11 @@ out:
   * journal_try_to_free_buffer() is changing its state.  But that
   * cannot happen because we never reallocate freed data as metadata
   * while the data is part of a transaction.  Yes?
+ *
+ * Return 0 on failure, 1 on success
   */
  int journal_try_to_free_buffers(journal_t *journal,
-                               struct page *page, gfp_t unused_gfp_mask)
+                               struct page *page, gfp_t gfp_mask)
  {
         struct buffer_head *head;
         struct buffer_head *bh;
@@ -1713,7 +1745,28 @@ int journal_try_to_free_buffers(journal_t *journal,
                 if (buffer_jbd(bh))
                         goto busy;
         } while ((bh = bh->b_this_page) != head);
+
         ret = try_to_free_buffers(page);
+
+       /*
+        * There are a number of places where journal_try_to_free_buffers()
+        * could race with journal_commit_transaction(): the latter still
+        * holds a reference to the buffers while processing them, so
+        * try_to_free_buffers() fails to free them. Some callers of
+        * releasepage() require the page buffers to be dropped and otherwise
+        * treat the failure to free as an error (e.g. generic_file_direct_IO()).
+        *
+        * So, if the caller of try_to_release_page() wants the synchronous
+        * behaviour (i.e. make sure buffers are dropped upon return),
+        * let's wait for the current transaction to finish flush of
+        * dirty data buffers, then try to free those buffers again,
+        * with the journal locked.
+        */
+       if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
+               journal_wait_for_transaction_sync_data(journal);
+               ret = try_to_free_buffers(page);
+       }
+
  busy:
         return ret;
  }
diff --git a/fs/jfs/super.c b/fs/jfs/super.c

index 0288e6d7936a200e51b585914cf5ae76847b91a3..359c091d8965be31fa9c868cd73d9f0f87e0ce03 100644 (file)
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -22,6 +22,7 @@
  #include <linux/parser.h>
  #include <linux/completion.h>
  #include <linux/vfs.h>
+#include <linux/quotaops.h>
  #include <linux/mount.h>
  #include <linux/moduleparam.h>
  #include <linux/kthread.h>
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c

index 1f6dc518505c90b77e3bed2217f8d8de93f0803a..31668b690e03d4e482a8affb7657a310cffac1c0 100644 (file)
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -582,7 +582,15 @@ again:
         }
         if (status < 0)
                 goto out_unlock;
-       status = nlm_stat_to_errno(resp->status);
+       /*
+        * EAGAIN doesn't make sense for sleeping locks, and in some
+        * cases NLM_LCK_DENIED is returned for a permanent error.  So
+        * turn it into an ENOLCK.
+        */
+       if (resp->status == nlm_lck_denied && (fl_flags & FL_SLEEP))
+               status = -ENOLCK;
+       else
+               status = nlm_stat_to_errno(resp->status);
  out_unblock:
         nlmclnt_finish_block(block);
  out:
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c

index 821b9acdfb66accf6f2428a1eee2b2fcdcaa1620..cf0d5c2c318d6002330f1bca7965b09e8a047e9d 100644 (file)
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -418,8 +418,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
                         goto out;
                 case -EAGAIN:
                         ret = nlm_lck_denied;
-                       break;
-               case -EINPROGRESS:
+                       goto out;
+               case FILE_LOCK_DEFERRED:
                         if (wait)
                                 break;
                         /* Filesystem lock operation is in progress
@@ -434,10 +434,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
                         goto out;
         }
  
-       ret = nlm_lck_denied;
-       if (!wait)
-               goto out;
-
         ret = nlm_lck_blocked;
  
         /* Append to list of blocked */
@@ -507,7 +503,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
         }
  
         error = vfs_test_lock(file->f_file, &lock->fl);
-       if (error == -EINPROGRESS) {
+       if (error == FILE_LOCK_DEFERRED) {
                 ret = nlmsvc_defer_lock_rqst(rqstp, block);
                 goto out;
         }
@@ -731,8 +727,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
         switch (error) {
         case 0:
                 break;
-       case -EAGAIN:
-       case -EINPROGRESS:
+       case FILE_LOCK_DEFERRED:
                 dprintk("lockd: lock still blocked error %d\n", error);
                 nlmsvc_insert_block(block, NLM_NEVER);
                 nlmsvc_release_block(block);
diff --git a/fs/locks.c b/fs/locks.c

index dce8c747371c77e618ee387158c37451daf63c97..01490300f7cbd7f6257f38b9de6ff34b5ce6f78d 100644 (file)
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -779,8 +779,10 @@ find_conflict:
                 if (!flock_locks_conflict(request, fl))
                         continue;
                 error = -EAGAIN;
-               if (request->fl_flags & FL_SLEEP)
-                       locks_insert_block(fl, request);
+               if (!(request->fl_flags & FL_SLEEP))
+                       goto out;
+               error = FILE_LOCK_DEFERRED;
+               locks_insert_block(fl, request);
                 goto out;
         }
         if (request->fl_flags & FL_ACCESS)
@@ -836,7 +838,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
                         error = -EDEADLK;
                         if (posix_locks_deadlock(request, fl))
                                 goto out;
-                       error = -EAGAIN;
+                       error = FILE_LOCK_DEFERRED;
                         locks_insert_block(fl, request);
                         goto out;
                 }
@@ -1035,7 +1037,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
         might_sleep ();
         for (;;) {
                 error = posix_lock_file(filp, fl, NULL);
-               if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
+               if (error != FILE_LOCK_DEFERRED)
                         break;
                 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
                 if (!error)
@@ -1107,9 +1109,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
  
         for (;;) {
                 error = __posix_lock_file(inode, &fl, NULL);
-               if (error != -EAGAIN)
-                       break;
-               if (!(fl.fl_flags & FL_SLEEP))
+               if (error != FILE_LOCK_DEFERRED)
                         break;
                 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
                 if (!error) {
@@ -1531,7 +1531,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
         might_sleep();
         for (;;) {
                 error = flock_lock_file(filp, fl);
-               if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
+               if (error != FILE_LOCK_DEFERRED)
                         break;
                 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
                 if (!error)
@@ -1716,17 +1716,17 @@ out:
   * fl_grant is set. Callers expecting ->lock() to return asynchronously
   * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
   * the request is for a blocking lock. When ->lock() does return asynchronously,
- * it must return -EINPROGRESS, and call ->fl_grant() when the lock
+ * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock
   * request completes.
   * If the request is for non-blocking lock the file system should return
- * -EINPROGRESS then try to get the lock and call the callback routine with
- * the result. If the request timed out the callback routine will return a
+ * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine
+ * with the result. If the request timed out the callback routine will return a
   * nonzero return code and the file system should release the lock. The file
   * system is also responsible to keep a corresponding posix lock when it
   * grants a lock so the VFS can find out which locks are locally held and do
   * the correct lock cleanup when required.
   * The underlying filesystem must not drop the kernel lock or call
- * ->fl_grant() before returning to the caller with a -EINPROGRESS
+ * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED
   * return code.
   */
  int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
@@ -1738,6 +1738,30 @@ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, str
  }
  EXPORT_SYMBOL_GPL(vfs_lock_file);
  
+static int do_lock_file_wait(struct file *filp, unsigned int cmd,
+                            struct file_lock *fl)
+{
+       int error;
+
+       error = security_file_lock(filp, fl->fl_type);
+       if (error)
+               return error;
+
+       for (;;) {
+               error = vfs_lock_file(filp, cmd, fl, NULL);
+               if (error != FILE_LOCK_DEFERRED)
+                       break;
+               error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
+               if (!error)
+                       continue;
+
+               locks_delete_block(fl);
+               break;
+       }
+
+       return error;
+}
+
  /* Apply the lock described by l to an open file descriptor.
   * This implements both the F_SETLK and F_SETLKW commands of fcntl().
   */
@@ -1795,26 +1819,7 @@ again:
                 goto out;
         }
  
-       error = security_file_lock(filp, file_lock->fl_type);
-       if (error)
-               goto out;
-
-       if (filp->f_op && filp->f_op->lock != NULL)
-               error = filp->f_op->lock(filp, cmd, file_lock);
-       else {
-               for (;;) {
-                       error = posix_lock_file(filp, file_lock, NULL);
-                       if (error != -EAGAIN || cmd == F_SETLK)
-                               break;
-                       error = wait_event_interruptible(file_lock->fl_wait,
-                                       !file_lock->fl_next);
-                       if (!error)
-                               continue;
-
-                       locks_delete_block(file_lock);
-                       break;
-               }
-       }
+       error = do_lock_file_wait(filp, cmd, file_lock);
  
         /*
          * Attempt to detect a close/fcntl race and recover by
@@ -1932,26 +1937,7 @@ again:
                 goto out;
         }
  
-       error = security_file_lock(filp, file_lock->fl_type);
-       if (error)
-               goto out;
-
-       if (filp->f_op && filp->f_op->lock != NULL)
-               error = filp->f_op->lock(filp, cmd, file_lock);
-       else {
-               for (;;) {
-                       error = posix_lock_file(filp, file_lock, NULL);
-                       if (error != -EAGAIN || cmd == F_SETLK64)
-                               break;
-                       error = wait_event_interruptible(file_lock->fl_wait,
-                                       !file_lock->fl_next);
-                       if (!error)
-                               continue;
-
-                       locks_delete_block(file_lock);
-                       break;
-               }
-       }
+       error = do_lock_file_wait(filp, cmd, file_lock);
  
         /*
          * Attempt to detect a close/fcntl race and recover by
diff --git a/fs/minix/inode.c b/fs/minix/inode.c

index 84f6242ba6fc7262b2cf90a7d126f5192c6a13d9..523d73713418a21c64225b6d556c830ce21d09ce 100644 (file)
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -256,9 +256,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
         if (!s->s_root)
                 goto out_iput;
  
-       if (!NO_TRUNCATE)
-               s->s_root->d_op = &minix_dentry_operations;
-
         if (!(s->s_flags & MS_RDONLY)) {
                 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
                         ms->s_state &= ~MINIX_VALID_FS;
diff --git a/fs/minix/minix.h b/fs/minix/minix.h

index 326edfe96108f5eec496d57bd96092ff25b5e366..e6a0b193bea4141567f4462574584c4064a72a26 100644 (file)
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -2,11 +2,6 @@
  #include <linux/pagemap.h>
  #include <linux/minix_fs.h>
  
-/*
- * change the define below to 0 if you want names > info->s_namelen chars to be
- * truncated. Else they will be disallowed (ENAMETOOLONG).
- */
-#define NO_TRUNCATE 1
  #define INODE_VERSION(inode)   minix_sb(inode->i_sb)->s_version
  #define MINIX_V1               0x0001          /* original minix fs */
  #define MINIX_V2               0x0002          /* minix V2 fs */
@@ -83,7 +78,6 @@ extern const struct inode_operations minix_file_inode_operations;
  extern const struct inode_operations minix_dir_inode_operations;
  extern const struct file_operations minix_file_operations;
  extern const struct file_operations minix_dir_operations;
-extern struct dentry_operations minix_dentry_operations;
  
  static inline struct minix_sb_info *minix_sb(struct super_block *sb)
  {
diff --git a/fs/minix/namei.c b/fs/minix/namei.c

index 102241bc9c7965e646585d871f4e1436a0057ccb..32b131cd6121d2599d84ee60af29ceac261453d9 100644 (file)
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -18,30 +18,6 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
         return err;
  }
  
-static int minix_hash(struct dentry *dentry, struct qstr *qstr)
-{
-       unsigned long hash;
-       int i;
-       const unsigned char *name;
-
-       i = minix_sb(dentry->d_inode->i_sb)->s_namelen;
-       if (i >= qstr->len)
-               return 0;
-       /* Truncate the name in place, avoids having to define a compare
-          function. */
-       qstr->len = i;
-       name = qstr->name;
-       hash = init_name_hash();
-       while (i--)
-               hash = partial_name_hash(*name++, hash);
-       qstr->hash = end_name_hash(hash);
-       return 0;
-}
-
-struct dentry_operations minix_dentry_operations = {
-       .d_hash         = minix_hash,
-};
-
  static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
  {
         struct inode * inode = NULL;
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c

index 1f7f2956412ac9527be5867aa5c5bca8ecc81db6..e844b9809d27de0aa1447dec920087b6978b8c7c 100644 (file)
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -14,12 +14,7 @@
  
  /* Characters that are undesirable in an MS-DOS file name */
  static unsigned char bad_chars[] = "*?<>|\"";
-static unsigned char bad_if_strict_pc[] = "+=,; ";
-/* GEMDOS is less restrictive */
-static unsigned char bad_if_strict_atari[] = " ";
-
-#define bad_if_strict(opts) \
-       ((opts)->atari ? bad_if_strict_atari : bad_if_strict_pc)
+static unsigned char bad_if_strict[] = "+=,; ";
  
  /***** Formats an MS-DOS file name. Rejects invalid names. */
  static int msdos_format_name(const unsigned char *name, int len,
@@ -40,21 +35,20 @@ static int msdos_format_name(const unsigned char *name, int len,
                         /* Get rid of dot - test for it elsewhere */
                         name++;
                         len--;
-               } else if (!opts->atari)
+               } else
                         return -EINVAL;
         }
         /*
-        * disallow names that _really_ start with a dot for MS-DOS,
-        * GEMDOS does not care
+        * disallow names that _really_ start with a dot
          */
-       space = !opts->atari;
+       space = 1;
         c = 0;
         for (walk = res; len && walk - res < 8; walk++) {
                 c = *name++;
                 len--;
                 if (opts->name_check != 'r' && strchr(bad_chars, c))
                         return -EINVAL;
-               if (opts->name_check == 's' && strchr(bad_if_strict(opts), c))
+               if (opts->name_check == 's' && strchr(bad_if_strict, c))
                         return -EINVAL;
                 if (c >= 'A' && c <= 'Z' && opts->name_check == 's')
                         return -EINVAL;
@@ -94,7 +88,7 @@ static int msdos_format_name(const unsigned char *name, int len,
                         if (opts->name_check != 'r' && strchr(bad_chars, c))
                                 return -EINVAL;
                         if (opts->name_check == 's' &&
-                           strchr(bad_if_strict(opts), c))
+                           strchr(bad_if_strict, c))
                                 return -EINVAL;
                         if (c < ' ' || c == ':' || c == '\\')
                                 return -EINVAL;
@@ -243,6 +237,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
                            int is_dir, int is_hid, int cluster,
                            struct timespec *ts, struct fat_slot_info *sinfo)
  {
+       struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
         struct msdos_dir_entry de;
         __le16 time, date;
         int err;
@@ -252,7 +247,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
         if (is_hid)
                 de.attr |= ATTR_HIDDEN;
         de.lcase = 0;
-       fat_date_unix2dos(ts->tv_sec, &time, &date);
+       fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
         de.cdate = de.adate = 0;
         de.ctime = 0;
         de.ctime_cs = 0;
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c

index 6b6225ac4926f8364762ed5c74bda3a5681235b7..15c6faeec77c118a77b11c50287a6a43828bd9a2 100644 (file)
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -19,6 +19,13 @@
  
  #define NFSDDBG_FACILITY               NFSDDBG_LOCKD
  
+#ifdef CONFIG_LOCKD_V4
+#define nlm_stale_fh   nlm4_stale_fh
+#define nlm_failed     nlm4_failed
+#else
+#define nlm_stale_fh   nlm_lck_denied_nolocks
+#define nlm_failed     nlm_lck_denied_nolocks
+#endif
  /*
   * Note: we hold the dentry use count while the file is open.
   */
@@ -47,12 +54,10 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
                 return 0;
         case nfserr_dropit:
                 return nlm_drop_reply;
-#ifdef CONFIG_LOCKD_V4
         case nfserr_stale:
-               return nlm4_stale_fh;
-#endif
+               return nlm_stale_fh;
         default:
-               return nlm_lck_denied;
+               return nlm_failed;
         }
  }
  
diff --git a/fs/partitions/check.c b/fs/partitions/check.c

index efef715135d34e459ce450eb24ca41e4234cf471..7d6b34e201db4a8ba08525f942399eb742a57731 100644 (file)
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -344,18 +344,18 @@ static ssize_t whole_disk_show(struct device *dev,
  static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
                    whole_disk_show, NULL);
  
-void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
+int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
  {
         struct hd_struct *p;
         int err;
  
         p = kzalloc(sizeof(*p), GFP_KERNEL);
         if (!p)
-               return;
+               return -ENOMEM;
  
         if (!init_part_stats(p)) {
-               kfree(p);
-               return;
+               err = -ENOMEM;
+               goto out0;
         }
         p->start_sect = start;
         p->nr_sects = len;
@@ -378,15 +378,31 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
  
         /* delay uevent until 'holders' subdir is created */
         p->dev.uevent_suppress = 1;
-       device_add(&p->dev);
+       err = device_add(&p->dev);
+       if (err)
+               goto out1;
         partition_sysfs_add_subdir(p);
         p->dev.uevent_suppress = 0;
-       if (flags & ADDPART_FLAG_WHOLEDISK)
+       if (flags & ADDPART_FLAG_WHOLEDISK) {
                 err = device_create_file(&p->dev, &dev_attr_whole_disk);
+               if (err)
+                       goto out2;
+       }
  
         /* suppress uevent if the disk supresses it */
         if (!disk->dev.uevent_suppress)
                 kobject_uevent(&p->dev.kobj, KOBJ_ADD);
+
+       return 0;
+
+out2:
+       device_del(&p->dev);
+out1:
+       put_device(&p->dev);
+       free_part_stats(p);
+out0:
+       kfree(p);
+       return err;
  }
  
  /* Not exported, helper to add_disk(). */
@@ -483,10 +499,16 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
                 if (!size)
                         continue;
                 if (from + size > get_capacity(disk)) {
-                       printk(" %s: p%d exceeds device capacity\n",
+                       printk(KERN_ERR " %s: p%d exceeds device capacity\n",
                                 disk->disk_name, p);
+                       continue;
+               }
+               res = add_partition(disk, p, from, size, state->parts[p].flags);
+               if (res) {
+                       printk(KERN_ERR " %s: p%d could not be added: %d\n",
+                               disk->disk_name, p, -res);
+                       continue;
                 }
-               add_partition(disk, p, from, size, state->parts[p].flags);
  #ifdef CONFIG_BLK_DEV_MD
                 if (state->parts[p].flags & ADDPART_FLAG_RAID)
                         md_autodetect_dev(bdev->bd_dev+p);
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c

index e7b07006bc4173c5489c3bb1532f01e0473e4bf1..038a6022152fd15d84fb48b70fab816b971a8f7e 100644 (file)
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -95,13 +95,6 @@
  #include "check.h"
  #include "efi.h"
  
-#undef EFI_DEBUG
-#ifdef EFI_DEBUG
-#define Dprintk(x...) printk(KERN_DEBUG x)
-#else
-#define Dprintk(x...)
-#endif
-
  /* This allows a kernel command line option 'gpt' to override
   * the test for invalid PMBR.  Not __initdata because reloading
   * the partition tables happens after init too.
@@ -305,10 +298,10 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
  
         /* Check the GUID Partition Table signature */
         if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) {
-               Dprintk("GUID Partition Table Header signature is wrong:"
-                       "%lld != %lld\n",
-                       (unsigned long long)le64_to_cpu((*gpt)->signature),
-                       (unsigned long long)GPT_HEADER_SIGNATURE);
+               pr_debug("GUID Partition Table Header signature is wrong:"
+                        "%lld != %lld\n",
+                        (unsigned long long)le64_to_cpu((*gpt)->signature),
+                        (unsigned long long)GPT_HEADER_SIGNATURE);
                 goto fail;
         }
  
@@ -318,9 +311,8 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
         crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size));
  
         if (crc != origcrc) {
-               Dprintk
-                   ("GUID Partition Table Header CRC is wrong: %x != %x\n",
-                    crc, origcrc);
+               pr_debug("GUID Partition Table Header CRC is wrong: %x != %x\n",
+                        crc, origcrc);
                 goto fail;
         }
         (*gpt)->header_crc32 = cpu_to_le32(origcrc);
@@ -328,9 +320,9 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
         /* Check that the my_lba entry points to the LBA that contains
          * the GUID Partition Table */
         if (le64_to_cpu((*gpt)->my_lba) != lba) {
-               Dprintk("GPT my_lba incorrect: %lld != %lld\n",
-                       (unsigned long long)le64_to_cpu((*gpt)->my_lba),
-                       (unsigned long long)lba);
+               pr_debug("GPT my_lba incorrect: %lld != %lld\n",
+                        (unsigned long long)le64_to_cpu((*gpt)->my_lba),
+                        (unsigned long long)lba);
                 goto fail;
         }
  
@@ -339,15 +331,15 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
          */
         lastlba = last_lba(bdev);
         if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
-               Dprintk("GPT: first_usable_lba incorrect: %lld > %lld\n",
-                       (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
-                       (unsigned long long)lastlba);
+               pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
+                        (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
+                        (unsigned long long)lastlba);
                 goto fail;
         }
         if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) {
-               Dprintk("GPT: last_usable_lba incorrect: %lld > %lld\n",
-                       (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
-                       (unsigned long long)lastlba);
+               pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n",
+                        (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
+                        (unsigned long long)lastlba);
                 goto fail;
         }
  
@@ -360,7 +352,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
                         le32_to_cpu((*gpt)->sizeof_partition_entry));
  
         if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
-               Dprintk("GUID Partitition Entry Array CRC check failed.\n");
+               pr_debug("GUID Partitition Entry Array CRC check failed.\n");
                 goto fail_ptes;
         }
  
@@ -616,7 +608,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
                 return 0;
         }
  
-       Dprintk("GUID Partition Table is valid!  Yea!\n");
+       pr_debug("GUID Partition Table is valid!  Yea!\n");
  
         for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
                 if (!is_pte_valid(&ptes[i], last_lba(bdev)))
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c

index 0fdda2e8a4cc916648b035a1f962eaf8744797fa..8652fb99e96256e944295ab35ad3970ccf8b7cb1 100644 (file)
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -133,17 +133,17 @@ static bool ldm_parse_privhead(const u8 *data, struct privhead *ph)
         bool is_vista = false;
  
         BUG_ON(!data || !ph);
-       if (MAGIC_PRIVHEAD != BE64(data)) {
+       if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) {
                 ldm_error("Cannot find PRIVHEAD structure. LDM database is"
                         " corrupt. Aborting.");
                 return false;
         }
-       ph->ver_major = BE16(data + 0x000C);
-       ph->ver_minor = BE16(data + 0x000E);
-       ph->logical_disk_start = BE64(data + 0x011B);
-       ph->logical_disk_size = BE64(data + 0x0123);
-       ph->config_start = BE64(data + 0x012B);
-       ph->config_size = BE64(data + 0x0133);
+       ph->ver_major = get_unaligned_be16(data + 0x000C);
+       ph->ver_minor = get_unaligned_be16(data + 0x000E);
+       ph->logical_disk_start = get_unaligned_be64(data + 0x011B);
+       ph->logical_disk_size = get_unaligned_be64(data + 0x0123);
+       ph->config_start = get_unaligned_be64(data + 0x012B);
+       ph->config_size = get_unaligned_be64(data + 0x0133);
         /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */
         if (ph->ver_major == 2 && ph->ver_minor == 12)
                 is_vista = true;
@@ -191,14 +191,14 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
  {
         BUG_ON (!data || !toc);
  
-       if (MAGIC_TOCBLOCK != BE64 (data)) {
+       if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) {
                 ldm_crit ("Cannot find TOCBLOCK, database may be corrupt.");
                 return false;
         }
         strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name));
         toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0;
-       toc->bitmap1_start = BE64 (data + 0x2E);
-       toc->bitmap1_size  = BE64 (data + 0x36);
+       toc->bitmap1_start = get_unaligned_be64(data + 0x2E);
+       toc->bitmap1_size  = get_unaligned_be64(data + 0x36);
  
         if (strncmp (toc->bitmap1_name, TOC_BITMAP1,
                         sizeof (toc->bitmap1_name)) != 0) {
@@ -208,8 +208,8 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
         }
         strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name));
         toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0;
-       toc->bitmap2_start = BE64 (data + 0x50);
-       toc->bitmap2_size  = BE64 (data + 0x58);
+       toc->bitmap2_start = get_unaligned_be64(data + 0x50);
+       toc->bitmap2_size  = get_unaligned_be64(data + 0x58);
         if (strncmp (toc->bitmap2_name, TOC_BITMAP2,
                         sizeof (toc->bitmap2_name)) != 0) {
                 ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.",
@@ -237,22 +237,22 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
  {
         BUG_ON (!data || !vm);
  
-       if (MAGIC_VMDB != BE32 (data)) {
+       if (MAGIC_VMDB != get_unaligned_be32(data)) {
                 ldm_crit ("Cannot find the VMDB, database may be corrupt.");
                 return false;
         }
  
-       vm->ver_major = BE16 (data + 0x12);
-       vm->ver_minor = BE16 (data + 0x14);
+       vm->ver_major = get_unaligned_be16(data + 0x12);
+       vm->ver_minor = get_unaligned_be16(data + 0x14);
         if ((vm->ver_major != 4) || (vm->ver_minor != 10)) {
                 ldm_error ("Expected VMDB version %d.%d, got %d.%d. "
                         "Aborting.", 4, 10, vm->ver_major, vm->ver_minor);
                 return false;
         }
  
-       vm->vblk_size     = BE32 (data + 0x08);
-       vm->vblk_offset   = BE32 (data + 0x0C);
-       vm->last_vblk_seq = BE32 (data + 0x04);
+       vm->vblk_size     = get_unaligned_be32(data + 0x08);
+       vm->vblk_offset   = get_unaligned_be32(data + 0x0C);
+       vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
  
         ldm_debug ("Parsed VMDB successfully.");
         return true;
@@ -507,7 +507,7 @@ static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base,
                 goto out;                               /* Already logged */
  
         /* Are there uncommitted transactions? */
-       if (BE16(data + 0x10) != 0x01) {
+       if (get_unaligned_be16(data + 0x10) != 0x01) {
                 ldm_crit ("Database is not in a consistent state.  Aborting.");
                 goto out;
         }
@@ -802,7 +802,7 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
                 return false;
  
         len += VBLK_SIZE_CMP3;
-       if (len != BE32 (buffer + 0x14))
+       if (len != get_unaligned_be32(buffer + 0x14))
                 return false;
  
         comp = &vb->vblk.comp;
@@ -851,7 +851,7 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
                 return false;
  
         len += VBLK_SIZE_DGR3;
-       if (len != BE32 (buffer + 0x14))
+       if (len != get_unaligned_be32(buffer + 0x14))
                 return false;
  
         dgrp = &vb->vblk.dgrp;
@@ -895,7 +895,7 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
                 return false;
  
         len += VBLK_SIZE_DGR4;
-       if (len != BE32 (buffer + 0x14))
+       if (len != get_unaligned_be32(buffer + 0x14))
                 return false;
  
         dgrp = &vb->vblk.dgrp;
@@ -931,7 +931,7 @@ static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb)
                 return false;
  
         len += VBLK_SIZE_DSK3;
-       if (len != BE32 (buffer + 0x14))
+       if (len != get_unaligned_be32(buffer + 0x14))
                 return false;
  
         disk = &vb->vblk.disk;
@@ -968,7 +968,7 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb)
                 return false;
  
         len += VBLK_SIZE_DSK4;
-       if (len != BE32 (buffer + 0x14))
+       if (len != get_unaligned_be32(buffer + 0x14))
                 return false;
  
         disk = &vb->vblk.disk;
@@ -1034,14 +1034,14 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
                 return false;
         }
         len += VBLK_SIZE_PRT3;
-       if (len > BE32(buffer + 0x14)) {
+       if (len > get_unaligned_be32(buffer + 0x14)) {
                 ldm_error("len %d > BE32(buffer + 0x14) %d", len,
-                               BE32(buffer + 0x14));
+                               get_unaligned_be32(buffer + 0x14));
                 return false;
         }
         part = &vb->vblk.part;
-       part->start = BE64(buffer + 0x24 + r_name);
-       part->volume_offset = BE64(buffer + 0x2C + r_name);
+       part->start = get_unaligned_be64(buffer + 0x24 + r_name);
+       part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name);
         part->size = ldm_get_vnum(buffer + 0x34 + r_name);
         part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size);
         part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent);
@@ -1139,9 +1139,9 @@ static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb)
                 return false;
         }
         len += VBLK_SIZE_VOL5;
-       if (len > BE32(buffer + 0x14)) {
+       if (len > get_unaligned_be32(buffer + 0x14)) {
                 ldm_error("len %d > BE32(buffer + 0x14) %d", len,
-                               BE32(buffer + 0x14));
+                               get_unaligned_be32(buffer + 0x14));
                 return false;
         }
         volu = &vb->vblk.volu;
@@ -1294,9 +1294,9 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
  
         BUG_ON (!data || !frags);
  
-       group = BE32 (data + 0x08);
-       rec   = BE16 (data + 0x0C);
-       num   = BE16 (data + 0x0E);
+       group = get_unaligned_be32(data + 0x08);
+       rec   = get_unaligned_be16(data + 0x0C);
+       num   = get_unaligned_be16(data + 0x0E);
         if ((num < 1) || (num > 4)) {
                 ldm_error ("A VBLK claims to have %d parts.", num);
                 return false;
@@ -1425,12 +1425,12 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base,
                 }
  
                 for (v = 0; v < perbuf; v++, data+=size) {  /* For each vblk */
-                       if (MAGIC_VBLK != BE32 (data)) {
+                       if (MAGIC_VBLK != get_unaligned_be32(data)) {
                                 ldm_error ("Expected to find a VBLK.");
                                 goto out;
                         }
  
-                       recs = BE16 (data + 0x0E);      /* Number of records */
+                       recs = get_unaligned_be16(data + 0x0E); /* Number of records */
                         if (recs == 1) {
                                 if (!ldm_ldmdb_add (data, size, ldb))
                                         goto out;       /* Already logged */
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h

index 80f63b5fdd9fe294db9af77ae8d183e0a9449d3a..30e08e809c1dd7867801357ce71c12ecc028f132 100644 (file)
--- a/fs/partitions/ldm.h
+++ b/fs/partitions/ldm.h
@@ -98,11 +98,6 @@ struct parsed_partitions;
  #define TOC_BITMAP1            "config"        /* Names of the two defined */
  #define TOC_BITMAP2            "log"           /* bitmaps in the TOCBLOCK. */
  
-/* Most numbers we deal with are big-endian and won't be aligned. */
-#define BE16(x)                        ((u16)be16_to_cpu(get_unaligned((__be16*)(x))))
-#define BE32(x)                        ((u32)be32_to_cpu(get_unaligned((__be32*)(x))))
-#define BE64(x)                        ((u64)be64_to_cpu(get_unaligned((__be64*)(x))))
-
  /* Borrowed from msdos.c */
  #define SYS_IND(p)             (get_unaligned(&(p)->sys_ind))
  
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig

new file mode 100644 (file)

index 0000000..73cd7a4
--- /dev/null
+++ b/fs/proc/Kconfig
@@ -0,0 +1,59 @@
+config PROC_FS
+       bool "/proc file system support" if EMBEDDED
+       default y
+       help
+         This is a virtual file system providing information about the status
+         of the system. "Virtual" means that it doesn't take up any space on
+         your hard disk: the files are created on the fly by the kernel when
+         you try to access them. Also, you cannot read the files with older
+         versions of the program less: you need to use more or cat.
+
+         It's totally cool; for example, "cat /proc/interrupts" gives
+         information about what the different IRQs are used for at the moment
+         (there is a small number of Interrupt ReQuest lines in your computer
+         that are used by the attached devices to gain the CPU's attention --
+         often a source of trouble if two devices are mistakenly configured
+         to use the same IRQ). The program procinfo can display some
+         information about your system gathered from the /proc file system.
+
+         Before you can use the /proc file system, it has to be mounted,
+         meaning it has to be given a location in the directory hierarchy.
+         That location should be /proc. A command such as "mount -t proc proc
+         /proc" or the equivalent line in /etc/fstab does the job.
+
+         The /proc file system is explained in the file
+         <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
+         ("man 5 proc").
+
+         This option will enlarge your kernel by about 67 KB. Several
+         programs depend on this, so everyone should say Y here.
+
+config PROC_KCORE
+       bool "/proc/kcore support" if !ARM
+       depends on PROC_FS && MMU
+
+config PROC_VMCORE
+        bool "/proc/vmcore support (EXPERIMENTAL)"
+        depends on PROC_FS && CRASH_DUMP
+       default y
+        help
+        Exports the dump image of a crashed kernel in ELF format.
+
+config PROC_SYSCTL
+       bool "Sysctl support (/proc/sys)" if EMBEDDED
+       depends on PROC_FS
+       select SYSCTL
+       default y
+       ---help---
+         The sysctl interface provides a means of dynamically changing
+         certain kernel parameters and variables on the fly without requiring
+         a recompile of the kernel or reboot of the system.  The primary
+         interface is through /proc/sys.  If you say Y here a tree of
+         modifiable sysctl entries will be generated beneath the
+          /proc/sys directory. They are explained in the files
+         in <file:Documentation/sysctl/>.  Note that enabling this
+         option will enlarge the kernel by at least 8 KB.
+
+         As it is generally a good thing, you should say Y here unless
+         building a kernel for install/rescue disks or your system is very
+         limited in memory.
diff --git a/fs/proc/base.c b/fs/proc/base.c

index 58c3e6a8e15e160ec3d6af3aabe6138b666b00b8..a891fe4cb43bf34e0becd16e642adfc137c0d4e3 100644 (file)
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2376,29 +2376,82 @@ static int proc_base_fill_cache(struct file *filp, void *dirent,
  }
  
  #ifdef CONFIG_TASK_IO_ACCOUNTING
-static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
-{
+static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
+{
+       u64 rchar, wchar, syscr, syscw;
+       struct task_io_accounting ioac;
+
+       if (!whole) {
+               rchar = task->rchar;
+               wchar = task->wchar;
+               syscr = task->syscr;
+               syscw = task->syscw;
+               memcpy(&ioac, &task->ioac, sizeof(ioac));
+       } else {
+               unsigned long flags;
+               struct task_struct *t = task;
+               rchar = wchar = syscr = syscw = 0;
+               memset(&ioac, 0, sizeof(ioac));
+
+               rcu_read_lock();
+               do {
+                       rchar += t->rchar;
+                       wchar += t->wchar;
+                       syscr += t->syscr;
+                       syscw += t->syscw;
+
+                       ioac.read_bytes += t->ioac.read_bytes;
+                       ioac.write_bytes += t->ioac.write_bytes;
+                       ioac.cancelled_write_bytes +=
+                                       t->ioac.cancelled_write_bytes;
+                       t = next_thread(t);
+               } while (t != task);
+               rcu_read_unlock();
+
+               if (lock_task_sighand(task, &flags)) {
+                       struct signal_struct *sig = task->signal;
+
+                       rchar += sig->rchar;
+                       wchar += sig->wchar;
+                       syscr += sig->syscr;
+                       syscw += sig->syscw;
+
+                       ioac.read_bytes += sig->ioac.read_bytes;
+                       ioac.write_bytes += sig->ioac.write_bytes;
+                       ioac.cancelled_write_bytes +=
+                                       sig->ioac.cancelled_write_bytes;
+
+                       unlock_task_sighand(task, &flags);
+               }
+       }
+
         return sprintf(buffer,
-#ifdef CONFIG_TASK_XACCT
                         "rchar: %llu\n"
                         "wchar: %llu\n"
                         "syscr: %llu\n"
                         "syscw: %llu\n"
-#endif
                         "read_bytes: %llu\n"
                         "write_bytes: %llu\n"
                         "cancelled_write_bytes: %llu\n",
-#ifdef CONFIG_TASK_XACCT
-                       (unsigned long long)task->rchar,
-                       (unsigned long long)task->wchar,
-                       (unsigned long long)task->syscr,
-                       (unsigned long long)task->syscw,
-#endif
-                       (unsigned long long)task->ioac.read_bytes,
-                       (unsigned long long)task->ioac.write_bytes,
-                       (unsigned long long)task->ioac.cancelled_write_bytes);
+                       (unsigned long long)rchar,
+                       (unsigned long long)wchar,
+                       (unsigned long long)syscr,
+                       (unsigned long long)syscw,
+                       (unsigned long long)ioac.read_bytes,
+                       (unsigned long long)ioac.write_bytes,
+                       (unsigned long long)ioac.cancelled_write_bytes);
+}
+
+static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
+{
+       return do_io_accounting(task, buffer, 0);
  }
-#endif
+
+static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
+{
+       return do_io_accounting(task, buffer, 1);
+}
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
  
  /*
   * Thread groups
@@ -2470,7 +2523,7 @@ static const struct pid_entry tgid_base_stuff[] = {
         REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
  #endif
  #ifdef CONFIG_TASK_IO_ACCOUNTING
-       INF("io",       S_IRUGO, pid_io_accounting),
+       INF("io",       S_IRUGO, tgid_io_accounting),
  #endif
  };
  
@@ -2797,6 +2850,9 @@ static const struct pid_entry tid_base_stuff[] = {
  #ifdef CONFIG_FAULT_INJECTION
         REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
  #endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+       INF("io",       S_IRUGO, tid_io_accounting),
+#endif
  };
  
  static int proc_tid_base_readdir(struct file * filp,
diff --git a/fs/proc/generic.c b/fs/proc/generic.c

index 43e54e86cefd995aee7fc01d3e32515283472123..bc0a0dd2d8447a1281ec125148c5a4e3323bb697 100644 (file)
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -597,6 +597,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
         ent->pde_users = 0;
         spin_lock_init(&ent->pde_unload_lock);
         ent->pde_unload_completion = NULL;
+       INIT_LIST_HEAD(&ent->pde_openers);
   out:
         return ent;
  }
@@ -789,6 +790,19 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
         spin_unlock(&de->pde_unload_lock);
  
  continue_removing:
+       spin_lock(&de->pde_unload_lock);
+       while (!list_empty(&de->pde_openers)) {
+               struct pde_opener *pdeo;
+
+               pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
+               list_del(&pdeo->lh);
+               spin_unlock(&de->pde_unload_lock);
+               pdeo->release(pdeo->inode, pdeo->file);
+               kfree(pdeo);
+               spin_lock(&de->pde_unload_lock);
+       }
+       spin_unlock(&de->pde_unload_lock);
+
         if (S_ISDIR(de->mode))
                 parent->nlink--;
         de->nlink = 0;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c

index b08d10017911f43927646943307e26238e4fe7dc..02eca2ed9dd7a674c10538c6245e7f5e38bedf69 100644 (file)
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -111,27 +111,25 @@ int __init proc_init_inodecache(void)
         return 0;
  }
  
-static int proc_remount(struct super_block *sb, int *flags, char *data)
-{
-       *flags |= MS_NODIRATIME;
-       return 0;
-}
-
  static const struct super_operations proc_sops = {
         .alloc_inode    = proc_alloc_inode,
         .destroy_inode  = proc_destroy_inode,
         .drop_inode     = generic_delete_inode,
         .delete_inode   = proc_delete_inode,
         .statfs         = simple_statfs,
-       .remount_fs     = proc_remount,
  };
  
-static void pde_users_dec(struct proc_dir_entry *pde)
+static void __pde_users_dec(struct proc_dir_entry *pde)
  {
-       spin_lock(&pde->pde_unload_lock);
         pde->pde_users--;
         if (pde->pde_unload_completion && pde->pde_users == 0)
                 complete(pde->pde_unload_completion);
+}
+
+static void pde_users_dec(struct proc_dir_entry *pde)
+{
+       spin_lock(&pde->pde_unload_lock);
+       __pde_users_dec(pde);
         spin_unlock(&pde->pde_unload_lock);
  }
  
@@ -318,36 +316,97 @@ static int proc_reg_open(struct inode *inode, struct file *file)
         struct proc_dir_entry *pde = PDE(inode);
         int rv = 0;
         int (*open)(struct inode *, struct file *);
+       int (*release)(struct inode *, struct file *);
+       struct pde_opener *pdeo;
+
+       /*
+        * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
+        * sequence. ->release won't be called because ->proc_fops will be
+        * cleared. Depending on complexity of ->release, consequences vary.
+        *
+        * We can't wait until the file is closed for real, that can
+        * deadlock: rmmod foo </proc/foo . So, we're going to do ->release
+        * by hand in remove_proc_entry(). For this, save the opener's inode,
+        * file and ->release for later.
+        */
+       pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
+       if (!pdeo)
+               return -ENOMEM;
  
         spin_lock(&pde->pde_unload_lock);
         if (!pde->proc_fops) {
                 spin_unlock(&pde->pde_unload_lock);
+               kfree(pdeo);
                 return rv;
         }
         pde->pde_users++;
         open = pde->proc_fops->open;
+       release = pde->proc_fops->release;
         spin_unlock(&pde->pde_unload_lock);
  
         if (open)
                 rv = open(inode, file);
  
-       pde_users_dec(pde);
+       spin_lock(&pde->pde_unload_lock);
+       if (rv == 0 && release) {
+               /* To know what to release. */
+               pdeo->inode = inode;
+               pdeo->file = file;
+               /* Strictly for "too late" ->release in proc_reg_release(). */
+               pdeo->release = release;
+               list_add(&pdeo->lh, &pde->pde_openers);
+       } else
+               kfree(pdeo);
+       __pde_users_dec(pde);
+       spin_unlock(&pde->pde_unload_lock);
         return rv;
  }
  
+static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
+                                       struct inode *inode, struct file *file)
+{
+       struct pde_opener *pdeo;
+
+       list_for_each_entry(pdeo, &pde->pde_openers, lh) {
+               if (pdeo->inode == inode && pdeo->file == file)
+                       return pdeo;
+       }
+       return NULL;
+}
+
  static int proc_reg_release(struct inode *inode, struct file *file)
  {
         struct proc_dir_entry *pde = PDE(inode);
         int rv = 0;
         int (*release)(struct inode *, struct file *);
+       struct pde_opener *pdeo;
  
         spin_lock(&pde->pde_unload_lock);
+       pdeo = find_pde_opener(pde, inode, file);
         if (!pde->proc_fops) {
-               spin_unlock(&pde->pde_unload_lock);
+               /*
+                * Can't simply exit, __fput() will think that everything is OK,
+                * and move on to freeing struct file. remove_proc_entry() will
+                * find slacker in opener's list and will try to do non-trivial
+                * things with struct file. Therefore, remove opener from list.
+                *
+                * But if opener is removed from list, who will ->release it?
+                */
+               if (pdeo) {
+                       list_del(&pdeo->lh);
+                       spin_unlock(&pde->pde_unload_lock);
+                       rv = pdeo->release(inode, file);
+                       kfree(pdeo);
+               } else
+                       spin_unlock(&pde->pde_unload_lock);
                 return rv;
         }
         pde->pde_users++;
         release = pde->proc_fops->release;
+       if (pdeo) {
+               list_del(&pdeo->lh);
+               kfree(pdeo);
+       }
         spin_unlock(&pde->pde_unload_lock);
  
         if (release)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h

index 28cbca8059057e5f1b7729b69743aea11cecc27c..442202314d5322292f87328b3e62b5427b3fa77a 100644 (file)
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -63,6 +63,7 @@ extern const struct file_operations proc_smaps_operations;
  extern const struct file_operations proc_clear_refs_operations;
  extern const struct file_operations proc_pagemap_operations;
  extern const struct file_operations proc_net_operations;
+extern const struct file_operations proc_kmsg_operations;
  extern const struct inode_operations proc_net_inode_operations;
  
  void free_proc_entry(struct proc_dir_entry *de);
@@ -88,3 +89,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
                 struct dentry *dentry);
  int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
                 filldir_t filldir);
+
+struct pde_opener {
+       struct inode *inode;    /* inode and file together identify an opener */
+       struct file *file;
+       int (*release)(struct inode *, struct file *);  /* fallback ->release */
+       struct list_head lh;    /* entry in pde->pde_openers */
+};
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c

index e78c81fcf547981f3aa99c7ebeca831af15e98f5..c2370c76fb711d6c7c32fba6ec34ffac437d7b65 100644 (file)
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -23,6 +23,10 @@
  
  #define CORE_STR "CORE"
  
+#ifndef ELF_CORE_EFLAGS
+#define ELF_CORE_EFLAGS        0 /* default kcore ELF e_flags */
+#endif
+
  static int open_kcore(struct inode * inode, struct file * filp)
  {
         return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
@@ -164,11 +168,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
         elf->e_entry    = 0;
         elf->e_phoff    = sizeof(struct elfhdr);
         elf->e_shoff    = 0;
-#if defined(CONFIG_H8300)
-       elf->e_flags    = ELF_FLAGS;
-#else
-       elf->e_flags    = 0;
-#endif
+       elf->e_flags    = ELF_CORE_EFLAGS;
         elf->e_ehsize   = sizeof(struct elfhdr);
         elf->e_phentsize= sizeof(struct elf_phdr);
         elf->e_phnum    = nphdr;
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c

index ff3b90b56e9d635546fe71868ffdc473632c5352..9fd5df3f40ce7d0dcc4f78559fdd5eb2c521da4f 100644 (file)
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -15,6 +15,8 @@
  #include <asm/uaccess.h>
  #include <asm/io.h>
  
+#include "internal.h"
+
  extern wait_queue_head_t log_wait;
  
  extern int do_syslog(int type, char __user *bug, int count);
diff --git a/fs/quota.c b/fs/quota.c

index db1cc9f3c7aa3a24be8439c632f77228e5e266ab..7f4386ebc23a26b98f97d5088ecff7c79369fe17 100644 (file)
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -186,7 +186,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
  
  void sync_dquots(struct super_block *sb, int type)
  {
-       int cnt, dirty;
+       int cnt;
  
         if (sb) {
                 if (sb->s_qcop->quota_sync)
@@ -198,11 +198,17 @@ void sync_dquots(struct super_block *sb, int type)
  restart:
         list_for_each_entry(sb, &super_blocks, s_list) {
                 /* This test just improves performance so it needn't be reliable... */
-               for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
-                       if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
-                           && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
-                               dirty = 1;
-               if (!dirty)
+               for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+                       if (type != -1 && type != cnt)
+                               continue;
+                       if (!sb_has_quota_enabled(sb, cnt))
+                               continue;
+                       if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
+                           list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
+                               continue;
+                       break;
+               }
+               if (cnt == MAXQUOTAS)
                         continue;
                 sb->s_count++;
                 spin_unlock(&sb_lock);
diff --git a/fs/quota_v1.c b/fs/quota_v1.c

index a6cf9269105c478e7a88db2ca6b074b8bcc0933d..5ae15b13eeb00217b529b336349736604377c472 100644 (file)
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -1,6 +1,7 @@
  #include <linux/errno.h>
  #include <linux/fs.h>
  #include <linux/quota.h>
+#include <linux/quotaops.h>
  #include <linux/dqblk_v1.h>
  #include <linux/quotaio_v1.h>
  #include <linux/kernel.h>
diff --git a/fs/quota_v2.c b/fs/quota_v2.c

index 234ada90363343ad668eff8213dd0927af7aa6f2..b53827dc02d9081c50e0d0ed0acb971c2ad931b8 100644 (file)
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -11,6 +11,7 @@
  #include <linux/init.h>
  #include <linux/module.h>
  #include <linux/slab.h>
+#include <linux/quotaops.h>
  
  #include <asm/byteorder.h>
  
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c

index e396b2fa4743e66b32d3ea4ad740aa403266fa40..c8f60ee183b5b53d4799116e776f990ba6bf2d93 100644 (file)
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -34,15 +34,10 @@
  **                     from within kupdate, it will ignore the immediate flag
  */
  
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
  #include <linux/time.h>
  #include <linux/semaphore.h>
-
  #include <linux/vmalloc.h>
  #include <linux/reiserfs_fs.h>
-
  #include <linux/kernel.h>
  #include <linux/errno.h>
  #include <linux/fcntl.h>
@@ -54,6 +49,9 @@
  #include <linux/writeback.h>
  #include <linux/blkdev.h>
  #include <linux/backing-dev.h>
+#include <linux/uaccess.h>
+
+#include <asm/system.h>
  
  /* gets a struct reiserfs_journal_list * from a list head */
  #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
@@ -558,13 +556,13 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
  static inline void lock_journal(struct super_block *p_s_sb)
  {
         PROC_INFO_INC(p_s_sb, journal.lock_journal);
-       down(&SB_JOURNAL(p_s_sb)->j_lock);
+       mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex); /* see unlock_journal() */
  }
  
  /* unlock the current transaction */
  static inline void unlock_journal(struct super_block *p_s_sb)
  {
-       up(&SB_JOURNAL(p_s_sb)->j_lock);
+       mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex); /* see lock_journal() */
  }
  
  static inline void get_journal_list(struct reiserfs_journal_list *jl)
@@ -1045,9 +1043,9 @@ static int flush_commit_list(struct super_block *s,
         }
  
         /* make sure nobody is trying to flush this one at the same time */
-       down(&jl->j_commit_lock);
+       mutex_lock(&jl->j_commit_mutex);
         if (!journal_list_still_alive(s, trans_id)) {
-               up(&jl->j_commit_lock);
+               mutex_unlock(&jl->j_commit_mutex);
                 goto put_jl;
         }
         BUG_ON(jl->j_trans_id == 0);
@@ -1057,7 +1055,7 @@ static int flush_commit_list(struct super_block *s,
                 if (flushall) {
                         atomic_set(&(jl->j_older_commits_done), 1);
                 }
-               up(&jl->j_commit_lock);
+               mutex_unlock(&jl->j_commit_mutex);
                 goto put_jl;
         }
  
@@ -1181,7 +1179,7 @@ static int flush_commit_list(struct super_block *s,
         if (flushall) {
                 atomic_set(&(jl->j_older_commits_done), 1);
         }
-       up(&jl->j_commit_lock);
+       mutex_unlock(&jl->j_commit_mutex);
        put_jl:
         put_journal_list(s, jl);
  
@@ -1411,8 +1409,8 @@ static int flush_journal_list(struct super_block *s,
  
         /* if flushall == 0, the lock is already held */
         if (flushall) {
-               down(&journal->j_flush_sem);
-       } else if (!down_trylock(&journal->j_flush_sem)) {
+               mutex_lock(&journal->j_flush_mutex);
+       } else if (mutex_trylock(&journal->j_flush_mutex)) {
                 BUG();
         }
  
@@ -1642,7 +1640,7 @@ static int flush_journal_list(struct super_block *s,
         jl->j_state = 0;
         put_journal_list(s, jl);
         if (flushall)
-               up(&journal->j_flush_sem);
+               mutex_unlock(&journal->j_flush_mutex);
         put_fs_excl();
         return err;
  }
@@ -1772,12 +1770,12 @@ static int kupdate_transactions(struct super_block *s,
         struct reiserfs_journal *journal = SB_JOURNAL(s);
         chunk.nr = 0;
  
-       down(&journal->j_flush_sem);
+       mutex_lock(&journal->j_flush_mutex);
         if (!journal_list_still_alive(s, orig_trans_id)) {
                 goto done;
         }
  
-       /* we've got j_flush_sem held, nobody is going to delete any
+       /* we've got j_flush_mutex held, nobody is going to delete any
          * of these lists out from underneath us
          */
         while ((num_trans && transactions_flushed < num_trans) ||
@@ -1812,7 +1810,7 @@ static int kupdate_transactions(struct super_block *s,
         }
  
        done:
-       up(&journal->j_flush_sem);
+       mutex_unlock(&journal->j_flush_mutex);
         return ret;
  }
  
@@ -2556,7 +2554,7 @@ static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
         INIT_LIST_HEAD(&jl->j_working_list);
         INIT_LIST_HEAD(&jl->j_tail_bh_list);
         INIT_LIST_HEAD(&jl->j_bh_list);
-       sema_init(&jl->j_commit_lock, 1);
+       mutex_init(&jl->j_commit_mutex);
         SB_JOURNAL(s)->j_num_lists++;
         get_journal_list(jl);
         return jl;
@@ -2837,8 +2835,8 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
         journal->j_last = NULL;
         journal->j_first = NULL;
         init_waitqueue_head(&(journal->j_join_wait));
-       sema_init(&journal->j_lock, 1);
-       sema_init(&journal->j_flush_sem, 1);
+       mutex_init(&journal->j_mutex);
+       mutex_init(&journal->j_flush_mutex);
  
         journal->j_trans_id = 10;
         journal->j_mount_id = 10;
@@ -4030,7 +4028,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
          * the new transaction is fully setup, and we've already flushed the
          * ordered bh list
          */
-       down(&jl->j_commit_lock);
+       mutex_lock(&jl->j_commit_mutex);
  
         /* save the transaction id in case we need to commit it later */
         commit_trans_id = jl->j_trans_id;
@@ -4196,7 +4194,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
                 lock_kernel();
         }
         BUG_ON(!list_empty(&jl->j_tail_bh_list));
-       up(&jl->j_commit_lock);
+       mutex_unlock(&jl->j_commit_mutex);
  
         /* honor the flush wishes from the caller, simple commits can
          ** be done outside the journal lock, they are done below
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c

index 1d40f2bd197081284dce1ff34c6d1f828082304f..2ec748ba0bd32f101bd11056402bd48fab4f0735 100644 (file)
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -22,6 +22,7 @@
  #include <linux/blkdev.h>
  #include <linux/buffer_head.h>
  #include <linux/exportfs.h>
+#include <linux/quotaops.h>
  #include <linux/vfs.h>
  #include <linux/mnt_namespace.h>
  #include <linux/mount.h>
@@ -182,7 +183,7 @@ static int finish_unfinished(struct super_block *s)
                         int ret = reiserfs_quota_on_mount(s, i);
                         if (ret < 0)
                                 reiserfs_warning(s,
-                                                "reiserfs: cannot turn on journalled quota: error %d",
+                                                "reiserfs: cannot turn on journaled quota: error %d",
                                                  ret);
                 }
         }
@@ -876,7 +877,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options,     /* strin
                                      mount options were selected. */
                                   unsigned long *blocks,        /* strtol-ed from NNN of resize=NNN */
                                   char **jdev_name,
-                                 unsigned int *commit_max_age)
+                                 unsigned int *commit_max_age,
+                                 char **qf_names,
+                                 unsigned int *qfmt)
  {
         int c;
         char *arg = NULL;
@@ -992,9 +995,11 @@ static int reiserfs_parse_options(struct super_block *s, char *options,    /* strin
                 if (c == 'u' || c == 'g') {
                         int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
  
-                       if (sb_any_quota_enabled(s)) {
+                       if ((sb_any_quota_enabled(s) ||
+                            sb_any_quota_suspended(s)) &&
+                           (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
                                 reiserfs_warning(s,
-                                                "reiserfs_parse_options: cannot change journalled quota options when quota turned on.");
+                                                "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
                                 return 0;
                         }
                         if (*arg) {     /* Some filename specified? */
@@ -1011,46 +1016,54 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
                                                          "reiserfs_parse_options: quotafile must be on filesystem root.");
                                         return 0;
                                 }
-                               REISERFS_SB(s)->s_qf_names[qtype] =
+                               qf_names[qtype] =
                                     kmalloc(strlen(arg) + 1, GFP_KERNEL);
-                               if (!REISERFS_SB(s)->s_qf_names[qtype]) {
+                               if (!qf_names[qtype]) {
                                         reiserfs_warning(s,
                                                          "reiserfs_parse_options: not enough memory for storing quotafile name.");
                                         return 0;
                                 }
-                               strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg);
+                               strcpy(qf_names[qtype], arg);
                                 *mount_options |= 1 << REISERFS_QUOTA;
                         } else {
-                               kfree(REISERFS_SB(s)->s_qf_names[qtype]);
-                               REISERFS_SB(s)->s_qf_names[qtype] = NULL;
+                               if (qf_names[qtype] !=
+                                   REISERFS_SB(s)->s_qf_names[qtype])
+                                       kfree(qf_names[qtype]);
+                               qf_names[qtype] = NULL;
                         }
                 }
                 if (c == 'f') {
                         if (!strcmp(arg, "vfsold"))
-                               REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD;
+                               *qfmt = QFMT_VFS_OLD;
                         else if (!strcmp(arg, "vfsv0"))
-                               REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0;
+                               *qfmt = QFMT_VFS_V0;
                         else {
                                 reiserfs_warning(s,
                                                  "reiserfs_parse_options: unknown quota format specified.");
                                 return 0;
                         }
+                       if ((sb_any_quota_enabled(s) ||
+                            sb_any_quota_suspended(s)) &&
+                           *qfmt != REISERFS_SB(s)->s_jquota_fmt) {
+                               reiserfs_warning(s,
+                                                "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
+                               return 0;
+                       }
                 }
  #else
                 if (c == 'u' || c == 'g' || c == 'f') {
                         reiserfs_warning(s,
-                                        "reiserfs_parse_options: journalled quota options not supported.");
+                                        "reiserfs_parse_options: journaled quota options not supported.");
                         return 0;
                 }
  #endif
         }
  
  #ifdef CONFIG_QUOTA
-       if (!REISERFS_SB(s)->s_jquota_fmt
-           && (REISERFS_SB(s)->s_qf_names[USRQUOTA]
-               || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) {
+       if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt
+           && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) {
                 reiserfs_warning(s,
-                                "reiserfs_parse_options: journalled quota format not specified.");
+                                "reiserfs_parse_options: journaled quota format not specified.");
                 return 0;
         }
         /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
@@ -1130,6 +1143,29 @@ static void handle_attrs(struct super_block *s)
         }
  }
  
+#ifdef CONFIG_QUOTA
+/*
+ * Install the quota file names and quota format gathered by
+ * reiserfs_parse_options() into the superblock info.  A stored name is
+ * freed only when it is being replaced by a different pointer.  *qfmt is
+ * 0 when no format option was parsed (reiserfs_remount() starts from 0),
+ * so the stored format is only overwritten by a non-zero value.
+ */
+static void handle_quota_files(struct super_block *s, char **qf_names,
+                              unsigned int *qfmt)
+{
+       int i;
+
+       for (i = 0; i < MAXQUOTAS; i++) {
+               if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
+                       kfree(REISERFS_SB(s)->s_qf_names[i]);
+               REISERFS_SB(s)->s_qf_names[i] = qf_names[i];
+       }
+       if (*qfmt)
+               REISERFS_SB(s)->s_jquota_fmt = *qfmt;
+}
+#endif
+
  static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
  {
         struct reiserfs_super_block *rs;
@@ -1141,23 +1169,30 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
         struct reiserfs_journal *journal = SB_JOURNAL(s);
         char *new_opts = kstrdup(arg, GFP_KERNEL);
         int err;
+       char *qf_names[MAXQUOTAS];
+       unsigned int qfmt = 0;
  #ifdef CONFIG_QUOTA
         int i;
+
+       memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
  #endif
  
         rs = SB_DISK_SUPER_BLOCK(s);
  
         if (!reiserfs_parse_options
-           (s, arg, &mount_options, &blocks, NULL, &commit_max_age)) {
+           (s, arg, &mount_options, &blocks, NULL, &commit_max_age,
+           qf_names, &qfmt)) {
  #ifdef CONFIG_QUOTA
-               for (i = 0; i < MAXQUOTAS; i++) {
-                       kfree(REISERFS_SB(s)->s_qf_names[i]);
-                       REISERFS_SB(s)->s_qf_names[i] = NULL;
-               }
+               for (i = 0; i < MAXQUOTAS; i++)
+                       if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
+                               kfree(qf_names[i]);
  #endif
                 err = -EINVAL;
                 goto out_err;
         }
+#ifdef CONFIG_QUOTA
+       handle_quota_files(s, qf_names, &qfmt);
+#endif
  
         handle_attrs(s);
  
@@ -1570,6 +1605,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
         char *jdev_name;
         struct reiserfs_sb_info *sbi;
         int errval = -EINVAL;
+       char *qf_names[MAXQUOTAS] = {};
+       unsigned int qfmt = 0;
  
         save_mount_options(s, data);
  
@@ -1597,9 +1634,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
         jdev_name = NULL;
         if (reiserfs_parse_options
             (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
-            &commit_max_age) == 0) {
+            &commit_max_age, qf_names, &qfmt) == 0) {
                 goto error;
         }
+#ifdef CONFIG_QUOTA
+       handle_quota_files(s, qf_names, &qfmt);
+#endif
  
         if (blocks) {
                 SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option "
@@ -1819,7 +1859,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
  
         return (0);
  
-      error:
+error:
         if (jinit_done) {       /* kill the commit thread, free journal ram */
                 journal_release_error(NULL, s);
         }
@@ -1830,10 +1870,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
  #ifdef CONFIG_QUOTA
         {
                 int j;
-               for (j = 0; j < MAXQUOTAS; j++) {
-                       kfree(sbi->s_qf_names[j]);
-                       sbi->s_qf_names[j] = NULL;
-               }
+               for (j = 0; j < MAXQUOTAS; j++)
+                       kfree(qf_names[j]);
         }
  #endif
         kfree(sbi);
@@ -1980,7 +2018,7 @@ static int reiserfs_release_dquot(struct dquot *dquot)
  
  static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
  {
-       /* Are we journalling quotas? */
+       /* Are we journaling quotas? */
         if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
             REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
                 dquot_mark_dquot_dirty(dquot);
@@ -2026,6 +2064,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
         int err;
         struct nameidata nd;
         struct inode *inode;
+       struct reiserfs_transaction_handle th;
  
         if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
                 return -EINVAL;
@@ -2053,17 +2092,33 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
                 }
                 mark_inode_dirty(inode);
         }
-       /* Not journalling quota? No more tests needed... */
-       if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] &&
-           !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) {
-               path_put(&nd.path);
-               return vfs_quota_on(sb, type, format_id, path, 0);
-       }
-       /* Quotafile not of fs root? */
-       if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
-               reiserfs_warning(sb,
+       /* Journaling quota? */
+       if (REISERFS_SB(sb)->s_qf_names[type]) {
+               /* Quotafile not of fs root? */
+               if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
+                       reiserfs_warning(sb,
                                  "reiserfs: Quota file not on filesystem root. "
                                  "Journalled quota will not work.");
+       }
+
+       /*
+        * When we journal data on quota file, we have to flush journal to see
+        * all updates to the file when we bypass pagecache...
+        */
+       if (reiserfs_file_data_log(inode)) {
+               /* Just start temporary transaction and finish it */
+               err = journal_begin(&th, sb, 1);
+               if (err) {
+                       /* nd.path is still held here; drop it before bailing out */
+                       path_put(&nd.path);
+                       return err;
+               }
+               err = journal_end_sync(&th, sb, 1);
+               if (err) {
+                       path_put(&nd.path);
+                       return err;
+               }
+       }
         path_put(&nd.path);
         return vfs_quota_on(sb, type, format_id, path, 0);
  }
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c

index 5e90a95ad60b5208d22cac6c0722fd1494af3e8a..056008db13775ccf235b6b2b3822f66efe9fb825 100644 (file)
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -6,8 +6,6 @@
  #include <linux/reiserfs_xattr.h>
  #include <asm/uaccess.h>
  
-#define XATTR_SECURITY_PREFIX "security."
-
  static int
  security_get(struct inode *inode, const char *name, void *buffer, size_t size)
  {
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c

index 024a938ca60f1a77238703f648f78733f64f8df4..60abe2bb1f980bbb11de6d2b161ea16946bc595a 100644 (file)
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -7,8 +7,6 @@
  #include <linux/reiserfs_xattr.h>
  #include <asm/uaccess.h>
  
-#define XATTR_TRUSTED_PREFIX "trusted."
-
  static int
  trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
  {
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c

index 073f39364b1136dcd9cbb808f5820b3e5581ff58..1384efcb938e078d882587ee9e84e1de10090bb2 100644 (file)
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -10,8 +10,6 @@
  # include <linux/reiserfs_acl.h>
  #endif
  
-#define XATTR_USER_PREFIX "user."
-
  static int
  user_get(struct inode *inode, const char *name, void *buffer, size_t size)
  {
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c

index 8182f0542a2176aadc221c40d65dfec7e4bb382e..8c177eb7e3447175158f77aecb28be5e678bd770 100644 (file)
--- a/fs/smbfs/cache.c
+++ b/fs/smbfs/cache.c
@@ -13,7 +13,6 @@
  #include <linux/errno.h>
  #include <linux/kernel.h>
  #include <linux/mm.h>
-#include <linux/dirent.h>
  #include <linux/smb_fs.h>
  #include <linux/pagemap.h>
  #include <linux/net.h>
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c

index d517a27b7f4b7baeefb6048aef46a20d03293b21..ee536e8a649a28710fe89417265fc5bce0a1119d 100644 (file)
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -16,7 +16,6 @@
  #include <linux/stat.h>
  #include <linux/fcntl.h>
  #include <linux/dcache.h>
-#include <linux/dirent.h>
  #include <linux/nls.h>
  #include <linux/smp_lock.h>
  #include <linux/net.h>
diff --git a/fs/ufs/super.c b/fs/ufs/super.c

index 506f724055c2a58209bced23ff7fa1ade560d7c0..227c9d700040fa75a364d6451864712b2699fa14 100644 (file)
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -76,6 +76,7 @@
  
  #include <linux/errno.h>
  #include <linux/fs.h>
+#include <linux/quotaops.h>
  #include <linux/slab.h>
  #include <linux/time.h>
  #include <linux/stat.h>
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c

index b546ba69be8224f79253e75770a93ff2fdb8a621..155c10b4adbd4a0d3e518b7558d07e4501bee11b 100644 (file)
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -621,7 +621,7 @@ shortname:
         memcpy(de->name, msdos_name, MSDOS_NAME);
         de->attr = is_dir ? ATTR_DIR : ATTR_ARCH;
         de->lcase = lcase;
-       fat_date_unix2dos(ts->tv_sec, &time, &date);
+       fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
         de->time = de->ctime = time;
         de->date = de->cdate = de->adate = date;
         de->ctime_cs = 0;
diff --git a/include/asm-alpha/kvm.h b/include/asm-alpha/kvm.h

deleted file mode 100644 (file)

index b9daec4..0000000
--- a/include/asm-alpha/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_ALPHA_H
-#define __LINUX_KVM_ALPHA_H
-
-/* alpha does not support KVM */
-
-#endif
diff --git a/include/asm-alpha/thread_info.h b/include/asm-alpha/thread_info.h

index fb3185196298326ee265f2ef87cb148795896a01..15fda434442428660e3d896661ec11c9f134e71e 100644 (file)
--- a/include/asm-alpha/thread_info.h
+++ b/include/asm-alpha/thread_info.h
@@ -50,10 +50,8 @@ register struct thread_info *__current_thread_info __asm__("$8");
  #define current_thread_info()  __current_thread_info
  
  /* Thread information allocation.  */
+#define THREAD_SIZE_ORDER 1
  #define THREAD_SIZE (2*PAGE_SIZE)
-#define alloc_thread_info(tsk) \
-  ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
  
  #endif /* __ASSEMBLY__ */
  
diff --git a/include/asm-arm/kvm.h b/include/asm-arm/kvm.h

deleted file mode 100644 (file)

index cb3c08c..0000000
--- a/include/asm-arm/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_ARM_H
-#define __LINUX_KVM_ARM_H
-
-/* arm does not support KVM */
-
-#endif
diff --git a/include/asm-arm/ptrace.h b/include/asm-arm/ptrace.h

index 7aaa206cb54ef22e8a45d92822b31bc8bda18782..8382b7510f942de6b7d4db7a6e459e9576d64b36 100644 (file)
--- a/include/asm-arm/ptrace.h
+++ b/include/asm-arm/ptrace.h
@@ -139,8 +139,6 @@ static inline int valid_user_regs(struct pt_regs *regs)
         return 0;
  }
  
-#endif /* __KERNEL__ */
-
  #define pc_pointer(v) \
         ((v) & ~PCMASK)
  
@@ -153,10 +151,10 @@ extern unsigned long profile_pc(struct pt_regs *regs);
  #define profile_pc(regs) instruction_pointer(regs)
  #endif
  
-#ifdef __KERNEL__
  #define predicate(x)           ((x) & 0xf0000000)
  #define PREDICATE_ALWAYS       0xe0000000
-#endif
+
+#endif /* __KERNEL__ */
  
  #endif /* __ASSEMBLY__ */
  
diff --git a/include/asm-arm/thread_info.h b/include/asm-arm/thread_info.h

index f5a6647863114ccfe5fa8f88a8bab88ce29b48aa..d4be2d64616087a4ebf3bf2427d399314c6584b3 100644 (file)
--- a/include/asm-arm/thread_info.h
+++ b/include/asm-arm/thread_info.h
@@ -97,19 +97,6 @@ static inline struct thread_info *current_thread_info(void)
         return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
  }
  
-/* thread information allocation */
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk) \
-       ((struct thread_info *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, \
-               THREAD_SIZE_ORDER))
-#else
-#define alloc_thread_info(tsk) \
-       ((struct thread_info *)__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
-#endif
-
-#define free_thread_info(info) \
-       free_pages((unsigned long)info, THREAD_SIZE_ORDER);
-
  #define thread_saved_pc(tsk)   \
         ((unsigned long)(pc_pointer(task_thread_info(tsk)->cpu_context.pc)))
  #define thread_saved_fp(tsk)   \
diff --git a/include/asm-avr32/kvm.h b/include/asm-avr32/kvm.h

deleted file mode 100644 (file)

index 8c57770..0000000
--- a/include/asm-avr32/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_AVR32_H
-#define __LINUX_KVM_AVR32_H
-
-/* avr32 does not support KVM */
-
-#endif
diff --git a/include/asm-avr32/thread_info.h b/include/asm-avr32/thread_info.h

index df68631b7b27f98f63e579a3a20940a544f84f6a..294b25f9323dda6a4885de1a6b874b4a6a3bb825 100644 (file)
--- a/include/asm-avr32/thread_info.h
+++ b/include/asm-avr32/thread_info.h
@@ -61,10 +61,6 @@ static inline struct thread_info *current_thread_info(void)
         return (struct thread_info *)addr;
  }
  
-/* thread information allocation */
-#define alloc_thread_info(ti) \
-       ((struct thread_info *) __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
-#define free_thread_info(ti) free_pages((unsigned long)(ti), 1)
  #define get_thread_info(ti) get_task_struct((ti)->task)
  #define put_thread_info(ti) put_task_struct((ti)->task)
  
diff --git a/include/asm-blackfin/kvm.h b/include/asm-blackfin/kvm.h

deleted file mode 100644 (file)

index e3477d7..0000000
--- a/include/asm-blackfin/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_BLACKFIN_H
-#define __LINUX_KVM_BLACKFIN_H
-
-/* blackfin does not support KVM */
-
-#endif
diff --git a/include/asm-blackfin/ptrace.h b/include/asm-blackfin/ptrace.h

index b8346cd3a6f609018b7a0aae0db0f11fc0d33802..a45a80e54adcbe75a774df805d881c03a8da7015 100644 (file)
--- a/include/asm-blackfin/ptrace.h
+++ b/include/asm-blackfin/ptrace.h
@@ -83,14 +83,14 @@ struct pt_regs {
  #define PTRACE_GETREGS            12
  #define PTRACE_SETREGS            13   /* ptrace signal  */
  
-#ifdef CONFIG_BINFMT_ELF_FDPIC
  #define PTRACE_GETFDPIC           31
  #define PTRACE_GETFDPIC_EXEC      0
  #define PTRACE_GETFDPIC_INTERP    1
-#endif
  
  #define PS_S  (0x0002)
  
+#ifdef __KERNEL__
+
  /* user_mode returns true if only one bit is set in IPEND, other than the
     master interrupt enable.  */
  #define user_mode(regs) (!(((regs)->ipend & ~0x10) & (((regs)->ipend & ~0x10) - 1)))
@@ -98,6 +98,8 @@ struct pt_regs {
  #define profile_pc(regs) instruction_pointer(regs)
  extern void show_regs(struct pt_regs *);
  
+#endif  /*  __KERNEL__  */
+
  #endif                         /* __ASSEMBLY__ */
  
  /*
diff --git a/include/asm-blackfin/thread_info.h b/include/asm-blackfin/thread_info.h

index bc2fe5accf20a7f4c55f5b45c640e7e025a29ec1..642769329d12b12fde86d7fb99620a20a98f39ad 100644 (file)
--- a/include/asm-blackfin/thread_info.h
+++ b/include/asm-blackfin/thread_info.h
@@ -42,6 +42,7 @@
  /*
   * Size of kernel stack for each process. This must be a power of 2...
   */
+#define THREAD_SIZE_ORDER      1
  #define THREAD_SIZE            8192    /* 2 pages */
  
  #ifndef __ASSEMBLY__
@@ -94,10 +95,6 @@ static inline struct thread_info *current_thread_info(void)
         return (struct thread_info *)((long)ti & ~((long)THREAD_SIZE-1));
  }
  
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-                               __get_free_pages(GFP_KERNEL, 1))
-#define free_thread_info(ti)   free_pages((unsigned long) (ti), 1)
  #endif                         /* __ASSEMBLY__ */
  
  /*
diff --git a/include/asm-cris/arch-v10/Kbuild b/include/asm-cris/arch-v10/Kbuild

index 60e7e1b73cec6a0e05f33e3b422b4ea3e65f6263..7a192e1290b1a335dd52ff48c001dcc80b3ac63c 100644 (file)
--- a/include/asm-cris/arch-v10/Kbuild
+++ b/include/asm-cris/arch-v10/Kbuild
@@ -1,4 +1,3 @@
-header-y += ptrace.h
  header-y += user.h
  header-y += svinto.h
  header-y += sv_addr_ag.h
diff --git a/include/asm-cris/arch-v10/ptrace.h b/include/asm-cris/arch-v10/ptrace.h

index fb14c5ee37f94e342bfb865c35dac478ec024dec..2f464eab3a51cf25764c74939e3736babfe9f844 100644 (file)
--- a/include/asm-cris/arch-v10/ptrace.h
+++ b/include/asm-cris/arch-v10/ptrace.h
@@ -106,10 +106,14 @@ struct switch_stack {
         unsigned long return_ip; /* ip that _resume will return to */
  };
  
+#ifdef __KERNEL__
+
  /* bit 8 is user-mode flag */
  #define user_mode(regs) (((regs)->dccr & 0x100) != 0)
  #define instruction_pointer(regs) ((regs)->irp)
  #define profile_pc(regs) instruction_pointer(regs)
  extern void show_regs(struct pt_regs *);
  
+#endif  /*  __KERNEL__  */
+
  #endif
diff --git a/include/asm-cris/arch-v32/Kbuild b/include/asm-cris/arch-v32/Kbuild

index a0ec545e242ea27e77213c398ba00a3e13bf0e6c..35f2fc4f993e793b1fd29c56300614dd9a263960 100644 (file)
--- a/include/asm-cris/arch-v32/Kbuild
+++ b/include/asm-cris/arch-v32/Kbuild
@@ -1,3 +1,2 @@
-header-y += ptrace.h
  header-y += user.h
  header-y += cryptocop.h
diff --git a/include/asm-cris/arch-v32/ptrace.h b/include/asm-cris/arch-v32/ptrace.h

index 516cc7062d9488786f45d4b36327469d42d6170b..41f4e8662bc2555c6c18f66071a9902577ab1312 100644 (file)
--- a/include/asm-cris/arch-v32/ptrace.h
+++ b/include/asm-cris/arch-v32/ptrace.h
@@ -106,9 +106,13 @@ struct switch_stack {
         unsigned long return_ip; /* ip that _resume will return to */
  };
  
+#ifdef __KERNEL__
+
  #define user_mode(regs) (((regs)->ccs & (1 << (U_CCS_BITNR + CCS_SHIFT))) != 0)
  #define instruction_pointer(regs) ((regs)->erp)
  extern void show_regs(struct pt_regs *);
  #define profile_pc(regs) instruction_pointer(regs)
  
+#endif  /*  __KERNEL__  */
+
  #endif
diff --git a/include/asm-cris/kvm.h b/include/asm-cris/kvm.h

deleted file mode 100644 (file)

index c860f51..0000000
--- a/include/asm-cris/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_CRIS_H
-#define __LINUX_KVM_CRIS_H
-
-/* cris does not support KVM */
-
-#endif
diff --git a/include/asm-cris/ptrace.h b/include/asm-cris/ptrace.h

index 1ec69a7ea8365282b226ff9a91dbe36df4600bf8..d910925e3174e4bab4d8267b4dd6d0ac34890ffe 100644 (file)
--- a/include/asm-cris/ptrace.h
+++ b/include/asm-cris/ptrace.h
@@ -4,11 +4,13 @@
  #include <asm/arch/ptrace.h>
  
  #ifdef __KERNEL__
+
  /* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
  #define PTRACE_GETREGS            12
  #define PTRACE_SETREGS            13
-#endif
  
  #define profile_pc(regs) instruction_pointer(regs)
  
+#endif /* __KERNEL__ */
+
  #endif /* _CRIS_PTRACE_H */
diff --git a/include/asm-cris/thread_info.h b/include/asm-cris/thread_info.h

index 784668ab0fa29dfdb251302f623993c9ceb19d65..7efe1000f99d23e18b0825d7b8f4c7ebc52df53d 100644 (file)
--- a/include/asm-cris/thread_info.h
+++ b/include/asm-cris/thread_info.h
@@ -11,6 +11,8 @@
  
  #ifdef __KERNEL__
  
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
  #ifndef __ASSEMBLY__
  #include <asm/types.h>
  #include <asm/processor.h>
diff --git a/include/asm-frv/Kbuild b/include/asm-frv/Kbuild

index bc3f12c5b7e05688a609350735ab44744fa4b6f8..0f8956def738bfbfcb2ba77204a945bde9806979 100644 (file)
--- a/include/asm-frv/Kbuild
+++ b/include/asm-frv/Kbuild
@@ -3,4 +3,3 @@ include include/asm-generic/Kbuild.asm
  header-y += registers.h
  
  unifdef-y += termios.h
-unifdef-y += ptrace.h
diff --git a/include/asm-frv/kvm.h b/include/asm-frv/kvm.h

deleted file mode 100644 (file)

index 9c8a4f0..0000000
--- a/include/asm-frv/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_FRV_H
-#define __LINUX_KVM_FRV_H
-
-/* frv does not support KVM */
-
-#endif
diff --git a/include/asm-frv/thread_info.h b/include/asm-frv/thread_info.h

index 348b8f1df17eef8593183fe565925d4c1b15467f..b7ac6bf2844c395c3179d733102e13ca0e8001fa 100644 (file)
--- a/include/asm-frv/thread_info.h
+++ b/include/asm-frv/thread_info.h
@@ -82,6 +82,8 @@ register struct thread_info *__current_thread_info asm("gr15");
  
  #define current_thread_info() ({ __current_thread_info; })
  
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
  /* thread information allocation */
  #ifdef CONFIG_DEBUG_STACK_USAGE
  #define alloc_thread_info(tsk)                                 \
diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm

index 7cd25b8e7c9a4b891fcf00fd60b3985e5d48c59e..1170dc60e638b35a9ee42cf39ba84b3982178a21 100644 (file)
--- a/include/asm-generic/Kbuild.asm
+++ b/include/asm-generic/Kbuild.asm
@@ -1,4 +1,6 @@
+ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/kvm.h),)
  header-y  += kvm.h
+endif
  
  ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/a.out.h),)
  unifdef-y += a.out.h
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h

index 2632328d8646840aab41c8e401a1088e922754f2..a3f738cffdb61a0057ffb8a1cd9a012e30b72f24 100644 (file)
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -34,9 +34,14 @@ struct bug_entry {
  #ifndef __WARN
  #ifndef __ASSEMBLY__
  extern void warn_on_slowpath(const char *file, const int line);
+extern void warn_slowpath(const char *file, const int line,
+               const char *fmt, ...) __attribute__((format(printf, 3, 4)));
  #define WANT_WARN_ON_SLOWPATH
  #endif
  #define __WARN() warn_on_slowpath(__FILE__, __LINE__)
+#define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg)
+#else
+#define __WARN_printf(arg...) __WARN()
  #endif
  
  #ifndef WARN_ON
@@ -48,6 +53,15 @@ extern void warn_on_slowpath(const char *file, const int line);
  })
  #endif
  
+#ifndef WARN
+#define WARN(condition, format...) ({                                          \
+       int __ret_warn_on = !!(condition);                              \
+       if (unlikely(__ret_warn_on))                                    \
+               __WARN_printf(format);                                  \
+       unlikely(__ret_warn_on);                                        \
+})
+#endif
+
  #else /* !CONFIG_BUG */
  #ifndef HAVE_ARCH_BUG
  #define BUG()
@@ -63,6 +77,14 @@ extern void warn_on_slowpath(const char *file, const int line);
         unlikely(__ret_warn_on);                                        \
  })
  #endif
+
+#ifndef WARN
+#define WARN(condition, format...) ({                                  \
+       int __ret_warn_on = !!(condition);                              \
+       unlikely(__ret_warn_on);                                        \
+})
+#endif
+
  #endif
  
  #define WARN_ON_ONCE(condition)        ({                              \
@@ -75,6 +97,9 @@ extern void warn_on_slowpath(const char *file, const int line);
         unlikely(__ret_warn_once);                              \
  })
  
+#define WARN_ON_RATELIMIT(condition, state)                    \
+               WARN_ON((condition) && __ratelimit(state))
+
  #ifdef CONFIG_SMP
  # define WARN_ON_SMP(x)                        WARN_ON(x)
  #else
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h

index 6be061d09da9d06fc2dabdb520148c028194621b..a3034d20ebd5ef2f7d264b62311afa06a7ac7327 100644 (file)
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -3,7 +3,7 @@
  
  #include <linux/types.h>
  
-#ifdef CONFIG_HAVE_GPIO_LIB
+#ifdef CONFIG_GPIOLIB
  
  #include <linux/compiler.h>
  
@@ -32,6 +32,8 @@ struct module;
  /**
   * struct gpio_chip - abstract a GPIO controller
   * @label: for diagnostics
+ * @dev: optional device providing the GPIOs
+ * @owner: helps prevent removal of modules exporting active GPIOs
   * @direction_input: configures signal "offset" as input, or returns error
   * @get: returns value for signal "offset"; for output signals this
   *     returns either the value actually sensed, or zero
@@ -59,6 +61,7 @@ struct module;
   */
  struct gpio_chip {
         char                    *label;
+       struct device           *dev;
         struct module           *owner;
  
         int                     (*direction_input)(struct gpio_chip *chip,
@@ -74,6 +77,7 @@ struct gpio_chip {
         int                     base;
         u16                     ngpio;
         unsigned                can_sleep:1;
+       unsigned                exported:1;
  };
  
  extern const char *gpiochip_is_requested(struct gpio_chip *chip,
@@ -108,7 +112,18 @@ extern void __gpio_set_value(unsigned gpio, int value);
  extern int __gpio_cansleep(unsigned gpio);
  
  
-#else
+#ifdef CONFIG_GPIO_SYSFS
+
+/*
+ * A sysfs interface can be exported by individual drivers if they want,
+ * but more typically is configured entirely from userspace.
+ */
+extern int gpio_export(unsigned gpio, bool direction_may_change);
+extern void gpio_unexport(unsigned gpio);
+
+#endif /* CONFIG_GPIO_SYSFS */
+
+#else  /* !CONFIG_GPIOLIB */
  
  static inline int gpio_is_valid(int number)
  {
@@ -137,6 +152,20 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value)
         gpio_set_value(gpio, value);
  }
  
-#endif
+#endif /* !CONFIG_GPIOLIB */
+
+#ifndef CONFIG_GPIO_SYSFS
+
+/* sysfs support is only available with gpiolib, where it's optional */
+
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+       return -ENOSYS;
+}
+
+static inline void gpio_unexport(unsigned gpio)
+{
+}
+#endif /* !CONFIG_GPIO_SYSFS */
  
  #endif /* _ASM_GENERIC_GPIO_H */
diff --git a/include/asm-generic/int-ll64.h b/include/asm-generic/int-ll64.h

index 260948905e4ea5d35f781ffcd08dff5428e6b58d..f9bc9ac29b36edafca055190ca4df361f9152f52 100644 (file)
--- a/include/asm-generic/int-ll64.h
+++ b/include/asm-generic/int-ll64.h
@@ -26,7 +26,7 @@ typedef unsigned int __u32;
  #ifdef __GNUC__
  __extension__ typedef __signed__ long long __s64;
  __extension__ typedef unsigned long long __u64;
-#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#else
  typedef __signed__ long long __s64;
  typedef unsigned long long __u64;
  #endif
diff --git a/include/asm-h8300/elf.h b/include/asm-h8300/elf.h

index 26bfc7e641daf339b933c269526b98e984734120..a8b57d1f41286261434957bc408057b487628408 100644 (file)
--- a/include/asm-h8300/elf.h
+++ b/include/asm-h8300/elf.h
@@ -26,10 +26,10 @@ typedef unsigned long elf_fpregset_t;
  #define ELF_DATA       ELFDATA2MSB
  #define ELF_ARCH       EM_H8_300
  #if defined(__H8300H__)
-#define ELF_FLAGS       0x810000
+#define ELF_CORE_EFLAGS 0x810000
  #endif
  #if defined(__H8300S__)
-#define ELF_FLAGS       0x820000
+#define ELF_CORE_EFLAGS 0x820000
  #endif
  
  #define ELF_PLAT_INIT(_r)      _r->er1 = 0
diff --git a/include/asm-h8300/kvm.h b/include/asm-h8300/kvm.h

deleted file mode 100644 (file)

index bdbed7b..0000000
--- a/include/asm-h8300/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_H8300_H
-#define __LINUX_KVM_H8300_H
-
-/* h8300 does not support KVM */
-
-#endif
diff --git a/include/asm-h8300/thread_info.h b/include/asm-h8300/thread_info.h

index 27bb95e2944c013d8024b58893d17f3585a39c53..aafd4d322ec3e2e8c6767aac9ea125587ff4b322 100644 (file)
--- a/include/asm-h8300/thread_info.h
+++ b/include/asm-h8300/thread_info.h
@@ -49,6 +49,7 @@ struct thread_info {
  /*
   * Size of kernel stack for each process. This must be a power of 2...
   */
+#define THREAD_SIZE_ORDER      1
  #define THREAD_SIZE            8192    /* 2 pages */
  
  
@@ -65,10 +66,6 @@ static inline struct thread_info *current_thread_info(void)
         return ti;
  }
  
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-                               __get_free_pages(GFP_KERNEL, 1))
-#define free_thread_info(ti)   free_pages((unsigned long) (ti), 1)
  #endif /* __ASSEMBLY__ */
  
  /*
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h

index 2422ac61658a6bb16f465c26acf8c58df1735c6e..7c60fcdd2efdb6f6f79615b051f18c755c4ff69e 100644 (file)
--- a/include/asm-ia64/thread_info.h
+++ b/include/asm-ia64/thread_info.h
@@ -54,6 +54,8 @@ struct thread_info {
         },                                      \
  }
  
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
  #ifndef ASM_OFFSETS_C
  /* how to get the thread information struct from C */
  #define current_thread_info()  ((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
diff --git a/include/asm-m32r/kvm.h b/include/asm-m32r/kvm.h

deleted file mode 100644 (file)

index 99a4051..0000000
--- a/include/asm-m32r/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_M32R_H
-#define __LINUX_KVM_M32R_H
-
-/* m32r does not support KVM */
-
-#endif
diff --git a/include/asm-m32r/thread_info.h b/include/asm-m32r/thread_info.h

index 1effcd0f5e631502eab4eb005e4393a146b14858..8589d462df27e366863974619b5cf293e0b9213c 100644 (file)
--- a/include/asm-m32r/thread_info.h
+++ b/include/asm-m32r/thread_info.h
@@ -94,6 +94,8 @@ static inline struct thread_info *current_thread_info(void)
         return ti;
  }
  
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
  /* thread information allocation */
  #ifdef CONFIG_DEBUG_STACK_USAGE
  #define alloc_thread_info(tsk)                                 \
diff --git a/include/asm-m68k/kvm.h b/include/asm-m68k/kvm.h

deleted file mode 100644 (file)

index 7ed27fc..0000000
--- a/include/asm-m68k/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_M68K_H
-#define __LINUX_KVM_M68K_H
-
-/* m68k does not support KVM */
-
-#endif
diff --git a/include/asm-m68k/thread_info.h b/include/asm-m68k/thread_info.h

index d635a375248896149ba6a14fcfc7392ca62873f0..abc002798a2b07a9fcb502ab0f93c1c7f502ef50 100644 (file)
--- a/include/asm-m68k/thread_info.h
+++ b/include/asm-m68k/thread_info.h
@@ -25,13 +25,7 @@ struct thread_info {
  }
  
  /* THREAD_SIZE should be 8k, so handle differently for 4k and 8k machines */
-#if PAGE_SHIFT == 13 /* 8k machines */
-#define alloc_thread_info(tsk)   ((struct thread_info *)__get_free_pages(GFP_KERNEL,0))
-#define free_thread_info(ti)  free_pages((unsigned long)(ti),0)
-#else /* otherwise assume 4k pages */
-#define alloc_thread_info(tsk)   ((struct thread_info *)__get_free_pages(GFP_KERNEL,1))
-#define free_thread_info(ti)  free_pages((unsigned long)(ti),1)
-#endif /* PAGE_SHIFT == 13 */
+#define THREAD_SIZE_ORDER (13 - PAGE_SHIFT)
  
  #define init_thread_info       (init_task.thread.info)
  #define init_stack             (init_thread_union.stack)
diff --git a/include/asm-m68knommu/kvm.h b/include/asm-m68knommu/kvm.h

deleted file mode 100644 (file)

index b49d425..0000000
--- a/include/asm-m68knommu/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_M68KNOMMU_H
-#define __LINUX_KVM_M68KNOMMU_H
-
-/* m68knommu does not support KVM */
-
-#endif
diff --git a/include/asm-m68knommu/ptrace.h b/include/asm-m68knommu/ptrace.h

index 47258e86e8c4a69e996504d52e34abfcea3524a4..8c9194b98548ffee6b4201b47050cc1ae7e0909b 100644 (file)
--- a/include/asm-m68knommu/ptrace.h
+++ b/include/asm-m68knommu/ptrace.h
@@ -68,10 +68,8 @@ struct switch_stack {
  /* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
  #define PTRACE_GETREGS            12
  #define PTRACE_SETREGS            13
-#ifdef CONFIG_FPU
  #define PTRACE_GETFPREGS          14
  #define PTRACE_SETFPREGS          15
-#endif
  
  #ifdef __KERNEL__
  
diff --git a/include/asm-m68knommu/thread_info.h b/include/asm-m68knommu/thread_info.h

index 95996d978bedcf0d9adc3dd27e1e99f121b79881..0c9bc095f3f0bc3cae753f788f72786a67b96512 100644 (file)
--- a/include/asm-m68knommu/thread_info.h
+++ b/include/asm-m68knommu/thread_info.h
@@ -71,10 +71,6 @@ static inline struct thread_info *current_thread_info(void)
         return ti;
  }
  
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-                               __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
-#define free_thread_info(ti)   free_pages((unsigned long) (ti), THREAD_SIZE_ORDER)
  #endif /* __ASSEMBLY__ */
  
  #define        PREEMPT_ACTIVE  0x4000000
diff --git a/include/asm-mips/kvm.h b/include/asm-mips/kvm.h

deleted file mode 100644 (file)

index 093a5b7..0000000
--- a/include/asm-mips/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_MIPS_H
-#define __LINUX_KVM_MIPS_H
-
-/* mips does not support KVM */
-
-#endif
diff --git a/include/asm-mips/mach-generic/gpio.h b/include/asm-mips/mach-generic/gpio.h

index e6b376bd9d06c1c296395f30db8bb397af5f195a..b4e70208da643bdc2e1e8b2ba8ad2c260064160d 100644 (file)
--- a/include/asm-mips/mach-generic/gpio.h
+++ b/include/asm-mips/mach-generic/gpio.h
@@ -1,7 +1,7 @@
  #ifndef __ASM_MACH_GENERIC_GPIO_H
  #define __ASM_MACH_GENERIC_GPIO_H
  
-#ifdef CONFIG_HAVE_GPIO_LIB
+#ifdef CONFIG_GPIOLIB
  #define gpio_get_value __gpio_get_value
  #define gpio_set_value __gpio_set_value
  #define gpio_cansleep  __gpio_cansleep
diff --git a/include/asm-mips/thread_info.h b/include/asm-mips/thread_info.h

index b2772df1a1bd6270a84da3e9d2fcfc5d0ae06ab8..bb3060699df29f437facbb5631fd44985ca9e69e 100644 (file)
--- a/include/asm-mips/thread_info.h
+++ b/include/asm-mips/thread_info.h
@@ -82,6 +82,8 @@ register struct thread_info *__current_thread_info __asm__("$28");
  #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
  #define THREAD_MASK (THREAD_SIZE - 1UL)
  
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
  #ifdef CONFIG_DEBUG_STACK_USAGE
  #define alloc_thread_info(tsk)                                 \
  ({                                                             \
diff --git a/include/asm-mn10300/kvm.h b/include/asm-mn10300/kvm.h

deleted file mode 100644 (file)

index f6b609f..0000000
--- a/include/asm-mn10300/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_MN10300_H
-#define __LINUX_KVM_MN10300_H
-
-/* mn10300 does not support KVM */
-
-#endif
diff --git a/include/asm-mn10300/ptrace.h b/include/asm-mn10300/ptrace.h

index b3684689fcceedc7be4f13f7e020559b07bd4cbe..7b06cc623d8b074a3a23511673e2c170bb033840 100644 (file)
--- a/include/asm-mn10300/ptrace.h
+++ b/include/asm-mn10300/ptrace.h
@@ -88,12 +88,16 @@ extern struct pt_regs *__frame; /* current frame pointer */
  /* options set using PTRACE_SETOPTIONS */
  #define PTRACE_O_TRACESYSGOOD     0x00000001
  
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#if defined(__KERNEL__)
+
+#if !defined(__ASSEMBLY__)
  #define user_mode(regs)                        (((regs)->epsw & EPSW_nSL) == EPSW_nSL)
  #define instruction_pointer(regs)      ((regs)->pc)
  extern void show_regs(struct pt_regs *);
-#endif
+#endif  /*  !__ASSEMBLY__  */
  
  #define profile_pc(regs) ((regs)->pc)
  
+#endif  /*  __KERNEL__  */
+
  #endif /* _ASM_PTRACE_H */
diff --git a/include/asm-mn10300/thread_info.h b/include/asm-mn10300/thread_info.h

index e397e719278544b530dd72a3740e8ce06e65da85..78a3881f3c1250f39eea83dc80a320c826ed650d 100644 (file)
--- a/include/asm-mn10300/thread_info.h
+++ b/include/asm-mn10300/thread_info.h
@@ -112,6 +112,8 @@ static inline unsigned long current_stack_pointer(void)
         return sp;
  }
  
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
  /* thread information allocation */
  #ifdef CONFIG_DEBUG_STACK_USAGE
  #define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL)
diff --git a/include/asm-parisc/kvm.h b/include/asm-parisc/kvm.h

deleted file mode 100644 (file)

index 00cc458..0000000
--- a/include/asm-parisc/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_PARISC_H
-#define __LINUX_KVM_PARISC_H
-
-/* parisc does not support KVM */
-
-#endif
diff --git a/include/asm-parisc/ptrace.h b/include/asm-parisc/ptrace.h

index 93f990e418f1cd989e001bdeeec8b9ae60afe9a3..3e94c5d85ff5715a3d95af8e6444b27ae081a1fd 100644 (file)
--- a/include/asm-parisc/ptrace.h
+++ b/include/asm-parisc/ptrace.h
@@ -33,7 +33,6 @@ struct pt_regs {
         unsigned long ipsw;     /* CR22 */
  };
  
-#define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
  /*
   * The numbers chosen here are somewhat arbitrary but absolutely MUST
   * not overlap with any of the number assigned in <linux/ptrace.h>.
@@ -43,8 +42,11 @@ struct pt_regs {
   * since we have taken branch traps too)
   */
  #define PTRACE_SINGLEBLOCK     12      /* resume execution until next branch */
+
  #ifdef __KERNEL__
  
+#define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
+
  /* XXX should we use iaoq[1] or iaoq[0] ? */
  #define user_mode(regs)                        (((regs)->iaoq[0] & 3) ? 1 : 0)
  #define user_space(regs)               (((regs)->iasq[1] != 0) ? 1 : 0)
diff --git a/include/asm-parisc/thread_info.h b/include/asm-parisc/thread_info.h

index 2d9c7500867b599ade220a0a086d8d055057c675..9f812741c3553d41a45e8dada845c7bb57fef86a 100644 (file)
--- a/include/asm-parisc/thread_info.h
+++ b/include/asm-parisc/thread_info.h
@@ -34,15 +34,11 @@ struct thread_info {
  
  /* thread information allocation */
  
-#define THREAD_ORDER            2
+#define THREAD_SIZE_ORDER            2
  /* Be sure to hunt all references to this down when you change the size of
   * the kernel stack */
-#define THREAD_SIZE             (PAGE_SIZE << THREAD_ORDER)
-#define THREAD_SHIFT            (PAGE_SHIFT + THREAD_ORDER)
-
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-                       __get_free_pages(GFP_KERNEL, THREAD_ORDER))
-#define free_thread_info(ti)    free_pages((unsigned long) (ti), THREAD_ORDER)
+#define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
+#define THREAD_SHIFT            (PAGE_SHIFT + THREAD_SIZE_ORDER)
  
  /* how to get the thread information struct from C */
  #define current_thread_info()  ((struct thread_info *)mfctl(30))
diff --git a/include/asm-powerpc/Kbuild b/include/asm-powerpc/Kbuild

index 04ce8f8a2ee7ef1319ed00a8bc5e51de94bbde38..5ab7d7fe198ce091708e9ba25066c3788e859ebb 100644 (file)
--- a/include/asm-powerpc/Kbuild
+++ b/include/asm-powerpc/Kbuild
@@ -29,7 +29,6 @@ unifdef-y += elf.h
  unifdef-y += nvram.h
  unifdef-y += param.h
  unifdef-y += posix_types.h
-unifdef-y += ptrace.h
  unifdef-y += seccomp.h
  unifdef-y += signal.h
  unifdef-y += spu_info.h
diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h

index 2a3e9075a5a0732768ebf71b3bb5abcf600cc588..ef8a248dfd558697a374868bf35f4b8e8d5950d1 100644 (file)
--- a/include/asm-powerpc/cputable.h
+++ b/include/asm-powerpc/cputable.h
@@ -127,6 +127,8 @@ extern struct cpu_spec *identify_cpu(unsigned long offset, unsigned int pvr);
  extern void do_feature_fixups(unsigned long value, void *fixup_start,
                               void *fixup_end);
  
+extern const char *powerpc_base_platform;
+
  #endif /* __ASSEMBLY__ */
  
  /* CPU kernel features */
diff --git a/include/asm-powerpc/elf.h b/include/asm-powerpc/elf.h

index 89664675b469d8af2212b826433c4a6861f3b4e5..80d1f399ee513a944aae0b7366a55d588393fac9 100644 (file)
--- a/include/asm-powerpc/elf.h
+++ b/include/asm-powerpc/elf.h
@@ -217,6 +217,14 @@ typedef elf_vrregset_t elf_fpxregset_t;
  
  #define ELF_PLATFORM   (cur_cpu_spec->platform)
  
+/* While ELF_PLATFORM indicates the ISA supported by the platform, it
+ * may not accurately reflect the underlying behavior of the hardware
+ * (as in the case of running in Power5+ compatibility mode on a
+ * Power6 machine).  ELF_BASE_PLATFORM allows ld.so to load libraries
+ * that are tuned for the real hardware.
+ */
+#define ELF_BASE_PLATFORM (powerpc_base_platform)
+
  #ifdef __powerpc64__
  # define ELF_PLAT_INIT(_r, load_addr)  do {    \
         _r->gpr[2] = load_addr;                 \
diff --git a/include/asm-powerpc/firmware.h b/include/asm-powerpc/firmware.h

index ef328995ba9dd9cd3537aeabbcb8be05f5f1e13c..3a179827528d1b8ad53f998fec23d4fd788c77f1 100644 (file)
--- a/include/asm-powerpc/firmware.h
+++ b/include/asm-powerpc/firmware.h
@@ -46,6 +46,7 @@
  #define FW_FEATURE_PS3_LV1     ASM_CONST(0x0000000000800000)
  #define FW_FEATURE_BEAT                ASM_CONST(0x0000000001000000)
  #define FW_FEATURE_BULK_REMOVE ASM_CONST(0x0000000002000000)
+#define FW_FEATURE_CMO         ASM_CONST(0x0000000004000000)
  
  #ifndef __ASSEMBLY__
  
@@ -58,7 +59,7 @@ enum {
                 FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ |
                 FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
                 FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE |
-               FW_FEATURE_SPLPAR | FW_FEATURE_LPAR,
+               FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | FW_FEATURE_CMO,
         FW_FEATURE_PSERIES_ALWAYS = 0,
         FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
         FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
diff --git a/include/asm-powerpc/gpio.h b/include/asm-powerpc/gpio.h

index 77ad3a890f30589aae9cfacc2ca1abd1c5a2f6ef..ea04632399d8d25f8cf5e4f4720c28bd7dbbaef9 100644 (file)
--- a/include/asm-powerpc/gpio.h
+++ b/include/asm-powerpc/gpio.h
@@ -17,7 +17,7 @@
  #include <linux/errno.h>
  #include <asm-generic/gpio.h>
  
-#ifdef CONFIG_HAVE_GPIO_LIB
+#ifdef CONFIG_GPIOLIB
  
  /*
   * We don't (yet) implement inlined/rapid versions for on-chip gpios.
@@ -51,6 +51,6 @@ static inline int irq_to_gpio(unsigned int irq)
         return -EINVAL;
  }
  
-#endif /* CONFIG_HAVE_GPIO_LIB */
+#endif /* CONFIG_GPIOLIB */
  
  #endif /* __ASM_POWERPC_GPIO_H */
diff --git a/include/asm-powerpc/hvcall.h b/include/asm-powerpc/hvcall.h

index bf6cd7cb996cf9f911d52bfaa20ff150a011684c..fbe2932fa9e9660d0a896dea589d4ac1ad6e17c3 100644 (file)
--- a/include/asm-powerpc/hvcall.h
+++ b/include/asm-powerpc/hvcall.h
@@ -92,6 +92,11 @@
  #define H_EXACT                        (1UL<<(63-24))  /* Use exact PTE or return H_PTEG_FULL */
  #define H_R_XLATE              (1UL<<(63-25))  /* include a valid logical page num in the pte if the valid bit is set */
  #define H_READ_4               (1UL<<(63-26))  /* Return 4 PTEs */
+#define H_PAGE_STATE_CHANGE    (1UL<<(63-28))
+#define H_PAGE_UNUSED          ((1UL<<(63-29)) | (1UL<<(63-30)))
+#define H_PAGE_SET_UNUSED      (H_PAGE_STATE_CHANGE | H_PAGE_UNUSED)
+#define H_PAGE_SET_LOANED      (H_PAGE_SET_UNUSED | (1UL<<(63-31)))
+#define H_PAGE_SET_ACTIVE      H_PAGE_STATE_CHANGE
  #define H_AVPN                 (1UL<<(63-32))  /* An avpn is provided as a sanity test */
  #define H_ANDCOND              (1UL<<(63-33))
  #define H_ICACHE_INVALIDATE    (1UL<<(63-40))  /* icbi, etc.  (ignored for IO pages) */
@@ -210,7 +215,9 @@
  #define H_JOIN                 0x298
  #define H_VASI_STATE            0x2A4
  #define H_ENABLE_CRQ           0x2B0
-#define MAX_HCALL_OPCODE       H_ENABLE_CRQ
+#define H_SET_MPP              0x2D0
+#define H_GET_MPP              0x2D4
+#define MAX_HCALL_OPCODE       H_GET_MPP
  
  #ifndef __ASSEMBLY__
  
@@ -270,6 +277,20 @@ struct hcall_stats {
  };
  #define HCALL_STAT_ARRAY_SIZE  ((MAX_HCALL_OPCODE >> 2) + 1)
  
+struct hvcall_mpp_data {
+       unsigned long entitled_mem;
+       unsigned long mapped_mem;
+       unsigned short group_num;
+       unsigned short pool_num;
+       unsigned char mem_weight;
+       unsigned char unallocated_mem_weight;
+       unsigned long unallocated_entitlement;  /* value in bytes */
+       unsigned long pool_size;
+       signed long loan_request;
+       unsigned long backing_mem;
+};
+
+int h_get_mpp(struct hvcall_mpp_data *);
  #endif /* __ASSEMBLY__ */
  #endif /* __KERNEL__ */
  #endif /* _ASM_POWERPC_HVCALL_H */
diff --git a/include/asm-powerpc/lppaca.h b/include/asm-powerpc/lppaca.h

index 567ed92cd91ff6f1cf0b7193b990de75ec90ede1..2fe268b10333433ad6b2e40bcf2d90c392c45798 100644 (file)
--- a/include/asm-powerpc/lppaca.h
+++ b/include/asm-powerpc/lppaca.h
@@ -125,7 +125,10 @@ struct lppaca {
         // NOTE: This value will ALWAYS be zero for dedicated processors and
         // will NEVER be zero for shared processors (ie, initialized to a 1).
         volatile u32 yield_count;       // PLIC increments each dispatchx00-x03
-       u8      reserved6[124];         // Reserved                     x04-x7F
+       u32 reserved6;
+       volatile u64 cmo_faults;        // CMO page fault count         x08-x0F
+       volatile u64 cmo_fault_time;    // CMO page fault time          x10-x17
+       u8      reserved7[104];         // Reserved                     x18-x7F
  
  //=============================================================================
  // CACHE_LINE_4-5 0x0180 - 0x027F Contains PMC interrupt data
diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h

index 1233d735fd289a5831e7d6a542a0912b9cffcf32..893aafd87fde2b20ce7eda9dee61f30b79c9ec91 100644 (file)
--- a/include/asm-powerpc/machdep.h
+++ b/include/asm-powerpc/machdep.h
@@ -76,7 +76,7 @@ struct machdep_calls {
          * destroyed as well */
         void            (*hpte_clear_all)(void);
  
-       void            (*tce_build)(struct iommu_table * tbl,
+       int             (*tce_build)(struct iommu_table *tbl,
                                      long index,
                                      long npages,
                                      unsigned long uaddr,
diff --git a/include/asm-powerpc/mpc52xx_psc.h b/include/asm-powerpc/mpc52xx_psc.h

index 710c5d36efaa08496f1a473633acfdf132b3eba1..8917ed63056593377498abaf721e3f33ecaa3b1e 100644 (file)
--- a/include/asm-powerpc/mpc52xx_psc.h
+++ b/include/asm-powerpc/mpc52xx_psc.h
@@ -60,10 +60,12 @@
  #define MPC52xx_PSC_RXTX_FIFO_ALARM    0x0002
  #define MPC52xx_PSC_RXTX_FIFO_EMPTY    0x0001
  
-/* PSC interrupt mask bits */
+/* PSC interrupt status/mask bits */
  #define MPC52xx_PSC_IMR_TXRDY          0x0100
  #define MPC52xx_PSC_IMR_RXRDY          0x0200
  #define MPC52xx_PSC_IMR_DB             0x0400
+#define MPC52xx_PSC_IMR_TXEMP          0x0800
+#define MPC52xx_PSC_IMR_ORERR          0x1000
  #define MPC52xx_PSC_IMR_IPC            0x8000
  
  /* PSC input port change bit */
@@ -92,6 +94,34 @@
  
  #define MPC52xx_PSC_RFNUM_MASK 0x01ff
  
+#define MPC52xx_PSC_SICR_DTS1                  (1 << 29)
+#define MPC52xx_PSC_SICR_SHDR                  (1 << 28)
+#define MPC52xx_PSC_SICR_SIM_MASK              (0xf << 24)
+#define MPC52xx_PSC_SICR_SIM_UART              (0x0 << 24)
+#define MPC52xx_PSC_SICR_SIM_UART_DCD          (0x8 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_8           (0x1 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_16          (0x2 << 24)
+#define MPC52xx_PSC_SICR_SIM_AC97              (0x3 << 24)
+#define MPC52xx_PSC_SICR_SIM_SIR               (0x8 << 24)
+#define MPC52xx_PSC_SICR_SIM_SIR_DCD           (0xc << 24)
+#define MPC52xx_PSC_SICR_SIM_MIR               (0x5 << 24)
+#define MPC52xx_PSC_SICR_SIM_FIR               (0x6 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_24          (0x7 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_32          (0xf << 24)
+#define MPC52xx_PSC_SICR_GENCLK                        (1 << 23)
+#define MPC52xx_PSC_SICR_I2S                   (1 << 22)
+#define MPC52xx_PSC_SICR_CLKPOL                        (1 << 21)
+#define MPC52xx_PSC_SICR_SYNCPOL               (1 << 20)
+#define MPC52xx_PSC_SICR_CELLSLAVE             (1 << 19)
+#define MPC52xx_PSC_SICR_CELL2XCLK             (1 << 18)
+#define MPC52xx_PSC_SICR_ESAI                  (1 << 17)
+#define MPC52xx_PSC_SICR_ENAC97                        (1 << 16)
+#define MPC52xx_PSC_SICR_SPI                   (1 << 15)
+#define MPC52xx_PSC_SICR_MSTR                  (1 << 14)
+#define MPC52xx_PSC_SICR_CPOL                  (1 << 13)
+#define MPC52xx_PSC_SICR_CPHA                  (1 << 12)
+#define MPC52xx_PSC_SICR_USEEOF                        (1 << 11)
+#define MPC52xx_PSC_SICR_DISABLEEOF            (1 << 10)
  
  /* Structure of the hardware registers */
  struct mpc52xx_psc {
@@ -132,8 +162,12 @@ struct mpc52xx_psc {
         u8              reserved5[3];
         u8              ctlr;           /* PSC + 0x1c */
         u8              reserved6[3];
-       u16             ccr;            /* PSC + 0x20 */
-       u8              reserved7[14];
+       /* BitClkDiv field of CCR is byte swapped in
+        * the hardware for mpc5200/b compatibility */
+       u32             ccr;            /* PSC + 0x20 */
+       u32             ac97_slots;     /* PSC + 0x24 */
+       u32             ac97_cmd;       /* PSC + 0x28 */
+       u32             ac97_data;      /* PSC + 0x2c */
         u8              ivr;            /* PSC + 0x30 */
         u8              reserved8[3];
         u8              ip;             /* PSC + 0x34 */
diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h

index d18ffe7bc7c4a498a038f0a96f64c81227ac067b..dbb8ca172e445461a8c61bcad3a994bf62276f96 100644 (file)
--- a/include/asm-powerpc/pgtable.h
+++ b/include/asm-powerpc/pgtable.h
@@ -38,6 +38,19 @@ extern void paging_init(void);
                 remap_pfn_range(vma, vaddr, pfn, size, prot)
  
  #include <asm-generic/pgtable.h>
+
+
+/*
+ * This gets called at the end of handling a page fault, when
+ * the kernel has put a new PTE into the page table for the process.
+ * We use it to ensure coherency between the i-cache and d-cache
+ * for the page which has just been mapped in.
+ * On machines which use an MMU hash table, we use this to put a
+ * corresponding HPTE into the hash table ahead of time, instead of
+ * waiting for the inevitable extra hash-table miss exception.
+ */
+extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
+
  #endif /* __ASSEMBLY__ */
  
  #endif /* __KERNEL__ */
diff --git a/include/asm-powerpc/syscalls.h b/include/asm-powerpc/syscalls.h

index 2b8a458f990a8b73e72ac2da42322812f9094bba..eb8eb400c664c3dc6d38fb14637752bb5454e0da 100644 (file)
--- a/include/asm-powerpc/syscalls.h
+++ b/include/asm-powerpc/syscalls.h
@@ -31,6 +31,7 @@ asmlinkage int sys_vfork(unsigned long p1, unsigned long p2,
                 unsigned long p3, unsigned long p4, unsigned long p5,
                 unsigned long p6, struct pt_regs *regs);
  asmlinkage long sys_pipe(int __user *fildes);
+asmlinkage long sys_pipe2(int __user *fildes, int flags);
  asmlinkage long sys_rt_sigaction(int sig,
                 const struct sigaction __user *act,
                 struct sigaction __user *oact, size_t sigsetsize);
diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h

index ae7085c65692136cf7affb8cfdc25dcec8151431..e084272ed1c26f4d5dc985c254f9550093041875 100644 (file)
--- a/include/asm-powerpc/systbl.h
+++ b/include/asm-powerpc/systbl.h
@@ -316,3 +316,9 @@ COMPAT_SYS(fallocate)
  SYSCALL(subpage_prot)
  COMPAT_SYS_SPU(timerfd_settime)
  COMPAT_SYS_SPU(timerfd_gettime)
+COMPAT_SYS_SPU(signalfd4)
+SYSCALL_SPU(eventfd2)
+SYSCALL_SPU(epoll_create1)
+SYSCALL_SPU(dup3)
+SYSCALL_SPU(pipe2)
+SYSCALL(inotify_init1)
diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h

index e6e25e2364ebf87d6012a977d3e5b0d66382820b..d6648c1433221972a6730392236dc8c3c63fe614 100644 (file)
--- a/include/asm-powerpc/system.h
+++ b/include/asm-powerpc/system.h
@@ -110,6 +110,8 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
  #endif
  
  extern int set_dabr(unsigned long dabr);
+extern void do_dabr(struct pt_regs *regs, unsigned long address,
+                   unsigned long error_code);
  extern void print_backtrace(unsigned long *);
  extern void show_regs(struct pt_regs * regs);
  extern void flush_instruction_cache(void);
diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h

index b705c2a7651a929a42780163d6bd303a412f998b..a9db562df69a3590d5a51257631dc99930ed9a77 100644 (file)
--- a/include/asm-powerpc/thread_info.h
+++ b/include/asm-powerpc/thread_info.h
@@ -66,20 +66,12 @@ struct thread_info {
  
  #if THREAD_SHIFT >= PAGE_SHIFT
  
-#define THREAD_ORDER   (THREAD_SHIFT - PAGE_SHIFT)
-
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk) \
-       ((struct thread_info *)__get_free_pages(GFP_KERNEL | \
-               __GFP_ZERO, THREAD_ORDER))
-#else
-#define alloc_thread_info(tsk) \
-       ((struct thread_info *)__get_free_pages(GFP_KERNEL, THREAD_ORDER))
-#endif
-#define free_thread_info(ti)   free_pages((unsigned long)ti, THREAD_ORDER)
+#define THREAD_SIZE_ORDER      (THREAD_SHIFT - PAGE_SHIFT)
  
  #else /* THREAD_SHIFT < PAGE_SHIFT */
  
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
  extern struct thread_info *alloc_thread_info(struct task_struct *tsk);
  extern void free_thread_info(struct thread_info *ti);
  
diff --git a/include/asm-powerpc/tlbflush.h b/include/asm-powerpc/tlbflush.h

index 5c91081476441277c968d0f55067442d4cd20640..361cd5c7a32b428107b6815ae2240fce19de99e1 100644 (file)
--- a/include/asm-powerpc/tlbflush.h
+++ b/include/asm-powerpc/tlbflush.h
@@ -162,16 +162,5 @@ extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
  
  #endif
  
-/*
- * This gets called at the end of handling a page fault, when
- * the kernel has put a new PTE into the page table for the process.
- * We use it to ensure coherency between the i-cache and d-cache
- * for the page which has just been mapped in.
- * On machines which use an MMU hash table, we use this to put a
- * corresponding HPTE into the hash table ahead of time, instead of
- * waiting for the inevitable extra hash-table miss exception.
- */
-extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
-
  #endif /*__KERNEL__ */
  #endif /* _ASM_POWERPC_TLBFLUSH_H */
diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h

index ce91bb662063401f054b34fbbdd09aee73241673..e07d0c76ed779793280480b50032d0ffefe04c54 100644 (file)
--- a/include/asm-powerpc/unistd.h
+++ b/include/asm-powerpc/unistd.h
@@ -335,10 +335,16 @@
  #define __NR_subpage_prot      310
  #define __NR_timerfd_settime   311
  #define __NR_timerfd_gettime   312
+#define __NR_signalfd4         313
+#define __NR_eventfd2          314
+#define __NR_epoll_create1     315
+#define __NR_dup3              316
+#define __NR_pipe2             317
+#define __NR_inotify_init1     318
  
  #ifdef __KERNEL__
  
-#define __NR_syscalls          313
+#define __NR_syscalls          319
  
  #define __NR__exit __NR_exit
  #define NR_syscalls    __NR_syscalls
diff --git a/include/asm-powerpc/vio.h b/include/asm-powerpc/vio.h

index 56512a968dab9075666ac15a9437bd3c1a25529c..0a290a19594645b41ac386a21576459705756b7a 100644 (file)
--- a/include/asm-powerpc/vio.h
+++ b/include/asm-powerpc/vio.h
@@ -39,16 +39,32 @@
  #define VIO_IRQ_DISABLE                0UL
  #define VIO_IRQ_ENABLE         1UL
  
+/*
+ * VIO CMO minimum entitlement for all devices and spare entitlement
+ */
+#define VIO_CMO_MIN_ENT 1562624
+
  struct iommu_table;
  
-/*
- * The vio_dev structure is used to describe virtual I/O devices.
+/**
+ * struct vio_dev - This structure is used to describe virtual I/O devices.
+ *
+ * @desired: set from return of driver's get_desired_dma() function
+ * @entitled: bytes of IO data that has been reserved for this device.
+ * @allocated: bytes of IO data currently in use by the device.
+ * @allocs_failed: number of DMA failures due to insufficient entitlement.
   */
  struct vio_dev {
         const char *name;
         const char *type;
         uint32_t unit_address;
         unsigned int irq;
+       struct {
+               size_t desired;
+               size_t entitled;
+               size_t allocated;
+               atomic_t allocs_failed;
+       } cmo;
         struct device dev;
  };
  
@@ -56,12 +72,19 @@ struct vio_driver {
         const struct vio_device_id *id_table;
         int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
         int (*remove)(struct vio_dev *dev);
+       /* A driver must have a get_desired_dma() function to
+        * be loaded in a CMO environment if it uses DMA.
+        */
+       unsigned long (*get_desired_dma)(struct vio_dev *dev);
         struct device_driver driver;
  };
  
  extern int vio_register_driver(struct vio_driver *drv);
  extern void vio_unregister_driver(struct vio_driver *drv);
  
+extern int vio_cmo_entitlement_update(size_t);
+extern void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired);
+
  extern void __devinit vio_unregister_device(struct vio_dev *dev);
  
  struct device_node;
diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild

index bb5e9edb9825391bcb152efe3ed79c27b8d5623f..63a23415fba6b2ed4a80d68630f8b240eba9e64b 100644 (file)
--- a/include/asm-s390/Kbuild
+++ b/include/asm-s390/Kbuild
@@ -7,7 +7,6 @@ header-y += tape390.h
  header-y += ucontext.h
  header-y += vtoc.h
  header-y += zcrypt.h
-header-y += kvm.h
  header-y += chsc.h
  
  unifdef-y += cmb.h
diff --git a/include/asm-s390/kvm_virtio.h b/include/asm-s390/kvm_virtio.h

index 5c871a990c2933f03029934440195b939d0e3dce..146100224defc9ffbf30280b724f67a2d7cdbc54 100644 (file)
--- a/include/asm-s390/kvm_virtio.h
+++ b/include/asm-s390/kvm_virtio.h
@@ -50,4 +50,14 @@ struct kvm_vqconfig {
  #define KVM_S390_VIRTIO_RESET          1
  #define KVM_S390_VIRTIO_SET_STATUS     2
  
+#ifdef __KERNEL__
+/* early virtio console setup (empty inline stub without CONFIG_VIRTIO_CONSOLE) */
+#ifdef CONFIG_VIRTIO_CONSOLE
+extern void s390_virtio_console_init(void);
+#else
+static inline void s390_virtio_console_init(void)
+{
+}
+#endif /* CONFIG_VIRTIO_CONSOLE */
+#endif /* __KERNEL__ */
  #endif
diff --git a/include/asm-s390/thread_info.h b/include/asm-s390/thread_info.h

index 99bbed99a3b2d288d17a7154c174dd7de514c390..91a8f93ad355fcfd2e69a710bbc9efb2bba685f0 100644 (file)
--- a/include/asm-s390/thread_info.h
+++ b/include/asm-s390/thread_info.h
@@ -78,10 +78,7 @@ static inline struct thread_info *current_thread_info(void)
         return (struct thread_info *)((*(unsigned long *) __LC_KERNEL_STACK)-THREAD_SIZE);
  }
  
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
-       __get_free_pages(GFP_KERNEL,THREAD_ORDER))
-#define free_thread_info(ti) free_pages((unsigned long) (ti),THREAD_ORDER)
+#define THREAD_SIZE_ORDER THREAD_ORDER
  
  #endif
  
diff --git a/include/asm-sh/kvm.h b/include/asm-sh/kvm.h

deleted file mode 100644 (file)

index 6af51db..0000000
--- a/include/asm-sh/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_SH_H
-#define __LINUX_KVM_SH_H
-
-/* sh does not support KVM */
-
-#endif
diff --git a/include/asm-sh/ptrace.h b/include/asm-sh/ptrace.h

index 8d6c92b3e770992630b93232bd61135f3c01ae05..7d36dc3bee692a7d8b2dcd3c6ad79098192f0f4b 100644 (file)
--- a/include/asm-sh/ptrace.h
+++ b/include/asm-sh/ptrace.h
@@ -5,7 +5,7 @@
   * Copyright (C) 1999, 2000  Niibe Yutaka
   *
   */
-#if defined(__SH5__) || defined(CONFIG_SUPERH64)
+#if defined(__SH5__)
  struct pt_regs {
         unsigned long long pc;
         unsigned long long sr;
diff --git a/include/asm-sh/thread_info.h b/include/asm-sh/thread_info.h

index c50e5d35fe84fefe26978011966a5d39f3d2ac7b..5131e390752510be5e0cae412c033530b616874e 100644 (file)
--- a/include/asm-sh/thread_info.h
+++ b/include/asm-sh/thread_info.h
@@ -92,6 +92,8 @@ static inline struct thread_info *current_thread_info(void)
         return ti;
  }
  
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
  /* thread information allocation */
  #ifdef CONFIG_DEBUG_STACK_USAGE
  #define alloc_thread_info(ti)  kzalloc(THREAD_SIZE, GFP_KERNEL)
diff --git a/include/asm-sparc/kvm.h b/include/asm-sparc/kvm.h

deleted file mode 100644 (file)

index 2e5478d..0000000
--- a/include/asm-sparc/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_SPARC_H
-#define __LINUX_KVM_SPARC_H
-
-/* sparc does not support KVM */
-
-#endif
diff --git a/include/asm-sparc/thread_info_32.h b/include/asm-sparc/thread_info_32.h

index 91b9f5888c855defa112d38ec20761a6664f1875..2cf9db0440556005d3b5eef8847184238e79dc97 100644 (file)
--- a/include/asm-sparc/thread_info_32.h
+++ b/include/asm-sparc/thread_info_32.h
@@ -86,6 +86,8 @@ register struct thread_info *current_thread_info_reg asm("g6");
  #define THREAD_INFO_ORDER  1
  #endif
  
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
  BTFIXUPDEF_CALL(struct thread_info *, alloc_thread_info, void)
  #define alloc_thread_info(tsk) BTFIXUP_CALL(alloc_thread_info)()
  
diff --git a/include/asm-sparc/thread_info_64.h b/include/asm-sparc/thread_info_64.h

index c6d2e6c7f844cf43a88a852c388519b70b8fc91d..960969d5ad06f46bcca53ef7b06a406713744403 100644 (file)
--- a/include/asm-sparc/thread_info_64.h
+++ b/include/asm-sparc/thread_info_64.h
@@ -155,6 +155,8 @@ register struct thread_info *current_thread_info_reg asm("g6");
  #define __THREAD_INFO_ORDER    0
  #endif /* PAGE_SHIFT == 13 */
  
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
  #ifdef CONFIG_DEBUG_STACK_USAGE
  #define alloc_thread_info(tsk)                                 \
  ({                                                             \
diff --git a/include/asm-sparc64/kvm.h b/include/asm-sparc64/kvm.h

deleted file mode 100644 (file)

index 53564ad..0000000
--- a/include/asm-sparc64/kvm.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-sparc/kvm.h>
diff --git a/include/asm-um/kvm.h b/include/asm-um/kvm.h

deleted file mode 100644 (file)

index 66aa770..0000000
--- a/include/asm-um/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_UM_H
-#define __LINUX_KVM_UM_H
-
-/* um does not support KVM */
-
-#endif
diff --git a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h

index 356b83e2c22edf07c357e91c46469ad762968fb2..e07e72846c7a339544e859d4295eba4452891197 100644 (file)
--- a/include/asm-um/thread_info.h
+++ b/include/asm-um/thread_info.h
@@ -53,21 +53,7 @@ static inline struct thread_info *current_thread_info(void)
         return ti;
  }
  
-#ifdef CONFIG_DEBUG_STACK_USAGE
-
-#define alloc_thread_info(tsk) \
-       ((struct thread_info *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, \
-                                                CONFIG_KERNEL_STACK_ORDER))
-#else
-
-/* thread information allocation */
-#define alloc_thread_info(tsk) \
-       ((struct thread_info *) __get_free_pages(GFP_KERNEL, \
-                                                CONFIG_KERNEL_STACK_ORDER))
-#endif
-
-#define free_thread_info(ti) \
-       free_pages((unsigned long)(ti),CONFIG_KERNEL_STACK_ORDER)
+#define THREAD_SIZE_ORDER CONFIG_KERNEL_STACK_ORDER
  
  #endif
  
diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild

index 1e3554596f72189a6a2ddc03c2d6b0298215356d..4a8e80cdcfa57a7faff08a2042a6b6fb64f5ae66 100644 (file)
--- a/include/asm-x86/Kbuild
+++ b/include/asm-x86/Kbuild
@@ -3,7 +3,6 @@ include include/asm-generic/Kbuild.asm
  header-y += boot.h
  header-y += bootparam.h
  header-y += debugreg.h
-header-y += kvm.h
  header-y += ldt.h
  header-y += msr-index.h
  header-y += prctl.h
@@ -19,7 +18,6 @@ unifdef-y += msr.h
  unifdef-y += mtrr.h
  unifdef-y += posix_types_32.h
  unifdef-y += posix_types_64.h
-unifdef-y += ptrace.h
  unifdef-y += unistd_32.h
  unifdef-y += unistd_64.h
  unifdef-y += vm86.h
diff --git a/include/asm-x86/gpio.h b/include/asm-x86/gpio.h

index ff87fca0caf9e60f416f7097653c7dfcfd898d06..116e9147fe66473c0412685480e565a8a1fd8b76 100644 (file)
--- a/include/asm-x86/gpio.h
+++ b/include/asm-x86/gpio.h
@@ -1,6 +1,62 @@
+/*
+ * Generic GPIO API implementation for x86.
+ *
+ * Derived from the generic GPIO API for powerpc:
+ *
+ * Copyright (c) 2007-2008  MontaVista Software, Inc.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
  #ifndef _ASM_I386_GPIO_H
  #define _ASM_I386_GPIO_H
  
+#ifdef CONFIG_X86_RDC321X
  #include <gpio.h>
+#else /* CONFIG_X86_RDC321X */
+
+#include <asm-generic/gpio.h>
+
+#ifdef CONFIG_GPIOLIB
+
+/*
+ * Thin wrappers: each one forwards directly to its __gpio_*() counterpart.
+ */
+static inline int gpio_get_value(unsigned int gpio)
+{
+       return __gpio_get_value(gpio);
+}
+
+static inline void gpio_set_value(unsigned int gpio, int value)
+{
+       __gpio_set_value(gpio, value);
+}
+
+static inline int gpio_cansleep(unsigned int gpio)
+{
+       return __gpio_cansleep(gpio);
+}
+
+/*
+ * Not implemented yet: gpio_to_irq() returns -ENOSYS, irq_to_gpio() -EINVAL.
+ */
+static inline int gpio_to_irq(unsigned int gpio)
+{
+       return -ENOSYS;
+}
+
+static inline int irq_to_gpio(unsigned int irq)
+{
+       return -EINVAL;
+}
+
+#endif /* CONFIG_GPIOLIB */
+
+#endif /* CONFIG_X86_RDC321X */
  
  #endif /* _ASM_I386_GPIO_H */
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h

index 3f2de10509884c9ff917b9653855ca34cce45a06..da0a675adf94a1a753792d1bca54c45d4fe3bed4 100644 (file)
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -152,6 +152,8 @@ struct thread_info {
  #define THREAD_FLAGS GFP_KERNEL
  #endif
  
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
  #define alloc_thread_info(tsk)                                         \
         ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
  
diff --git a/include/asm-xtensa/kvm.h b/include/asm-xtensa/kvm.h

deleted file mode 100644 (file)

index bda4e33..0000000
--- a/include/asm-xtensa/kvm.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __LINUX_KVM_XTENSA_H
-#define __LINUX_KVM_XTENSA_H
-
-/* xtensa does not support KVM */
-
-#endif
diff --git a/include/asm-xtensa/ptrace.h b/include/asm-xtensa/ptrace.h

index 422c73e26937de34f292a387b1b48360ae702932..089b0db44816f30242a1c6b1db18e1da651855b1 100644 (file)
--- a/include/asm-xtensa/ptrace.h
+++ b/include/asm-xtensa/ptrace.h
@@ -73,10 +73,10 @@
  #define PTRACE_GETXTREGS       18
  #define PTRACE_SETXTREGS       19
  
-#ifndef __ASSEMBLY__
-
  #ifdef __KERNEL__
  
+#ifndef __ASSEMBLY__
+
  /*
   * This struct defines the way the registers are stored on the
   * kernel stack during a system call or other kernel entry.
@@ -122,14 +122,14 @@ extern void show_regs(struct pt_regs *);
  # ifndef CONFIG_SMP
  #  define profile_pc(regs) instruction_pointer(regs)
  # endif
-#endif /* __KERNEL__ */
  
  #else  /* __ASSEMBLY__ */
  
-#ifdef __KERNEL__
  # include <asm/asm-offsets.h>
  #define PT_REGS_OFFSET   (KERNEL_STACK_SIZE - PT_USER_SIZE)
-#endif
  
  #endif /* !__ASSEMBLY__ */
+
+#endif  /* __KERNEL__ */
+
  #endif /* _XTENSA_PTRACE_H */
diff --git a/include/asm-xtensa/thread_info.h b/include/asm-xtensa/thread_info.h

index a2c640682ed94f05f030a6b07b968456b7a294ac..7e4131dd546c5d62655b12fc89460a5e44b31a82 100644 (file)
--- a/include/asm-xtensa/thread_info.h
+++ b/include/asm-xtensa/thread_info.h
@@ -111,10 +111,6 @@ static inline struct thread_info *current_thread_info(void)
         return ti;
  }
  
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
-
  #else /* !__ASSEMBLY__ */
  
  /* how to get the thread information struct from ASM */
@@ -160,6 +156,7 @@ static inline struct thread_info *current_thread_info(void)
  #define TS_USEDFPU             0x0001  /* FPU was used by this task this quantum (SMP) */
  
  #define THREAD_SIZE 8192       //(2*PAGE_SIZE)
+#define THREAD_SIZE_ORDER 1
  
  #endif /* __KERNEL__ */
  #endif /* _XTENSA_THREAD_INFO */
diff --git a/include/linux/Kbuild b/include/linux/Kbuild

index 71d70d1fbce280585d575ba3f131847c0da19cd8..4c4142c5aa6ef26839e4acceb8bcb8b009bf5861 100644 (file)
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -189,7 +189,6 @@ unifdef-y += connector.h
  unifdef-y += cuda.h
  unifdef-y += cyclades.h
  unifdef-y += dccp.h
-unifdef-y += dirent.h
  unifdef-y += dlm.h
  unifdef-y += dlm_plock.h
  unifdef-y += edd.h
@@ -256,7 +255,9 @@ unifdef-y += kd.h
  unifdef-y += kernelcapi.h
  unifdef-y += kernel.h
  unifdef-y += keyboard.h
+ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/kvm.h),)
  unifdef-y += kvm.h
+endif
  unifdef-y += llc.h
  unifdef-y += loop.h
  unifdef-y += lp.h
diff --git a/include/linux/acct.h b/include/linux/acct.h

index e8cae54e8d88b9116d4813771978f2acecc58bf2..882dc7248766367bb451e283e6cbb6b2936cbc3c 100644 (file)
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -120,17 +120,20 @@ struct acct_v3
  struct vfsmount;
  struct super_block;
  struct pacct_struct;
+struct pid_namespace;
  extern void acct_auto_close_mnt(struct vfsmount *m);
  extern void acct_auto_close(struct super_block *sb);
  extern void acct_init_pacct(struct pacct_struct *pacct);
  extern void acct_collect(long exitcode, int group_dead);
  extern void acct_process(void);
+extern void acct_exit_ns(struct pid_namespace *);
  #else
  #define acct_auto_close_mnt(x) do { } while (0)
  #define acct_auto_close(x)     do { } while (0)
  #define acct_init_pacct(x)     do { } while (0)
  #define acct_collect(x,y)      do { } while (0)
  #define acct_process()         do { } while (0)
+#define acct_exit_ns(ns)       do { } while (0)
  #endif
  
  /*
diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h

index 0da17d14fd139f548cc18c3076a8390a8089918f..d7afa9dd6635cc812a87d8b6b291c3fd2419f095 100644 (file)
--- a/include/linux/auxvec.h
+++ b/include/linux/auxvec.h
@@ -26,9 +26,13 @@
  
  #define AT_SECURE 23   /* secure mode boolean */
  
+#define AT_BASE_PLATFORM 24    /* string identifying real platform, may
+                                * differ from AT_PLATFORM. */
+
  #define AT_EXECFN  31  /* filename of program */
+
  #ifdef __KERNEL__
-#define AT_VECTOR_SIZE_BASE 17 /* NEW_AUX_ENT entries in auxiliary table */
+#define AT_VECTOR_SIZE_BASE 18 /* NEW_AUX_ENT entries in auxiliary table */
    /* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */
  #endif
  
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h

index 4ddf2922fc8d60461f31ed3aa63b8d88373a20c3..652470b687c9533f09fc8829d829f3d8fa768b6b 100644 (file)
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -103,17 +103,16 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
         __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
  #define alloc_bootmem_low_pages(x) \
         __alloc_bootmem_low(x, PAGE_SIZE, 0)
-#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
-
-extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
-                                  int flags);
-
  #define alloc_bootmem_node(pgdat, x) \
         __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
  #define alloc_bootmem_pages_node(pgdat, x) \
         __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
  #define alloc_bootmem_low_pages_node(pgdat, x) \
         __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
+#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
+
+extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
+                                  int flags);
  
  extern void *alloc_bootmem_section(unsigned long size,
                                    unsigned long section_nr);
diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h

index 961ed4b48d8eba1b7d72628cec0f65be7cacde32..44f95b92393b89b19186166232efa4a797f7314a 100644 (file)
--- a/include/linux/byteorder/big_endian.h
+++ b/include/linux/byteorder/big_endian.h
@@ -94,12 +94,12 @@ static inline __u16 __be16_to_cpup(const __be16 *p)
  #define __le32_to_cpus(x) __swab32s((x))
  #define __cpu_to_le16s(x) __swab16s((x))
  #define __le16_to_cpus(x) __swab16s((x))
-#define __cpu_to_be64s(x) do {} while (0)
-#define __be64_to_cpus(x) do {} while (0)
-#define __cpu_to_be32s(x) do {} while (0)
-#define __be32_to_cpus(x) do {} while (0)
-#define __cpu_to_be16s(x) do {} while (0)
-#define __be16_to_cpus(x) do {} while (0)
+#define __cpu_to_be64s(x) do { (void)(x); } while (0)
+#define __be64_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_be32s(x) do { (void)(x); } while (0)
+#define __be32_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_be16s(x) do { (void)(x); } while (0)
+#define __be16_to_cpus(x) do { (void)(x); } while (0)
  
  #ifdef __KERNEL__
  #include <linux/byteorder/generic.h>
diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h

index 05dc7c35b3b21e93956f5c462f24a79305749aed..4cc170a3176219b69ab73bec0035c0868a0362a2 100644 (file)
--- a/include/linux/byteorder/little_endian.h
+++ b/include/linux/byteorder/little_endian.h
@@ -88,12 +88,12 @@ static inline __u16 __be16_to_cpup(const __be16 *p)
  {
         return __swab16p((__u16 *)p);
  }
-#define __cpu_to_le64s(x) do {} while (0)
-#define __le64_to_cpus(x) do {} while (0)
-#define __cpu_to_le32s(x) do {} while (0)
-#define __le32_to_cpus(x) do {} while (0)
-#define __cpu_to_le16s(x) do {} while (0)
-#define __le16_to_cpus(x) do {} while (0)
+#define __cpu_to_le64s(x) do { (void)(x); } while (0)
+#define __le64_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_le32s(x) do { (void)(x); } while (0)
+#define __le32_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_le16s(x) do { (void)(x); } while (0)
+#define __le16_to_cpus(x) do { (void)(x); } while (0)
  #define __cpu_to_be64s(x) __swab64s((x))
  #define __be64_to_cpus(x) __swab64s((x))
  #define __cpu_to_be32s(x) __swab32s((x))
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h

index e155aa78d8594eb545547f48f47c7b53bf9fc36a..c98dd7cb7076f44484a68643995f10fd1bbc2ff8 100644 (file)
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -21,11 +21,13 @@
  struct cgroupfs_root;
  struct cgroup_subsys;
  struct inode;
+struct cgroup;
  
  extern int cgroup_init_early(void);
  extern int cgroup_init(void);
  extern void cgroup_init_smp(void);
  extern void cgroup_lock(void);
+extern bool cgroup_lock_live_group(struct cgroup *cgrp);
  extern void cgroup_unlock(void);
  extern void cgroup_fork(struct task_struct *p);
  extern void cgroup_fork_callbacks(struct task_struct *p);
@@ -205,49 +207,63 @@ struct cftype {
          * subsystem, followed by a period */
         char name[MAX_CFTYPE_NAME];
         int private;
-       int (*open) (struct inode *inode, struct file *file);
-       ssize_t (*read) (struct cgroup *cgrp, struct cftype *cft,
-                        struct file *file,
-                        char __user *buf, size_t nbytes, loff_t *ppos);
+
+       /*
+        * If non-zero, defines the maximum length of string that can
+        * be passed to write_string; defaults to 64
+        */
+       size_t max_write_len;
+
+       int (*open)(struct inode *inode, struct file *file);
+       ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
+                       struct file *file,
+                       char __user *buf, size_t nbytes, loff_t *ppos);
         /*
          * read_u64() is a shortcut for the common case of returning a
          * single integer. Use it in place of read()
          */
-       u64 (*read_u64) (struct cgroup *cgrp, struct cftype *cft);
+       u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft);
         /*
          * read_s64() is a signed version of read_u64()
          */
-       s64 (*read_s64) (struct cgroup *cgrp, struct cftype *cft);
+       s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft);
         /*
          * read_map() is used for defining a map of key/value
          * pairs. It should call cb->fill(cb, key, value) for each
          * entry. The key/value pairs (and their ordering) should not
          * change between reboots.
          */
-       int (*read_map) (struct cgroup *cont, struct cftype *cft,
-                        struct cgroup_map_cb *cb);
+       int (*read_map)(struct cgroup *cont, struct cftype *cft,
+                       struct cgroup_map_cb *cb);
         /*
          * read_seq_string() is used for outputting a simple sequence
          * using seqfile.
          */
-       int (*read_seq_string) (struct cgroup *cont, struct cftype *cft,
-                        struct seq_file *m);
+       int (*read_seq_string)(struct cgroup *cont, struct cftype *cft,
+                              struct seq_file *m);
  
-       ssize_t (*write) (struct cgroup *cgrp, struct cftype *cft,
-                         struct file *file,
-                         const char __user *buf, size_t nbytes, loff_t *ppos);
+       ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft,
+                        struct file *file,
+                        const char __user *buf, size_t nbytes, loff_t *ppos);
  
         /*
          * write_u64() is a shortcut for the common case of accepting
          * a single integer (as parsed by simple_strtoull) from
          * userspace. Use in place of write(); return 0 or error.
          */
-       int (*write_u64) (struct cgroup *cgrp, struct cftype *cft, u64 val);
+       int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val);
         /*
          * write_s64() is a signed version of write_u64()
          */
-       int (*write_s64) (struct cgroup *cgrp, struct cftype *cft, s64 val);
+       int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
  
+       /*
+        * write_string() is passed a nul-terminated kernelspace
+        * buffer of maximum length determined by max_write_len.
+        * Returns 0 or -ve error code.
+        */
+       int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
+                           const char *buffer);
         /*
          * trigger() callback can be used to get some kick from the
          * userspace, when the actual string written is not important
@@ -256,7 +272,7 @@ struct cftype {
          */
         int (*trigger)(struct cgroup *cgrp, unsigned int event);
  
-       int (*release) (struct inode *inode, struct file *file);
+       int (*release)(struct inode *inode, struct file *file);
  };
  
  struct cgroup_scanner {
@@ -348,7 +364,8 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
         return task_subsys_state(task, subsys_id)->cgroup;
  }
  
-int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss);
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss,
+                                                       char *nodename);
  
  /* A cgroup_iter should be treated as an opaque object */
  struct cgroup_iter {
diff --git a/include/linux/coda.h b/include/linux/coda.h

index b5cf0780c51a6a0e9ebc243a6053922572756f09..96c87693800ba1b0eaf46065f670b46fb4e3cbfc 100644 (file)
--- a/include/linux/coda.h
+++ b/include/linux/coda.h
@@ -199,28 +199,6 @@ typedef u_int32_t vuid_t;
  typedef u_int32_t vgid_t;
  #endif /*_VUID_T_ */
  
-#ifdef CONFIG_CODA_FS_OLD_API
-struct CodaFid {
-       u_int32_t opaque[3];
-};
-
-static __inline__ ino_t  coda_f2i(struct CodaFid *fid)
-{
-       if ( ! fid ) 
-               return 0; 
-       if (fid->opaque[1] == 0xfffffffe || fid->opaque[1] == 0xffffffff)
-               return ((fid->opaque[0] << 20) | (fid->opaque[2] & 0xfffff));
-       else
-               return (fid->opaque[2] + (fid->opaque[1]<<10) + (fid->opaque[0]<<20));
-}
-
-struct coda_cred {
-    vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, efftve, set, fs uid*/
-    vgid_t cr_groupid, cr_egid, cr_sgid, cr_fsgid; /* same for groups */
-};
-
-#else /* not defined(CONFIG_CODA_FS_OLD_API) */
-
  struct CodaFid {
         u_int32_t opaque[4];
  };
@@ -228,8 +206,6 @@ struct CodaFid {
  #define coda_f2i(fid)\
         (fid ? (fid->opaque[3] ^ (fid->opaque[2]<<10) ^ (fid->opaque[1]<<20) ^ fid->opaque[0]) : 0)
  
-#endif
-
  #ifndef _VENUS_VATTR_T_
  #define _VENUS_VATTR_T_
  /*
@@ -313,15 +289,7 @@ struct coda_statfs {
  
  #define CIOC_KERNEL_VERSION _IOWR('c', 10, size_t)
  
-#if 0
-#define CODA_KERNEL_VERSION 0 /* don't care about kernel version number */
-#define CODA_KERNEL_VERSION 1 /* The old venus 4.6 compatible interface */
-#endif
-#ifdef CONFIG_CODA_FS_OLD_API
-#define CODA_KERNEL_VERSION 2 /* venus_lookup got an extra parameter */
-#else
  #define CODA_KERNEL_VERSION 3 /* 128-bit file identifiers */
-#endif
  
  /*
   *        Venus <-> Coda  RPC arguments
@@ -329,16 +297,9 @@ struct coda_statfs {
  struct coda_in_hdr {
      u_int32_t opcode;
      u_int32_t unique;      /* Keep multiple outstanding msgs distinct */
-#ifdef CONFIG_CODA_FS_OLD_API
-    u_int16_t pid;         /* Common to all */
-    u_int16_t pgid;        /* Common to all */
-    u_int16_t sid;          /* Common to all */
-    struct coda_cred cred;  /* Common to all */
-#else
      pid_t pid;
      pid_t pgid;
      vuid_t uid;
-#endif
  };
  
  /* Really important that opcode and unique are 1st two fields! */
@@ -613,11 +574,7 @@ struct coda_vget_out {
  /* CODA_PURGEUSER is a venus->kernel call */
  struct coda_purgeuser_out {
      struct coda_out_hdr oh;
-#ifdef CONFIG_CODA_FS_OLD_API
-    struct coda_cred cred;
-#else
      vuid_t uid;
-#endif
  };
  
  /* coda_zapfile: */
diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h

index e2bf7e5db39a513c650e1e422f024f31c0e6ed2d..c4811da1338b7c92546e5d0d0a48c8c4d8c9c7c6 100644 (file)
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
@@ -3,6 +3,9 @@
   *
   * Interface between console.c, selection.c  and consolemap.c
   */
+#ifndef __LINUX_CONSOLEMAP_H__
+#define __LINUX_CONSOLEMAP_H__
+
  #define LAT1_MAP 0
  #define GRAF_MAP 1
  #define IBMPC_MAP 2
@@ -10,6 +13,7 @@
  
  #include <linux/types.h>
  
+#ifdef CONFIG_CONSOLE_TRANSLATIONS
  struct vc_data;
  
  extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode);
@@ -18,3 +22,13 @@ extern int conv_uni_to_pc(struct vc_data *conp, long ucs);
  extern u32 conv_8bit_to_uni(unsigned char c);
  extern int conv_uni_to_8bit(u32 uni);
  void console_map_init(void);
+#else /* !CONFIG_CONSOLE_TRANSLATIONS: trivial macro stand-ins */
+#define inverse_translate(conp, glyph, uni) ((uint16_t)(glyph))
+#define set_translate(m, vc) ((unsigned short *)NULL)
+#define conv_uni_to_pc(conp, ucs) ((int) ((ucs) > 0xff ? -1 : (ucs)))
+#define conv_8bit_to_uni(c) ((uint32_t)(c))
+#define conv_uni_to_8bit(c) ((int) ((c) & 0xff))
+#define console_map_init(c) do { ; } while (0)
+#endif /* CONFIG_CONSOLE_TRANSLATIONS */
+
+#endif /* __LINUX_CONSOLEMAP_H__ */
diff --git a/include/linux/cpu.h b/include/linux/cpu.h

index 7464ba3b4333980268d4af8e71318a0b27a37e9d..d7faf88084973c6a5bfbfca9fe717352b2e1e1a2 100644 (file)
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -69,10 +69,11 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb)
  #endif
  
  int cpu_up(unsigned int cpu);
-
  extern void cpu_hotplug_init(void);
+extern void cpu_maps_update_begin(void);
+extern void cpu_maps_update_done(void);
  
-#else
+#else  /* CONFIG_SMP */
  
  static inline int register_cpu_notifier(struct notifier_block *nb)
  {
@@ -87,10 +88,16 @@ static inline void cpu_hotplug_init(void)
  {
  }
  
+static inline void cpu_maps_update_begin(void)
+{
+}
+
+static inline void cpu_maps_update_done(void)
+{
+}
+
  #endif /* CONFIG_SMP */
  extern struct sysdev_class cpu_sysdev_class;
-extern void cpu_maps_update_begin(void);
-extern void cpu_maps_update_done(void);
  
  #ifdef CONFIG_HOTPLUG_CPU
  /* Stop CPUs going up and down. */
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h

index 22c7ac5cd80cbd18ec3aeb1da42cac0493af8078..6cd39a927e1fc80d2c92248e8c71895caf06aa91 100644 (file)
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -22,5 +22,13 @@ extern struct proc_dir_entry *proc_vmcore;
  
  #define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
  
+static inline int is_kdump_kernel(void)
+{
+       return elfcorehdr_addr != ELFCORE_ADDR_MAX;     /* != already yields 0 or 1 */
+}
+#else /* !CONFIG_CRASH_DUMP */
+static inline int is_kdump_kernel(void) { return 0; }
  #endif /* CONFIG_CRASH_DUMP */
+
+extern unsigned long saved_max_pfn;
  #endif /* LINUX_CRASHDUMP_H */
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h

index ab94bc083558c329c8a59e49ff3c7af6076627ca..f352f06fa063b2bd73543403a4e24aca39f4a9d1 100644 (file)
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -39,6 +39,8 @@ extern void __delayacct_blkio_start(void);
  extern void __delayacct_blkio_end(void);
  extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
  extern __u64 __delayacct_blkio_ticks(struct task_struct *);
+extern void __delayacct_freepages_start(void);
+extern void __delayacct_freepages_end(void);
  
  static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
  {
@@ -107,6 +109,18 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
         return 0;
  }
  
+static inline void delayacct_freepages_start(void)
+{
+       if (current->delays)
+               __delayacct_freepages_start();
+}
+
+static inline void delayacct_freepages_end(void)
+{
+       if (current->delays)
+               __delayacct_freepages_end();
+}
+
  #else
  static inline void delayacct_set_flag(int flag)
  {}
@@ -129,6 +143,11 @@ static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
  { return 0; }
  static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
  { return 0; }
+static inline void delayacct_freepages_start(void)
+{}
+static inline void delayacct_freepages_end(void)
+{}
+
  #endif /* CONFIG_TASK_DELAY_ACCT */
  
  #endif
diff --git a/include/linux/dirent.h b/include/linux/dirent.h

index 5d6023b878003e65d5547f17faf4aea45fde89b4..f072fb8d10a3268b402323c91b40be250872726a 100644 (file)
--- a/include/linux/dirent.h
+++ b/include/linux/dirent.h
@@ -1,23 +1,6 @@
  #ifndef _LINUX_DIRENT_H
  #define _LINUX_DIRENT_H
  
-struct dirent {
-       long            d_ino;
-       __kernel_off_t  d_off;
-       unsigned short  d_reclen;
-       char            d_name[256]; /* We must not include limits.h! */
-};
-
-struct dirent64 {
-       __u64           d_ino;
-       __s64           d_off;
-       unsigned short  d_reclen;
-       unsigned char   d_type;
-       char            d_name[256];
-};
-
-#ifdef __KERNEL__
-
  struct linux_dirent64 {
         u64             d_ino;
         s64             d_off;
@@ -26,7 +9,4 @@ struct linux_dirent64 {
         char            d_name[0];
  };
  
-#endif /* __KERNEL__ */
-
-
  #endif
diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h

index 84cec2aa9f1e98bd70ad9bb65ac58c06e2e9e5b4..2efe7b863cff361301d4a73c1c4e46338d0b02b1 100644 (file)
--- a/include/linux/ext2_fs.h
+++ b/include/linux/ext2_fs.h
@@ -284,8 +284,8 @@ struct ext2_inode {
  
  #ifdef __hurd__
  #define i_translator   osd1.hurd1.h_i_translator
-#define i_frag         osd2.hurd2.h_i_frag;
-#define i_fsize                osd2.hurd2.h_i_fsize;
+#define i_frag         osd2.hurd2.h_i_frag
+#define i_fsize                osd2.hurd2.h_i_fsize
  #define i_uid_high     osd2.hurd2.h_i_uid_high
  #define i_gid_high     osd2.hurd2.h_i_gid_high
  #define i_author       osd2.hurd2.h_i_author
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h

index 36c5403963778bcb5a1a5b9193267a616728cdde..80171ee89a2222b8e00fb65b2220b4f0fcf78165 100644 (file)
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -832,6 +832,7 @@ extern void ext3_discard_reservation (struct inode *);
  extern void ext3_dirty_inode(struct inode *);
  extern int ext3_change_inode_journal_flag(struct inode *, int);
  extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
+extern int ext3_can_truncate(struct inode *inode);
  extern void ext3_truncate (struct inode *);
  extern void ext3_set_inode_flags(struct inode *);
  extern void ext3_get_inode_flags(struct ext3_inode_info *);
diff --git a/include/linux/fd1772.h b/include/linux/fd1772.h

deleted file mode 100644 (file)

index 871d6e4..0000000
--- a/include/linux/fd1772.h
+++ /dev/null
@@ -1,80 +0,0 @@
-#ifndef _LINUX_FD1772REG_H
-#define _LINUX_FD1772REG_H
-
-/*
-** WD1772 stuff - originally from the M68K Linux
- * Modified for Archimedes by Dave Gilbert (gilbertd@cs.man.ac.uk)
- */
-
-/* register codes */
-
-#define FDC1772SELREG_STP   (0x80)   /* command/status register */
-#define FDC1772SELREG_TRA   (0x82)   /* track register */
-#define FDC1772SELREG_SEC   (0x84)   /* sector register */
-#define FDC1772SELREG_DTA   (0x86)   /* data register */
-
-/* register names for FDC1772_READ/WRITE macros */
-
-#define FDC1772REG_CMD         0
-#define FDC1772REG_STATUS      0
-#define FDC1772REG_TRACK       2
-#define FDC1772REG_SECTOR      4
-#define FDC1772REG_DATA                6
-
-/* command opcodes */
-
-#define FDC1772CMD_RESTORE  (0x00)   /*  -                   */
-#define FDC1772CMD_SEEK     (0x10)   /*   |                  */
-#define FDC1772CMD_STEP     (0x20)   /*   |  TYP 1 Commands  */
-#define FDC1772CMD_STIN     (0x40)   /*   |                  */
-#define FDC1772CMD_STOT     (0x60)   /*  -                   */
-#define FDC1772CMD_RDSEC    (0x80)   /*  -   TYP 2 Commands  */
-#define FDC1772CMD_WRSEC    (0xa0)   /*  -          "        */
-#define FDC1772CMD_RDADR    (0xc0)   /*  -                   */
-#define FDC1772CMD_RDTRA    (0xe0)   /*   |  TYP 3 Commands  */
-#define FDC1772CMD_WRTRA    (0xf0)   /*  -                   */
-#define FDC1772CMD_FORCI    (0xd0)   /*  -   TYP 4 Command   */
-
-/* command modifier bits */
-
-#define FDC1772CMDADD_SR6   (0x00)   /* step rate settings */
-#define FDC1772CMDADD_SR12  (0x01)
-#define FDC1772CMDADD_SR2   (0x02)
-#define FDC1772CMDADD_SR3   (0x03)
-#define FDC1772CMDADD_V     (0x04)   /* verify */
-#define FDC1772CMDADD_H     (0x08)   /* wait for spin-up */
-#define FDC1772CMDADD_U     (0x10)   /* update track register */
-#define FDC1772CMDADD_M     (0x10)   /* multiple sector access */
-#define FDC1772CMDADD_E     (0x04)   /* head settling flag */
-#define FDC1772CMDADD_P     (0x02)   /* precompensation */
-#define FDC1772CMDADD_A0    (0x01)   /* DAM flag */
-
-/* status register bits */
-
-#define        FDC1772STAT_MOTORON     (0x80)   /* motor on */
-#define        FDC1772STAT_WPROT       (0x40)   /* write protected (FDC1772CMD_WR*) */
-#define        FDC1772STAT_SPINUP      (0x20)   /* motor speed stable (Type I) */
-#define        FDC1772STAT_DELDAM      (0x20)   /* sector has deleted DAM (Type II+III) */
-#define        FDC1772STAT_RECNF       (0x10)   /* record not found */
-#define        FDC1772STAT_CRC         (0x08)   /* CRC error */
-#define        FDC1772STAT_TR00        (0x04)   /* Track 00 flag (Type I) */
-#define        FDC1772STAT_LOST        (0x04)   /* Lost Data (Type II+III) */
-#define        FDC1772STAT_IDX         (0x02)   /* Index status (Type I) */
-#define        FDC1772STAT_DRQ         (0x02)   /* DRQ status (Type II+III) */
-#define        FDC1772STAT_BUSY        (0x01)   /* FDC1772 is busy */
-
-
-/* PSG Port A Bit Nr 0 .. Side Sel .. 0 -> Side 1  1 -> Side 2 */
-#define DSKSIDE     (0x01)
-        
-#define DSKDRVNONE  (0x06)
-#define DSKDRV0     (0x02)
-#define DSKDRV1     (0x04)
-
-/* step rates */
-#define        FDC1772STEP_6   0x00
-#define        FDC1772STEP_12  0x01
-#define        FDC1772STEP_2   0x02
-#define        FDC1772STEP_3   0x03
-
-#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 4b86f806014c5e8a82f0664f3e5c9acad0c2d0d0..49d8eb7a71be2b89d9148575618d2550018a5141 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -885,6 +885,12 @@ static inline int file_check_writeable(struct file *filp)
  #define FL_CLOSE       64      /* unlock on close */
  #define FL_SLEEP       128     /* A blocking lock */
  
+/*
+ * Special return value from posix_lock_file() and vfs_lock_file() for
+ * asynchronous locking.
+ */
+#define FILE_LOCK_DEFERRED 1
+
  /*
   * The POSIX file lock owner is determined by
   * the "struct files_struct" in the thread group
diff --git a/include/linux/fuse.h b/include/linux/fuse.h

index d48282197696fdf1644820a121ce75cd5571bd67..265635dc990812a319dcd632131e8c9f79051f1d 100644 (file)
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -104,11 +104,14 @@ struct fuse_file_lock {
  
  /**
   * INIT request/reply flags
+ *
+ * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
   */
  #define FUSE_ASYNC_READ                (1 << 0)
  #define FUSE_POSIX_LOCKS       (1 << 1)
  #define FUSE_FILE_OPS          (1 << 2)
  #define FUSE_ATOMIC_O_TRUNC    (1 << 3)
+#define FUSE_EXPORT_SUPPORT    (1 << 4)
  #define FUSE_BIG_WRITES                (1 << 5)
  
  /**
diff --git a/include/linux/genhd.h b/include/linux/genhd.h

index e8787417f65a1175cb4e1542f7271c0d3dbfd87b..118216f1bd3c357a61b154face10a267a67f3290 100644 (file)
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -541,7 +541,7 @@ extern dev_t blk_lookup_devt(const char *name, int part);
  extern char *disk_name (struct gendisk *hd, int part, char *buf);
  
  extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
-extern void add_partition(struct gendisk *, int, sector_t, sector_t, int);
+extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int);
  extern void delete_partition(struct gendisk *, int);
  extern void printk_all_partitions(void);
  
diff --git a/include/linux/gpio.h b/include/linux/gpio.h

index 98be6c5762b923fcb2a98ced684051c3ea7f773c..730a20b83576b4ab39d81810085c20d61eec0225 100644 (file)
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -79,6 +79,19 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value)
         WARN_ON(1);
  }
  
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+       /* GPIO can never have been requested or set as {in,out}put */
+       WARN_ON(1);
+       return -EINVAL;
+}
+
+static inline void gpio_unexport(unsigned gpio)
+{
+       /* GPIO can never have been exported */
+       WARN_ON(1);
+}
+
  static inline int gpio_to_irq(unsigned gpio)
  {
         /* GPIO can never have been requested or set as input */
diff --git a/include/linux/i2c/max732x.h b/include/linux/i2c/max732x.h

new file mode 100644 (file)

index 0000000..e103366
--- /dev/null
+++ b/include/linux/i2c/max732x.h
@@ -0,0 +1,19 @@
+#ifndef __LINUX_I2C_MAX732X_H
+#define __LINUX_I2C_MAX732X_H
+
+struct i2c_client;     /* only passed by pointer to the callbacks below */
+
+/* platform data for the MAX732x 8/16-bit I/O expander driver */
+struct max732x_platform_data {
+       /* number of the first GPIO */
+       unsigned        gpio_base;
+       void            *context;       /* param to setup/teardown */
+
+       int             (*setup)(struct i2c_client *client,
+                               unsigned gpio, unsigned ngpio,
+                               void *context);
+       int             (*teardown)(struct i2c_client *client,
+                               unsigned gpio, unsigned ngpio,
+                               void *context);
+};
+#endif /* __LINUX_I2C_MAX732X_H */
diff --git a/include/linux/idr.h b/include/linux/idr.h

index 9a2d762124de7684983cdb36a51dc3814b461a8f..fa035f96f2a3b340c468e4b58d9010df3b21be69 100644 (file)
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -15,6 +15,7 @@
  #include <linux/types.h>
  #include <linux/bitops.h>
  #include <linux/init.h>
+#include <linux/rcupdate.h>
  
  #if BITS_PER_LONG == 32
  # define IDR_BITS 5
@@ -51,6 +52,7 @@ struct idr_layer {
         unsigned long            bitmap; /* A zero bit means "space here" */
         struct idr_layer        *ary[1<<IDR_BITS];
         int                      count;  /* When zero, we can release it */
+       struct rcu_head          rcu_head;
  };
  
  struct idr {
@@ -71,6 +73,28 @@ struct idr {
  }
  #define DEFINE_IDR(name)       struct idr name = IDR_INIT(name)
  
+/* Actions to be taken after a call to _idr_sub_alloc */
+#define IDR_NEED_TO_GROW -2
+#define IDR_NOMORE_SPACE -3
+
+#define _idr_rc_to_errno(rc) ((rc) == -1 ? -EAGAIN : -ENOSPC)
+
+/*
+ * idr synchronization (stolen from radix-tree.h)
+ *
+ * idr_find() is able to be called locklessly, using RCU. The caller must
+ * ensure calls to this function are made within rcu_read_lock() regions.
+ * Other readers (lock-free or otherwise) and modifications may be running
+ * concurrently.
+ *
+ * It is still required that the caller manage the synchronization and
+ * lifetimes of the items. So if RCU lock-free lookups are used, typically
+ * this would mean that the items have their own locks, or are amenable to
+ * lock-free access; and that the items are freed by RCU (or only freed after
+ * having been deleted from the idr tree *and* a synchronize_rcu() grace
+ * period).
+ */
+
  /*
   * This is what we export.
   */
diff --git a/include/linux/init.h b/include/linux/init.h

index 21d658cdfa27769146ea52dae2cbf61b3de7f302..42ae95411a93510433c25c31b03b0b28f095869c 100644 (file)
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -275,13 +275,7 @@ void __init parse_early_param(void);
  
  #define security_initcall(fn)          module_init(fn)
  
-/* These macros create a dummy inline: gcc 2.9x does not count alias
- as usage, hence the `unused function' warning when __init functions
- are declared static. We use the dummy __*_module_inline functions
- both to kill the warning and check the type of the init/cleanup
- function. */
-
-/* Each module must use one module_init(), or one no_module_init */
+/* Each module must use one module_init(). */
  #define module_init(initfn)                                    \
         static inline initcall_t __inittest(void)               \
         { return initfn; }                                      \
diff --git a/include/linux/init_task.h b/include/linux/init_task.h

index 93c45acf249ab20cc19d43c94021a25691be9c76..021d8e720c7941f17cd6a541a63b7a9275f68c9c 100644 (file)
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -122,7 +122,7 @@ extern struct group_info init_groups;
         .state          = 0,                                            \
         .stack          = &init_thread_info,                            \
         .usage          = ATOMIC_INIT(2),                               \
-       .flags          = 0,                                            \
+       .flags          = PF_KTHREAD,                                   \
         .lock_depth     = -1,                                           \
         .prio           = MAX_PRIO-20,                                  \
         .static_prio    = MAX_PRIO-20,                                  \
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h

index ea6c18a8b0d442d13dabcdeca9961fb8540ca451..ea330f9e710015c2c8a4b80ebfaec9cd133ffcdd 100644 (file)
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -36,6 +36,7 @@ struct ipc_namespace {
         int             msg_ctlmni;
         atomic_t        msg_bytes;
         atomic_t        msg_hdrs;
+       int             auto_msgmni;
  
         size_t          shm_ctlmax;
         size_t          shm_ctlall;
@@ -53,7 +54,7 @@ extern atomic_t nr_ipc_ns;
  
  extern int register_ipcns_notifier(struct ipc_namespace *);
  extern int cond_register_ipcns_notifier(struct ipc_namespace *);
-extern int unregister_ipcns_notifier(struct ipc_namespace *);
+extern void unregister_ipcns_notifier(struct ipc_namespace *);
  extern int ipcns_notify(unsigned long);
  
  #else /* CONFIG_SYSVIPC */
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h

index 2b1c2e58566ea04460370fef4a4b686794ab512d..74bde13224c926f5204a2c514609def00d465325 100644 (file)
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -11,6 +11,8 @@
  #ifndef _LINUX_TRACE_IRQFLAGS_H
  #define _LINUX_TRACE_IRQFLAGS_H
  
+#include <linux/typecheck.h>
+
  #ifdef CONFIG_TRACE_IRQFLAGS
    extern void trace_softirqs_on(unsigned long ip);
    extern void trace_softirqs_off(unsigned long ip);
@@ -58,18 +60,24 @@
         do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
  #define local_irq_disable() \
         do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
-#define local_irq_save(flags) \
-       do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0)
+#define local_irq_save(flags)                          \
+       do {                                            \
+               typecheck(unsigned long, flags);        \
+               raw_local_irq_save(flags);              \
+               trace_hardirqs_off();                   \
+       } while (0)
  
-#define local_irq_restore(flags)                               \
-       do {                                                    \
-               if (raw_irqs_disabled_flags(flags)) {           \
-                       raw_local_irq_restore(flags);           \
-                       trace_hardirqs_off();                   \
-               } else {                                        \
-                       trace_hardirqs_on();                    \
-                       raw_local_irq_restore(flags);           \
-               }                                               \
+
+#define local_irq_restore(flags)                       \
+       do {                                            \
+               typecheck(unsigned long, flags);        \
+               if (raw_irqs_disabled_flags(flags)) {   \
+                       raw_local_irq_restore(flags);   \
+                       trace_hardirqs_off();           \
+               } else {                                \
+                       trace_hardirqs_on();            \
+                       raw_local_irq_restore(flags);   \
+               }                                       \
         } while (0)
  #else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */
  /*
@@ -78,8 +86,16 @@
   */
  # define raw_local_irq_disable()       local_irq_disable()
  # define raw_local_irq_enable()                local_irq_enable()
-# define raw_local_irq_save(flags)     local_irq_save(flags)
-# define raw_local_irq_restore(flags)  local_irq_restore(flags)
+# define raw_local_irq_save(flags)                     \
+       do {                                            \
+               typecheck(unsigned long, flags);        \
+               local_irq_save(flags);                  \
+       } while (0)
+# define raw_local_irq_restore(flags)                  \
+       do {                                            \
+               typecheck(unsigned long, flags);        \
+               local_irq_restore(flags);               \
+       } while (0)
  #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
  
  #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
@@ -89,7 +105,11 @@
                 raw_safe_halt();                                \
         } while (0)
  
-#define local_save_flags(flags)                raw_local_save_flags(flags)
+#define local_save_flags(flags)                                \
+       do {                                            \
+               typecheck(unsigned long, flags);        \
+               raw_local_save_flags(flags);            \
+       } while (0)
  
  #define irqs_disabled()                                                \
  ({                                                             \
@@ -99,7 +119,11 @@
         raw_irqs_disabled_flags(_flags);                        \
  })
  
-#define irqs_disabled_flags(flags)     raw_irqs_disabled_flags(flags)
+#define irqs_disabled_flags(flags)             \
+({                                             \
+       typecheck(unsigned long, flags);        \
+       raw_irqs_disabled_flags(flags);         \
+})
  #endif         /* CONFIG_X86 */
  
  #endif
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h

index 00c1801099fa0de4024690c6c415fe0b58724bc5..57aefa160a926863c752bd1a3a312c0781ecc9b8 100644 (file)
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -6,6 +6,7 @@
  #define _LINUX_KALLSYMS_H
  
  #include <linux/errno.h>
+#include <linux/kernel.h>
  #include <linux/stddef.h>
  
  #define KSYM_NAME_LEN 128
@@ -105,18 +106,10 @@ static inline void print_fn_descriptor_symbol(const char *fmt, void *addr)
         print_symbol(fmt, (unsigned long)addr);
  }
  
-#ifndef CONFIG_64BIT
-#define print_ip_sym(ip)               \
-do {                                   \
-       printk("[<%08lx>]", ip);        \
-       print_symbol(" %s\n", ip);      \
-} while(0)
-#else
-#define print_ip_sym(ip)               \
-do {                                   \
-       printk("[<%016lx>]", ip);       \
-       print_symbol(" %s\n", ip);      \
-} while(0)
-#endif
+static inline void print_ip_sym(unsigned long ip)
+{
+       printk("[<%p>]", (void *) ip);  /* %p: one format for 32- and 64-bit */
+       print_symbol(" %s\n", ip);
+}
  
  #endif /*_LINUX_KALLSYMS_H*/
diff --git a/include/linux/kernel.h b/include/linux/kernel.h

index f9cd7a513f9c5f91a0d703a2b8080250ecf7805e..fdbbf72ca2ebfbe096171a7b96dbaf539f7fbcbe 100644 (file)
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -14,6 +14,8 @@
  #include <linux/compiler.h>
  #include <linux/bitops.h>
  #include <linux/log2.h>
+#include <linux/typecheck.h>
+#include <linux/ratelimit.h>
  #include <asm/byteorder.h>
  #include <asm/bug.h>
  
@@ -188,11 +190,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
  asmlinkage int printk(const char * fmt, ...)
         __attribute__ ((format (printf, 1, 2))) __cold;
  
-extern int printk_ratelimit_jiffies;
-extern int printk_ratelimit_burst;
+extern struct ratelimit_state printk_ratelimit_state;
  extern int printk_ratelimit(void);
-extern int __ratelimit(int ratelimit_jiffies, int ratelimit_burst);
-extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst);
  extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
                                    unsigned int interval_msec);
  #else
@@ -203,8 +202,6 @@ static inline int printk(const char *s, ...)
         __attribute__ ((format (printf, 1, 2)));
  static inline int __cold printk(const char *s, ...) { return 0; }
  static inline int printk_ratelimit(void) { return 0; }
-static inline int __printk_ratelimit(int ratelimit_jiffies, \
-                                    int ratelimit_burst) { return 0; }
  static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \
                                           unsigned int interval_msec)   \
                 { return false; }
@@ -441,26 +438,6 @@ static inline char *pack_hex_byte(char *buf, u8 byte)
         const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
         (type *)( (char *)__mptr - offsetof(type,member) );})
  
-/*
- * Check at compile time that something is of a particular type.
- * Always evaluates to 1 so you may use it easily in comparisons.
- */
-#define typecheck(type,x) \
-({     type __dummy; \
-       typeof(x) __dummy2; \
-       (void)(&__dummy == &__dummy2); \
-       1; \
-})
-
-/*
- * Check at compile time that 'function' is a certain type, or is a pointer
- * to that type (needs to use typedef for the function type.)
- */
-#define typecheck_fn(type,function) \
-({     typeof(type) __tmp = function; \
-       (void)__tmp; \
-})
-
  struct sysinfo;
  extern int do_sysinfo(struct sysinfo *info);
  
diff --git a/include/linux/kmod.h b/include/linux/kmod.h

index 0509c4ce485793e748054aa558e559691a6af79d..a1a91577813c6bfb25ea89c9304f0c2128a87464 100644 (file)
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -19,6 +19,7 @@
   *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   */
  
+#include <linux/gfp.h>
  #include <linux/stddef.h>
  #include <linux/errno.h>
  #include <linux/compiler.h>
@@ -41,8 +42,8 @@ struct file;
  struct subprocess_info;
  
  /* Allocate a subprocess_info structure */
-struct subprocess_info *call_usermodehelper_setup(char *path,
-                                                 char **argv, char **envp);
+struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
+                                                 char **envp, gfp_t gfp_mask);
  
  /* Set various pieces of state into the subprocess_info structure */
  void call_usermodehelper_setkeys(struct subprocess_info *info,
@@ -69,8 +70,9 @@ static inline int
  call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait)
  {
         struct subprocess_info *info;
+       gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
  
-       info = call_usermodehelper_setup(path, argv, envp);
+       info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
         if (info == NULL)
                 return -ENOMEM;
         return call_usermodehelper_exec(info, wait);
@@ -81,8 +83,9 @@ call_usermodehelper_keys(char *path, char **argv, char **envp,
                          struct key *session_keyring, enum umh_wait wait)
  {
         struct subprocess_info *info;
+       gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
  
-       info = call_usermodehelper_setup(path, argv, envp);
+       info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
         if (info == NULL)
                 return -ENOMEM;
  
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h

index 04a3556bdea6b87e654db168240139dc887decb4..0be7795655fab4279d3b9b4a6fa110b01a4fc070 100644 (file)
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -157,11 +157,10 @@ struct kretprobe {
         int nmissed;
         size_t data_size;
         struct hlist_head free_instances;
-       struct hlist_head used_instances;
+       spinlock_t lock;
  };
  
  struct kretprobe_instance {
-       struct hlist_node uflist; /* either on free list or used list */
         struct hlist_node hlist;
         struct kretprobe *rp;
         kprobe_opcode_t *ret_addr;
@@ -201,7 +200,6 @@ static inline int init_test_probes(void)
  }
  #endif /* CONFIG_KPROBES_SANITY_TEST */
  
-extern spinlock_t kretprobe_lock;
  extern struct mutex kprobe_mutex;
  extern int arch_prepare_kprobe(struct kprobe *p);
  extern void arch_arm_kprobe(struct kprobe *p);
@@ -214,6 +212,9 @@ extern void kprobes_inc_nmissed_count(struct kprobe *p);
  
  /* Get the kprobe at this addr (if any) - called with preemption disabled */
  struct kprobe *get_kprobe(void *addr);
+void kretprobe_hash_lock(struct task_struct *tsk,
+                        struct hlist_head **head, unsigned long *flags);
+void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags);
  struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk);
  
  /* kprobe_running() will just return the current_kprobe on this CPU */
diff --git a/include/linux/kthread.h b/include/linux/kthread.h

index 00dd957e245b43839db5e0972e74ff541e4dafd1..aabc8a13ba71c7b6f9eab1ddf3842b18800045d4 100644 (file)
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -6,7 +6,8 @@
  
  struct task_struct *kthread_create(int (*threadfn)(void *data),
                                    void *data,
-                                  const char namefmt[], ...);
+                                  const char namefmt[], ...)
+       __attribute__((format(printf, 3, 4)));
  
  /**
   * kthread_run - create and wake a thread.
diff --git a/include/linux/list.h b/include/linux/list.h

index 139ec41d9c2ebd7e74d0c53e04433dd996a3f233..453916bc0412cbacd4ed2844fe71599ac07bc086 100644 (file)
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -61,14 +61,10 @@ extern void __list_add(struct list_head *new,
   * Insert a new entry after the specified head.
   * This is good for implementing stacks.
   */
-#ifndef CONFIG_DEBUG_LIST
  static inline void list_add(struct list_head *new, struct list_head *head)
  {
         __list_add(new, head, head->next);
  }
-#else
-extern void list_add(struct list_head *new, struct list_head *head);
-#endif
  
  
  /**
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h

index e6608776bc96138c4fac9eea0b2a4126997bc7b8..fdf3967e13975a4dc24ec7c37026295ea4fa8f0e 100644 (file)
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -35,7 +35,10 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
  extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                                         gfp_t gfp_mask);
  extern void mem_cgroup_uncharge_page(struct page *page);
+extern void mem_cgroup_uncharge_cache_page(struct page *page);
  extern void mem_cgroup_move_lists(struct page *page, bool active);
+extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
+
  extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
                                         struct list_head *dst,
                                         unsigned long *scanned, int order,
@@ -50,9 +53,9 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
  #define mm_match_cgroup(mm, cgroup)    \
         ((cgroup) == mem_cgroup_from_task((mm)->owner))
  
-extern int mem_cgroup_prepare_migration(struct page *page);
+extern int
+mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
  extern void mem_cgroup_end_migration(struct page *page);
-extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);
  
  /*
   * For memory reclaim.
@@ -97,6 +100,15 @@ static inline void mem_cgroup_uncharge_page(struct page *page)
  {
  }
  
+static inline void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+}
+
+static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
+{
+       return 0;
+}
+
  static inline void mem_cgroup_move_lists(struct page *page, bool active)
  {
  }
@@ -112,7 +124,8 @@ static inline int task_in_mem_cgroup(struct task_struct *task,
         return 1;
  }
  
-static inline int mem_cgroup_prepare_migration(struct page *page)
+static inline int
+mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
  {
         return 0;
  }
@@ -121,11 +134,6 @@ static inline void mem_cgroup_end_migration(struct page *page)
  {
  }
  
-static inline void
-mem_cgroup_page_migration(struct page *page, struct page *newpage)
-{
-}
-
  static inline int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
  {
         return 0;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h

index 02a27ae78539cfe536106f46a8f89b50ea63985e..746f975b58ef3a4843b00622dbfa1958f1000074 100644 (file)
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -159,6 +159,17 @@ struct vm_area_struct {
  #endif
  };
  
+struct core_thread {
+       struct task_struct *task;
+       struct core_thread *next;
+};
+
+struct core_state {
+       atomic_t nr_threads;
+       struct core_thread dumper;
+       struct completion startup;
+};
+
  struct mm_struct {
         struct vm_area_struct * mmap;           /* list of VMAs */
         struct rb_root mm_rb;
@@ -175,7 +186,6 @@ struct mm_struct {
         atomic_t mm_users;                      /* How many users with user space? */
         atomic_t mm_count;                      /* How many references to "struct mm_struct" (users count as 1) */
         int map_count;                          /* number of VMAs */
-       int core_waiters;
         struct rw_semaphore mmap_sem;
         spinlock_t page_table_lock;             /* Protects page tables and some counters */
  
@@ -219,8 +229,7 @@ struct mm_struct {
  
         unsigned long flags; /* Must use atomic bitops to access the bits */
  
-       /* coredumping support */
-       struct completion *core_startup_done, core_done;
+       struct core_state *core_state; /* coredumping support */
  
         /* aio bits */
         rwlock_t                ioctx_list_lock;        /* aio lock */
diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h

index 81cd36b735b0de3677f76b116d047a1089166bdf..ba63858056c7a5f0d3e7017fb19808e6e9dae5b1 100644 (file)
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -2,11 +2,11 @@
  #define _LINUX_MSDOS_FS_H
  
  #include <linux/magic.h>
+#include <asm/byteorder.h>
  
  /*
   * The MS-DOS filesystem constants/structures
   */
-#include <asm/byteorder.h>
  
  #define SECTOR_SIZE    512             /* sector size (bytes) */
  #define SECTOR_BITS    9               /* log2(SECTOR_SIZE) */
@@ -89,24 +89,22 @@
  #define IS_FSINFO(x)   (le32_to_cpu((x)->signature1) == FAT_FSINFO_SIG1 \
                          && le32_to_cpu((x)->signature2) == FAT_FSINFO_SIG2)
  
+struct __fat_dirent {
+       long            d_ino;
+       __kernel_off_t  d_off;
+       unsigned short  d_reclen;
+       char            d_name[256]; /* We must not include limits.h! */
+};
+
  /*
   * ioctl commands
   */
-#define VFAT_IOCTL_READDIR_BOTH                _IOR('r', 1, struct dirent [2])
-#define VFAT_IOCTL_READDIR_SHORT       _IOR('r', 2, struct dirent [2])
+#define VFAT_IOCTL_READDIR_BOTH                _IOR('r', 1, struct __fat_dirent[2])
+#define VFAT_IOCTL_READDIR_SHORT       _IOR('r', 2, struct __fat_dirent[2])
  /* <linux/videotext.h> has used 0x72 ('r') in collision, so skip a few */
  #define FAT_IOCTL_GET_ATTRIBUTES       _IOR('r', 0x10, __u32)
  #define FAT_IOCTL_SET_ATTRIBUTES       _IOW('r', 0x11, __u32)
  
-/*
- * vfat shortname flags
- */
-#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */
-#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */
-#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */
-#define VFAT_SFN_CREATE_WIN95  0x0100 /* emulate win95 rule for create */
-#define VFAT_SFN_CREATE_WINNT  0x0200 /* emulate winnt rule for create */
-
  struct fat_boot_sector {
         __u8    ignored[3];     /* Boot strap short or near jump */
         __u8    system_id[8];   /* Name - can be used to special case
@@ -168,14 +166,6 @@ struct msdos_dir_slot {
         __u8    name11_12[4];   /* last 2 characters in name */
  };
  
-struct fat_slot_info {
-       loff_t i_pos;           /* on-disk position of directory entry */
-       loff_t slot_off;        /* offset for slot or de start */
-       int nr_slots;           /* number of slots + 1(de) in filename */
-       struct msdos_dir_entry *de;
-       struct buffer_head *bh;
-};
-
  #ifdef __KERNEL__
  
  #include <linux/buffer_head.h>
@@ -184,6 +174,15 @@ struct fat_slot_info {
  #include <linux/fs.h>
  #include <linux/mutex.h>
  
+/*
+ * vfat shortname flags
+ */
+#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */
+#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */
+#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */
+#define VFAT_SFN_CREATE_WIN95  0x0100 /* emulate win95 rule for create */
+#define VFAT_SFN_CREATE_WINNT  0x0200 /* emulate winnt rule for create */
+
  struct fat_mount_options {
         uid_t fs_uid;
         gid_t fs_gid;
@@ -202,10 +201,10 @@ struct fat_mount_options {
                  utf8:1,          /* Use of UTF-8 character set (Default) */
                  unicode_xlate:1, /* create escape sequences for unhandled Unicode */
                  numtail:1,       /* Does first alias have a numeric '~1' type tail? */
-                atari:1,         /* Use Atari GEMDOS variation of MS-DOS fs */
                  flush:1,         /* write things quickly */
                  nocase:1,        /* Does this need case conversion? 0=need case conversion*/
-                usefree:1;       /* Use free_clusters for FAT32 */
+                usefree:1,       /* Use free_clusters for FAT32 */
+                tz_utc:1;        /* Filesystem timestamps are in UTC */
  };
  
  #define FAT_HASH_BITS  8
@@ -267,6 +266,14 @@ struct msdos_inode_info {
         struct inode vfs_inode;
  };
  
+struct fat_slot_info {
+       loff_t i_pos;           /* on-disk position of directory entry */
+       loff_t slot_off;        /* offset for slot or de start */
+       int nr_slots;           /* number of slots + 1(de) in filename */
+       struct msdos_dir_entry *de;
+       struct buffer_head *bh;
+};
+
  static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb)
  {
         return sb->s_fs_info;
@@ -428,8 +435,9 @@ extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
  extern void fat_fs_panic(struct super_block *s, const char *fmt, ...);
  extern void fat_clusters_flush(struct super_block *sb);
  extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
-extern int date_dos2unix(unsigned short time, unsigned short date);
-extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date);
+extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc);
+extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date,
+                             int tz_utc);
  extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
  
  int fat_cache_init(void);
diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h

index f71201d0f3e7e42f2ffe22d571231cc958dff04f..6316fafe5c2a99558197520d8ccb05046c7fe629 100644 (file)
--- a/include/linux/mtd/ubi.h
+++ b/include/linux/mtd/ubi.h
@@ -45,13 +45,13 @@ enum {
   * @size: how many physical eraseblocks are reserved for this volume
   * @used_bytes: how many bytes of data this volume contains
   * @used_ebs: how many physical eraseblocks of this volume actually contain any
- * data
+ *            data
   * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME)
   * @corrupted: non-zero if the volume is corrupted (static volumes only)
   * @upd_marker: non-zero if the volume has update marker set
   * @alignment: volume alignment
   * @usable_leb_size: how many bytes are available in logical eraseblocks of
- * this volume
+ *                   this volume
   * @name_len: volume name length
   * @name: volume name
   * @cdev: UBI volume character device major and minor numbers
@@ -152,6 +152,7 @@ int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum);
  int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum);
  int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype);
  int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum);
+int ubi_sync(int ubi_num);
  
  /*
   * This function is the same as the 'ubi_leb_read()' function, but it does not
diff --git a/include/linux/net.h b/include/linux/net.h

index 2f999fbb188dbfc01766c63189bd35017e65ba1d..4a9a30f2d68fe65e5184e367b488b4759ef20e33 100644 (file)
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -351,8 +351,7 @@ static const struct proto_ops name##_ops = {                        \
  
  #ifdef CONFIG_SYSCTL
  #include <linux/sysctl.h>
-extern int net_msg_cost;
-extern int net_msg_burst;
+extern struct ratelimit_state net_ratelimit_state;
  #endif
  
  #endif /* __KERNEL__ */
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h

index a2861d95ecc35fff2d9fb9652dabf0a6754f8a4c..108f47e5fd9511796516d758dca90b1232321bea 100644 (file)
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -12,7 +12,6 @@
  
  #include <linux/types.h>
  #include <linux/unistd.h>
-#include <linux/dirent.h>
  #include <linux/fs.h>
  #include <linux/posix_acl.h>
  #include <linux/mount.h>
diff --git a/include/linux/notifier.h b/include/linux/notifier.h

index bd3d72ddf33398254a503fdbc8709e3cbccdc19e..da2698b0fdd1d0989e5f1e0c2739262d002eb3df 100644 (file)
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -214,6 +214,8 @@ static inline int notifier_to_errno(int ret)
  #define CPU_DEAD               0x0007 /* CPU (unsigned)v dead */
  #define CPU_DYING              0x0008 /* CPU (unsigned)v not running any task,
                                         * not handling interrupts, soon dead */
+#define CPU_POST_DEAD          0x0009 /* CPU (unsigned)v dead, cpu_hotplug
+                                       * lock is dropped */
  
  /* Used for CPU hotplug events occuring while tasks are frozen due to a suspend
   * operation in progress
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h

index 0e66b57631fc767f202c32771f2ab5c8194c34cd..c8a768e59640665af635cedb9c84745e421fdbab 100644 (file)
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -82,9 +82,12 @@ static inline void get_nsproxy(struct nsproxy *ns)
  }
  
  #ifdef CONFIG_CGROUP_NS
-int ns_cgroup_clone(struct task_struct *tsk);
+int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid);
  #else
-static inline int ns_cgroup_clone(struct task_struct *tsk) { return 0; }
+static inline int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid)
+{
+       return 0;
+}
  #endif
  
  #endif
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h

index 119ae7b8f028cb9795fddb16a28abc7530294764..c3b1761aba2693d431bc3f65e8987eaaaf8070b5 100644 (file)
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2400,6 +2400,9 @@
  #define PCI_DEVICE_ID_INTEL_ICH10_4    0x3a30
  #define PCI_DEVICE_ID_INTEL_ICH10_5    0x3a60
  #define PCI_DEVICE_ID_INTEL_IOAT_SNB   0x402f
+#define PCI_DEVICE_ID_INTEL_5100_16    0x65f0
+#define PCI_DEVICE_ID_INTEL_5100_21    0x65f5
+#define PCI_DEVICE_ID_INTEL_5100_22    0x65f6
  #define PCI_DEVICE_ID_INTEL_5400_ERR   0x4030
  #define PCI_DEVICE_ID_INTEL_5400_FBD0  0x4035
  #define PCI_DEVICE_ID_INTEL_5400_FBD1  0x4036
diff --git a/include/linux/pid.h b/include/linux/pid.h

index c21c7e8124a7bb7bde92937fc37cf0810598d091..22921ac4cfd948b88e10a06c0725b33363b38caf 100644 (file)
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -48,7 +48,7 @@ enum pid_type
   */
  
  struct upid {
-       /* Try to keep pid_chain in the same cacheline as nr for find_pid */
+       /* Try to keep pid_chain in the same cacheline as nr for find_vpid */
         int nr;
         struct pid_namespace *ns;
         struct hlist_node pid_chain;
@@ -57,10 +57,10 @@ struct upid {
  struct pid
  {
         atomic_t count;
+       unsigned int level;
         /* lists of tasks that use this pid */
         struct hlist_head tasks[PIDTYPE_MAX];
         struct rcu_head rcu;
-       unsigned int level;
         struct upid numbers[1];
  };
  
@@ -105,14 +105,12 @@ extern struct pid_namespace init_pid_ns;
   * or rcu_read_lock() held.
   *
   * find_pid_ns() finds the pid in the namespace specified
- * find_pid() find the pid by its global id, i.e. in the init namespace
   * find_vpid() finr the pid by its virtual id, i.e. in the current namespace
   *
- * see also find_task_by_pid() set in include/linux/sched.h
+ * see also find_task_by_vpid() set in include/linux/sched.h
   */
  extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns);
  extern struct pid *find_vpid(int nr);
-extern struct pid *find_pid(int nr);
  
  /*
   * Lookup a PID in the hash table, and return with it's count elevated.
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h

index caff5283d15c6e11fe838ffad6b2877077af490e..1af82c4e17d4e79a14d1854b35fd39ef93bdb649 100644 (file)
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -14,6 +14,8 @@ struct pidmap {
  
  #define PIDMAP_ENTRIES         ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
  
+struct bsd_acct_struct;
+
  struct pid_namespace {
         struct kref kref;
         struct pidmap pidmap[PIDMAP_ENTRIES];
@@ -25,6 +27,9 @@ struct pid_namespace {
  #ifdef CONFIG_PROC_FS
         struct vfsmount *proc_mnt;
  #endif
+#ifdef CONFIG_BSD_PROCESS_ACCT
+       struct bsd_acct_struct *bacct;
+#endif
  };
  
  extern struct pid_namespace init_pid_ns;
@@ -85,4 +90,7 @@ static inline struct task_struct *task_child_reaper(struct task_struct *tsk)
         return tsk->nsproxy->pid_ns->child_reaper;
  }
  
+void pidhash_init(void);
+void pidmap_init(void);
+
  #endif /* _LINUX_PID_NS_H */
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h

index 15a9eaf4a8026101a042be7f67bd67e6d9d4411b..f560d1705afec2f2ab9c6c9103d7547961808864 100644 (file)
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -79,6 +79,7 @@ struct proc_dir_entry {
         int pde_users;  /* number of callers into module in progress */
         spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
         struct completion *pde_unload_completion;
+       struct list_head pde_openers;   /* who did ->open, but not ->release */
  };
  
  struct kcore_list {
@@ -138,7 +139,6 @@ extern int proc_readdir(struct file *, void *, filldir_t);
  extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
  
  extern const struct file_operations proc_kcore_operations;
-extern const struct file_operations proc_kmsg_operations;
  extern const struct file_operations ppc_htab_operations;
  
  extern int pid_ns_prepare_proc(struct pid_namespace *ns);
diff --git a/include/linux/profile.h b/include/linux/profile.h

index 05c1cc736937fcc48d5df8781a6ad1b391f73807..7e7087239af5fc0497ebf40bf92d65cb99b5749f 100644 (file)
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -8,8 +8,6 @@
  
  #include <asm/errno.h>
  
-extern int prof_on __read_mostly;
-
  #define CPU_PROFILING  1
  #define SCHED_PROFILING        2
  #define SLEEP_PROFILING        3
@@ -19,14 +17,31 @@ struct proc_dir_entry;
  struct pt_regs;
  struct notifier_block;
  
+#if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS)
+void create_prof_cpu_mask(struct proc_dir_entry *de);
+#else
+static inline void create_prof_cpu_mask(struct proc_dir_entry *de)
+{
+}
+#endif
+
+enum profile_type {
+       PROFILE_TASK_EXIT,
+       PROFILE_MUNMAP
+};
+
+#ifdef CONFIG_PROFILING
+
+extern int prof_on __read_mostly;
+
  /* init basic kernel profiler */
  void __init profile_init(void);
-void profile_tick(int);
+void profile_tick(int type);
  
  /*
   * Add multiple profiler hits to a given address:
   */
-void profile_hits(int, void *ip, unsigned int nr_hits);
+void profile_hits(int type, void *ip, unsigned int nr_hits);
  
  /*
   * Single profiler hit:
@@ -40,19 +55,6 @@ static inline void profile_hit(int type, void *ip)
                 profile_hits(type, ip, 1);
  }
  
-#ifdef CONFIG_PROC_FS
-void create_prof_cpu_mask(struct proc_dir_entry *);
-#else
-#define create_prof_cpu_mask(x)                        do { (void)(x); } while (0)
-#endif
-
-enum profile_type {
-       PROFILE_TASK_EXIT,
-       PROFILE_MUNMAP
-};
-
-#ifdef CONFIG_PROFILING
-
  struct task_struct;
  struct mm_struct;
  
@@ -80,6 +82,28 @@ struct pt_regs;
  
  #else
  
+#define prof_on 0
+
+static inline void profile_init(void)
+{
+       return;
+}
+
+static inline void profile_tick(int type)
+{
+       return;
+}
+
+static inline void profile_hits(int type, void *ip, unsigned int nr_hits)
+{
+       return;
+}
+
+static inline void profile_hit(int type, void *ip)
+{
+       return;
+}
+
  static inline int task_handoff_register(struct notifier_block * n)
  {
         return -ENOSYS;
diff --git a/include/linux/quota.h b/include/linux/quota.h

index dcddfb2009479f42a543363d0c0873c76d4bfcee..376a05048bc580650bd1923c9f21c6e3852cea11 100644 (file)
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -41,9 +41,6 @@
  #define __DQUOT_VERSION__      "dquot_6.5.1"
  #define __DQUOT_NUM_VERSION__  6*10000+5*100+1
  
-typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
-typedef __u64 qsize_t;          /* Type in which we store sizes */
-
  /* Size of blocks in which are counted size limits */
  #define QUOTABLOCK_BITS 10
  #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
@@ -138,6 +135,10 @@ struct if_dqinfo {
  #define QUOTA_NL_BHARDWARN 4           /* Block hardlimit reached */
  #define QUOTA_NL_BSOFTLONGWARN 5       /* Block grace time expired */
  #define QUOTA_NL_BSOFTWARN 6           /* Block softlimit reached */
+#define QUOTA_NL_IHARDBELOW 7          /* Usage got below inode hardlimit */
+#define QUOTA_NL_ISOFTBELOW 8          /* Usage got below inode softlimit */
+#define QUOTA_NL_BHARDBELOW 9          /* Usage got below block hardlimit */
+#define QUOTA_NL_BSOFTBELOW 10         /* Usage got below block softlimit */
  
  enum {
         QUOTA_NL_C_UNSPEC,
@@ -172,6 +173,9 @@ enum {
  
  #include <asm/atomic.h>
  
+typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
+typedef __u64 qsize_t;          /* Type in which we store sizes */
+
  extern spinlock_t dq_data_lock;
  
  /* Maximal numbers of writes for quota operation (insert/delete/update)
@@ -223,12 +227,10 @@ struct super_block;
  #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? */
  
  extern void mark_info_dirty(struct super_block *sb, int type);
-#define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags)
-#define info_any_dquot_dirty(info) (!list_empty(&(info)->dqi_dirty_list))
-#define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info))
-
-#define sb_dqopt(sb) (&(sb)->s_dquot)
-#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type))
+static inline int info_dirty(struct mem_dqinfo *info)
+{
+       return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
+}
  
  struct dqstats {
         int lookups;
@@ -337,19 +339,6 @@ struct quota_info {
         struct quota_format_ops *ops[MAXQUOTAS];        /* Operations for each type */
  };
  
-#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \
-       (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED))
-
-#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \
-                                 sb_has_quota_enabled(sb, GRPQUOTA))
-
-#define sb_has_quota_suspended(sb, type) \
-       ((type) == USRQUOTA ? (sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED) : \
-                             (sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED))
-
-#define sb_any_quota_suspended(sb) (sb_has_quota_suspended(sb, USRQUOTA) | \
-                                 sb_has_quota_suspended(sb, GRPQUOTA))
-
  int register_quota_format(struct quota_format_type *fmt);
  void unregister_quota_format(struct quota_format_type *fmt);
  
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h

index f867020538539738cafa8ac22130379889452833..742187f7a05c54051da11975373acd5cc3de56e2 100644 (file)
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -11,42 +11,85 @@
  #define _LINUX_QUOTAOPS_
  
  #include <linux/smp_lock.h>
-
  #include <linux/fs.h>
  
+static inline struct quota_info *sb_dqopt(struct super_block *sb)
+{
+       return &sb->s_dquot;
+}
+
  #if defined(CONFIG_QUOTA)
  
  /*
   * declaration of quota_function calls in kernel.
   */
-extern void sync_dquots(struct super_block *sb, int type);
-
-extern int dquot_initialize(struct inode *inode, int type);
-extern int dquot_drop(struct inode *inode);
-
-extern int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
-extern int dquot_alloc_inode(const struct inode *inode, unsigned long number);
-
-extern int dquot_free_space(struct inode *inode, qsize_t number);
-extern int dquot_free_inode(const struct inode *inode, unsigned long number);
-
-extern int dquot_transfer(struct inode *inode, struct iattr *iattr);
-extern int dquot_commit(struct dquot *dquot);
-extern int dquot_acquire(struct dquot *dquot);
-extern int dquot_release(struct dquot *dquot);
-extern int dquot_commit_info(struct super_block *sb, int type);
-extern int dquot_mark_dquot_dirty(struct dquot *dquot);
-
-extern int vfs_quota_on(struct super_block *sb, int type, int format_id,
-               char *path, int remount);
-extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
-               int format_id, int type);
-extern int vfs_quota_off(struct super_block *sb, int type, int remount);
-extern int vfs_quota_sync(struct super_block *sb, int type);
-extern int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
-extern int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
-extern int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
-extern int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+void sync_dquots(struct super_block *sb, int type);
+
+int dquot_initialize(struct inode *inode, int type);
+int dquot_drop(struct inode *inode);
+
+int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
+int dquot_alloc_inode(const struct inode *inode, unsigned long number);
+
+int dquot_free_space(struct inode *inode, qsize_t number);
+int dquot_free_inode(const struct inode *inode, unsigned long number);
+
+int dquot_transfer(struct inode *inode, struct iattr *iattr);
+int dquot_commit(struct dquot *dquot);
+int dquot_acquire(struct dquot *dquot);
+int dquot_release(struct dquot *dquot);
+int dquot_commit_info(struct super_block *sb, int type);
+int dquot_mark_dquot_dirty(struct dquot *dquot);
+
+int vfs_quota_on(struct super_block *sb, int type, int format_id,
+       char *path, int remount);
+int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
+       int format_id, int type);
+int vfs_quota_off(struct super_block *sb, int type, int remount);
+int vfs_quota_sync(struct super_block *sb, int type);
+int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
+int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
+int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+
+void vfs_dq_drop(struct inode *inode);
+int vfs_dq_transfer(struct inode *inode, struct iattr *iattr);
+int vfs_dq_quota_on_remount(struct super_block *sb);
+
+static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
+{
+       return sb_dqopt(sb)->info + type;
+}
+
+/*
+ * Functions for checking status of quota
+ */
+
+static inline int sb_has_quota_enabled(struct super_block *sb, int type)
+{
+       if (type == USRQUOTA)
+               return sb_dqopt(sb)->flags & DQUOT_USR_ENABLED;
+       return sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED;
+}
+
+static inline int sb_any_quota_enabled(struct super_block *sb)
+{
+       return sb_has_quota_enabled(sb, USRQUOTA) ||
+               sb_has_quota_enabled(sb, GRPQUOTA);
+}
+
+static inline int sb_has_quota_suspended(struct super_block *sb, int type)
+{
+       if (type == USRQUOTA)
+               return sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED;
+       return sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED;
+}
+
+static inline int sb_any_quota_suspended(struct super_block *sb)
+{
+       return sb_has_quota_suspended(sb, USRQUOTA) ||
+               sb_has_quota_suspended(sb, GRPQUOTA);
+}
  
  /*
   * Operations supported for diskquotas.
@@ -59,38 +102,16 @@ extern struct quotactl_ops vfs_quotactl_ops;
  
  /* It is better to call this function outside of any transaction as it might
   * need a lot of space in journal for dquot structure allocation. */
-static inline void DQUOT_INIT(struct inode *inode)
+static inline void vfs_dq_init(struct inode *inode)
  {
         BUG_ON(!inode->i_sb);
         if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode))
                 inode->i_sb->dq_op->initialize(inode, -1);
  }
  
-/* The same as with DQUOT_INIT */
-static inline void DQUOT_DROP(struct inode *inode)
-{
-       /* Here we can get arbitrary inode from clear_inode() so we have
-        * to be careful. OTOH we don't need locking as quota operations
-        * are allowed to change only at mount time */
-       if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
-           && inode->i_sb->dq_op->drop) {
-               int cnt;
-               /* Test before calling to rule out calls from proc and such
-                 * where we are not allowed to block. Note that this is
-                * actually reliable test even without the lock - the caller
-                * must assure that nobody can come after the DQUOT_DROP and
-                * add quota pointers back anyway */
-               for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-                       if (inode->i_dquot[cnt] != NODQUOT)
-                               break;
-               if (cnt < MAXQUOTAS)
-                       inode->i_sb->dq_op->drop(inode);
-       }
-}
-
  /* The following allocation/freeing/transfer functions *must* be called inside
   * a transaction (deadlocks possible otherwise) */
-static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
  {
         if (sb_any_quota_enabled(inode->i_sb)) {
                 /* Used space is updated in alloc_space() */
@@ -102,15 +123,15 @@ static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
         return 0;
  }
  
-static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
  {
         int ret;
-        if (!(ret =  DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr)))
+        if (!(ret =  vfs_dq_prealloc_space_nodirty(inode, nr)))
                 mark_inode_dirty(inode);
         return ret;
  }
  
-static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
  {
         if (sb_any_quota_enabled(inode->i_sb)) {
                 /* Used space is updated in alloc_space() */
@@ -122,25 +143,25 @@ static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
         return 0;
  }
  
-static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
  {
         int ret;
-       if (!(ret = DQUOT_ALLOC_SPACE_NODIRTY(inode, nr)))
+       if (!(ret = vfs_dq_alloc_space_nodirty(inode, nr)))
                 mark_inode_dirty(inode);
         return ret;
  }
  
-static inline int DQUOT_ALLOC_INODE(struct inode *inode)
+static inline int vfs_dq_alloc_inode(struct inode *inode)
  {
         if (sb_any_quota_enabled(inode->i_sb)) {
-               DQUOT_INIT(inode);
+               vfs_dq_init(inode);
                 if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
                         return 1;
         }
         return 0;
  }
  
-static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
  {
         if (sb_any_quota_enabled(inode->i_sb))
                 inode->i_sb->dq_op->free_space(inode, nr);
@@ -148,35 +169,25 @@ static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
                 inode_sub_bytes(inode, nr);
  }
  
-static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
  {
-       DQUOT_FREE_SPACE_NODIRTY(inode, nr);
+       vfs_dq_free_space_nodirty(inode, nr);
         mark_inode_dirty(inode);
  }
  
-static inline void DQUOT_FREE_INODE(struct inode *inode)
+static inline void vfs_dq_free_inode(struct inode *inode)
  {
         if (sb_any_quota_enabled(inode->i_sb))
                 inode->i_sb->dq_op->free_inode(inode, 1);
  }
  
-static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr)
-{
-       if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
-               DQUOT_INIT(inode);
-               if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
-                       return 1;
-       }
-       return 0;
-}
-
  /* The following two functions cannot be called inside a transaction */
-static inline void DQUOT_SYNC(struct super_block *sb)
+static inline void vfs_dq_sync(struct super_block *sb)
  {
         sync_dquots(sb, -1);
  }
  
-static inline int DQUOT_OFF(struct super_block *sb, int remount)
+static inline int vfs_dq_off(struct super_block *sb, int remount)
  {
         int ret = -ENOSYS;
  
@@ -185,22 +196,27 @@ static inline int DQUOT_OFF(struct super_block *sb, int remount)
         return ret;
  }
  
-static inline int DQUOT_ON_REMOUNT(struct super_block *sb)
+#else
+
+static inline int sb_has_quota_enabled(struct super_block *sb, int type)
  {
-       int cnt;
-       int ret = 0, err;
+       return 0;
+}
  
-       if (!sb->s_qcop || !sb->s_qcop->quota_on)
-               return -ENOSYS;
-       for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-               err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
-               if (err < 0 && !ret)
-                       ret = err;
-       }
-       return ret;
+static inline int sb_any_quota_enabled(struct super_block *sb)
+{
+       return 0;
  }
  
-#else
+static inline int sb_has_quota_suspended(struct super_block *sb, int type)
+{
+       return 0;
+}
+
+static inline int sb_any_quota_suspended(struct super_block *sb)
+{
+       return 0;
+}
  
  /*
   * NO-OP when quota not configured.
@@ -208,113 +224,144 @@ static inline int DQUOT_ON_REMOUNT(struct super_block *sb)
  #define sb_dquot_ops                           (NULL)
  #define sb_quotactl_ops                                (NULL)
  
-static inline void DQUOT_INIT(struct inode *inode)
+static inline void vfs_dq_init(struct inode *inode)
  {
  }
  
-static inline void DQUOT_DROP(struct inode *inode)
+static inline void vfs_dq_drop(struct inode *inode)
  {
  }
  
-static inline int DQUOT_ALLOC_INODE(struct inode *inode)
+static inline int vfs_dq_alloc_inode(struct inode *inode)
  {
         return 0;
  }
  
-static inline void DQUOT_FREE_INODE(struct inode *inode)
+static inline void vfs_dq_free_inode(struct inode *inode)
  {
  }
  
-static inline void DQUOT_SYNC(struct super_block *sb)
+static inline void vfs_dq_sync(struct super_block *sb)
  {
  }
  
-static inline int DQUOT_OFF(struct super_block *sb, int remount)
+static inline int vfs_dq_off(struct super_block *sb, int remount)
  {
         return 0;
  }
  
-static inline int DQUOT_ON_REMOUNT(struct super_block *sb)
+static inline int vfs_dq_quota_on_remount(struct super_block *sb)
  {
         return 0;
  }
  
-static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr)
+static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
  {
         return 0;
  }
  
-static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
  {
         inode_add_bytes(inode, nr);
         return 0;
  }
  
-static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
  {
-       DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr);
+       vfs_dq_prealloc_space_nodirty(inode, nr);
         mark_inode_dirty(inode);
         return 0;
  }
  
-static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
  {
         inode_add_bytes(inode, nr);
         return 0;
  }
  
-static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
  {
-       DQUOT_ALLOC_SPACE_NODIRTY(inode, nr);
+       vfs_dq_alloc_space_nodirty(inode, nr);
         mark_inode_dirty(inode);
         return 0;
  }
  
-static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
  {
         inode_sub_bytes(inode, nr);
  }
  
-static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
  {
-       DQUOT_FREE_SPACE_NODIRTY(inode, nr);
+       vfs_dq_free_space_nodirty(inode, nr);
         mark_inode_dirty(inode);
  }      
  
  #endif /* CONFIG_QUOTA */
  
-static inline int DQUOT_PREALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
  {
-       return DQUOT_PREALLOC_SPACE_NODIRTY(inode,
+       return vfs_dq_prealloc_space_nodirty(inode,
                         nr << inode->i_sb->s_blocksize_bits);
  }
  
-static inline int DQUOT_PREALLOC_BLOCK(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_block(struct inode *inode, qsize_t nr)
  {
-       return DQUOT_PREALLOC_SPACE(inode,
+       return vfs_dq_prealloc_space(inode,
                         nr << inode->i_sb->s_blocksize_bits);
  }
  
-static inline int DQUOT_ALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_block_nodirty(struct inode *inode, qsize_t nr)
  {
-       return DQUOT_ALLOC_SPACE_NODIRTY(inode,
+       return vfs_dq_alloc_space_nodirty(inode,
                         nr << inode->i_sb->s_blocksize_bits);
  }
  
-static inline int DQUOT_ALLOC_BLOCK(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr)
  {
-       return DQUOT_ALLOC_SPACE(inode,
+       return vfs_dq_alloc_space(inode,
                         nr << inode->i_sb->s_blocksize_bits);
  }
  
-static inline void DQUOT_FREE_BLOCK_NODIRTY(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr)
  {
-       DQUOT_FREE_SPACE_NODIRTY(inode, nr << inode->i_sb->s_blocksize_bits);
+       vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits);
  }
  
-static inline void DQUOT_FREE_BLOCK(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_block(struct inode *inode, qsize_t nr)
  {
-       DQUOT_FREE_SPACE(inode, nr << inode->i_sb->s_blocksize_bits);
+       vfs_dq_free_space(inode, nr << inode->i_sb->s_blocksize_bits);
  }
  
+/*
+ * Define uppercase equivalents for compatibility with old function names
+ * Can go away when we think all users have been converted (15/04/2008)
+ */
+#define DQUOT_INIT(inode) vfs_dq_init(inode)
+#define DQUOT_DROP(inode) vfs_dq_drop(inode)
+#define DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr) \
+                               vfs_dq_prealloc_space_nodirty(inode, nr)
+#define DQUOT_PREALLOC_SPACE(inode, nr) vfs_dq_prealloc_space(inode, nr)
+#define DQUOT_ALLOC_SPACE_NODIRTY(inode, nr) \
+                               vfs_dq_alloc_space_nodirty(inode, nr)
+#define DQUOT_ALLOC_SPACE(inode, nr) vfs_dq_alloc_space(inode, nr)
+#define DQUOT_PREALLOC_BLOCK_NODIRTY(inode, nr) \
+                               vfs_dq_prealloc_block_nodirty(inode, nr)
+#define DQUOT_PREALLOC_BLOCK(inode, nr) vfs_dq_prealloc_block(inode, nr)
+#define DQUOT_ALLOC_BLOCK_NODIRTY(inode, nr) \
+                               vfs_dq_alloc_block_nodirty(inode, nr)
+#define DQUOT_ALLOC_BLOCK(inode, nr) vfs_dq_alloc_block(inode, nr)
+#define DQUOT_ALLOC_INODE(inode) vfs_dq_alloc_inode(inode)
+#define DQUOT_FREE_SPACE_NODIRTY(inode, nr) \
+                               vfs_dq_free_space_nodirty(inode, nr)
+#define DQUOT_FREE_SPACE(inode, nr) vfs_dq_free_space(inode, nr)
+#define DQUOT_FREE_BLOCK_NODIRTY(inode, nr) \
+                               vfs_dq_free_block_nodirty(inode, nr)
+#define DQUOT_FREE_BLOCK(inode, nr) vfs_dq_free_block(inode, nr)
+#define DQUOT_FREE_INODE(inode) vfs_dq_free_inode(inode)
+#define DQUOT_TRANSFER(inode, iattr) vfs_dq_transfer(inode, iattr)
+#define DQUOT_SYNC(sb) vfs_dq_sync(sb)
+#define DQUOT_OFF(sb, remount) vfs_dq_off(sb, remount)
+#define DQUOT_ON_REMOUNT(sb) vfs_dq_quota_on_remount(sb)
+
  #endif /* _LINUX_QUOTAOPS_ */
diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h

new file mode 100644 (file)

index 0000000..18a5b9b
--- /dev/null
+++ b/include/linux/ratelimit.h
@@ -0,0 +1,27 @@
+#ifndef _LINUX_RATELIMIT_H
+#define _LINUX_RATELIMIT_H
+#include <linux/param.h>
+
+#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ)
+#define DEFAULT_RATELIMIT_BURST 10
+
+struct ratelimit_state {
+       int interval;
+       int burst;
+       int printed;
+       int missed;
+       unsigned long begin;
+};
+
+#define DEFINE_RATELIMIT_STATE(name, interval, burst)          \
+               struct ratelimit_state name = {interval, burst,}
+
+extern int __ratelimit(struct ratelimit_state *rs);
+
+static inline int ratelimit(void)
+{
+       static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+                                       DEFAULT_RATELIMIT_BURST);
+       return __ratelimit(&rs);
+}
+#endif
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h

index f04b64eca6366766d2abf74ded470bce99791e3d..0967f03b07056928c4176826d4bf216f329cbde9 100644 (file)
--- a/include/linux/rcupreempt.h
+++ b/include/linux/rcupreempt.h
@@ -115,16 +115,21 @@ DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched);
  
  static inline void rcu_enter_nohz(void)
  {
+       static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
+
         smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
         __get_cpu_var(rcu_dyntick_sched).dynticks++;
-       WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1);
+       WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs);
  }
  
  static inline void rcu_exit_nohz(void)
  {
+       static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
+
         smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
         __get_cpu_var(rcu_dyntick_sched).dynticks++;
-       WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1));
+       WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1),
+                               &rs);
  }
  
  #else /* CONFIG_NO_HZ */
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h

index 4aacaeecb56f503adf60a680710b46fc485a39d4..e9963af16cda6ade6ede6dded784c2624f4bc449 100644 (file)
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -526,8 +526,8 @@ struct item_head {
  ** p is the array of __u32, i is the index into the array, v is the value
  ** to store there.
  */
-#define get_block_num(p, i) le32_to_cpu(get_unaligned((p) + (i)))
-#define put_block_num(p, i, v) put_unaligned(cpu_to_le32(v), (p) + (i))
+#define get_block_num(p, i) get_unaligned_le32((p) + (i))
+#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i))
  
  //
  // in old version uniqueness field shows key type
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h

index 336ee43ed7d8897eb9a1b4c7453123bf7ab3ebc8..315517e8bfa1cdd6704f4a9620ca8a3d8c7f5d2a 100644 (file)
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -152,7 +152,7 @@ struct reiserfs_journal_list {
         atomic_t j_nonzerolen;
         atomic_t j_commit_left;
         atomic_t j_older_commits_done;  /* all commits older than this on disk */
-       struct semaphore j_commit_lock;
+       struct mutex j_commit_mutex;
         unsigned long j_trans_id;
         time_t j_timestamp;
         struct reiserfs_list_bitmap *j_list_bitmap;
@@ -193,8 +193,8 @@ struct reiserfs_journal {
         struct buffer_head *j_header_bh;
  
         time_t j_trans_start_time;      /* time this transaction started */
-       struct semaphore j_lock;
-       struct semaphore j_flush_sem;
+       struct mutex j_mutex;
+       struct mutex j_flush_mutex;
         wait_queue_head_t j_join_wait;  /* wait for current transaction to finish before starting new one */
         atomic_t j_jlock;       /* lock for j_join_wait */
         int j_list_bitmap_index;        /* number of next list bitmap to use */
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h

index 6d9e1fca098c468b49287791e181f6e4f51e7d54..fdeadd9740dc5f99deaa4a8662494914f438e8c4 100644 (file)
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -63,9 +63,14 @@ u64 res_counter_read_u64(struct res_counter *counter, int member);
  ssize_t res_counter_read(struct res_counter *counter, int member,
                 const char __user *buf, size_t nbytes, loff_t *pos,
                 int (*read_strategy)(unsigned long long val, char *s));
-ssize_t res_counter_write(struct res_counter *counter, int member,
-               const char __user *buf, size_t nbytes, loff_t *pos,
-               int (*write_strategy)(char *buf, unsigned long long *val));
+
+typedef int (*write_strategy_fn)(const char *buf, unsigned long long *val);
+
+int res_counter_memparse_write_strategy(const char *buf,
+                                       unsigned long long *res);
+
+int res_counter_write(struct res_counter *counter, int member,
+                     const char *buffer, write_strategy_fn write_strategy);
  
  /*
   * the field descriptors. one for each member of res_counter
@@ -95,8 +100,10 @@ void res_counter_init(struct res_counter *counter);
   * counter->limit _locked call expects the counter->lock to be taken
   */
  
-int res_counter_charge_locked(struct res_counter *counter, unsigned long val);
-int res_counter_charge(struct res_counter *counter, unsigned long val);
+int __must_check res_counter_charge_locked(struct res_counter *counter,
+               unsigned long val);
+int __must_check res_counter_charge(struct res_counter *counter,
+               unsigned long val);
  
  /*
   * uncharge - tell that some portion of the resource is released
@@ -151,4 +158,20 @@ static inline void res_counter_reset_failcnt(struct res_counter *cnt)
         cnt->failcnt = 0;
         spin_unlock_irqrestore(&cnt->lock, flags);
  }
+
+static inline int res_counter_set_limit(struct res_counter *cnt,
+               unsigned long long limit)
+{
+       unsigned long flags;
+       int ret = -EBUSY;
+
+       spin_lock_irqsave(&cnt->lock, flags);
+       if (cnt->usage < limit) {
+               cnt->limit = limit;
+               ret = 0;
+       }
+       spin_unlock_irqrestore(&cnt->lock, flags);
+       return ret;
+}
+
  #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 6aca4a16e377633a1d2cde29fba627b086241b0a..42036ffe6b00f5bb939c5e576d6bce5bf0518346 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -506,6 +506,10 @@ struct signal_struct {
         unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
         unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
         unsigned long inblock, oublock, cinblock, coublock;
+#ifdef CONFIG_TASK_XACCT
+       u64 rchar, wchar, syscr, syscw;
+#endif
+       struct task_io_accounting ioac;
  
         /*
          * Cumulative ns of scheduled CPU time for dead threads in the
@@ -668,6 +672,10 @@ struct task_delay_info {
                                 /* io operations performed */
         u32 swapin_count;       /* total count of the number of swapin block */
                                 /* io operations performed */
+
+       struct timespec freepages_start, freepages_end;
+       u64 freepages_delay;    /* wait for memory reclaim */
+       u32 freepages_count;    /* total count of memory reclaim */
  };
  #endif /* CONFIG_TASK_DELAY_ACCT */
  
@@ -1257,7 +1265,7 @@ struct task_struct {
  #if defined(CONFIG_TASK_XACCT)
         u64 acct_rss_mem1;      /* accumulated rss usage */
         u64 acct_vm_mem1;       /* accumulated virtual memory usage */
-       cputime_t acct_stimexpd;/* stime since last update */
+       cputime_t acct_timexpd; /* stime + utime since last update */
  #endif
  #ifdef CONFIG_CPUSETS
         nodemask_t mems_allowed;
@@ -1496,7 +1504,7 @@ static inline void put_task_struct(struct task_struct *t)
  #define PF_KSWAPD      0x00040000      /* I am kswapd */
  #define PF_SWAPOFF     0x00080000      /* I am in swapoff */
  #define PF_LESS_THROTTLE 0x00100000    /* Throttle me less: I clean memory */
-#define PF_BORROWED_MM 0x00200000      /* I am a kthread doing use_mm */
+#define PF_KTHREAD     0x00200000      /* I am a kernel thread */
  #define PF_RANDOMIZE   0x00400000      /* randomize virtual address space */
  #define PF_SWAPWRITE   0x00800000      /* Allowed to write to swap */
  #define PF_SPREAD_PAGE 0x01000000      /* Spread page cache over cpuset */
@@ -1715,19 +1723,13 @@ extern struct pid_namespace init_pid_ns;
   *      finds a task by its pid in the specified namespace
   * find_task_by_vpid():
   *      finds a task by its virtual pid
- * find_task_by_pid():
- *      finds a task by its global pid
   *
- * see also find_pid() etc in include/linux/pid.h
+ * see also find_vpid() etc in include/linux/pid.h
   */
  
  extern struct task_struct *find_task_by_pid_type_ns(int type, int pid,
                 struct pid_namespace *ns);
  
-static inline struct task_struct *__deprecated find_task_by_pid(pid_t nr)
-{
-       return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns);
-}
  extern struct task_struct *find_task_by_vpid(pid_t nr);
  extern struct task_struct *find_task_by_pid_ns(pid_t nr,
                 struct pid_namespace *ns);
@@ -1800,7 +1802,6 @@ extern void force_sig(int, struct task_struct *);
  extern void force_sig_specific(int, struct task_struct *);
  extern int send_sig(int, struct task_struct *, int);
  extern void zap_other_threads(struct task_struct *p);
-extern int kill_proc(pid_t, int, int);
  extern struct sigqueue *sigqueue_alloc(void);
  extern void sigqueue_free(struct sigqueue *);
  extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
@@ -2054,9 +2055,6 @@ static inline int signal_pending_state(long state, struct task_struct *p)
         if (!signal_pending(p))
                 return 0;
  
-       if (state & (__TASK_STOPPED | __TASK_TRACED))
-               return 0;
-
         return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
  }
  
diff --git a/include/linux/sem.h b/include/linux/sem.h

index c8eaad9e4b72afacb1725204abbda308df0b3d45..1b191c176bcd33ecfe5f16d41ff53376635e52c0 100644 (file)
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -78,6 +78,7 @@ struct  seminfo {
  
  #ifdef __KERNEL__
  #include <asm/atomic.h>
+#include <linux/rcupdate.h>
  
  struct task_struct;
  
@@ -93,23 +94,19 @@ struct sem_array {
         time_t                  sem_otime;      /* last semop time */
         time_t                  sem_ctime;      /* last change time */
         struct sem              *sem_base;      /* ptr to first semaphore in array */
-       struct sem_queue        *sem_pending;   /* pending operations to be processed */
-       struct sem_queue        **sem_pending_last; /* last pending operation */
-       struct sem_undo         *undo;          /* undo requests on this array */
+       struct list_head        sem_pending;    /* pending operations to be processed */
+       struct list_head        list_id;        /* undo requests on this array */
         unsigned long           sem_nsems;      /* no. of semaphores in array */
  };
  
  /* One queue for each sleeping process in the system. */
  struct sem_queue {
-       struct sem_queue *      next;    /* next entry in the queue */
-       struct sem_queue **     prev;    /* previous entry in the queue, *(q->prev) == q */
-       struct task_struct*     sleeper; /* this process */
-       struct sem_undo *       undo;    /* undo structure */
+       struct list_head        list;    /* queue of pending operations */
+       struct task_struct      *sleeper; /* this process */
+       struct sem_undo         *undo;   /* undo structure */
         int                     pid;     /* process id of requesting process */
         int                     status;  /* completion status of operation */
-       struct sem_array *      sma;     /* semaphore array for operations */
-       int                     id;      /* internal sem id */
-       struct sembuf *         sops;    /* array of pending operations */
+       struct sembuf           *sops;   /* array of pending operations */
         int                     nsops;   /* number of operations */
         int                     alter;   /* does the operation alter the array? */
  };
@@ -118,8 +115,11 @@ struct sem_queue {
   * when the process exits.
   */
  struct sem_undo {
-       struct sem_undo *       proc_next;      /* next entry on this process */
-       struct sem_undo *       id_next;        /* next entry on this semaphore set */
+       struct list_head        list_proc;      /* per-process list: all undos from one process. */
+                                               /* rcu protected */
+       struct rcu_head         rcu;            /* rcu struct for sem_undo() */
+       struct sem_undo_list    *ulp;           /* sem_undo_list for the process */
+       struct list_head        list_id;        /* per semaphore array list: all undos for one array */
         int                     semid;          /* semaphore set identifier */
         short *                 semadj;         /* array of adjustments, one per semaphore */
  };
@@ -128,9 +128,9 @@ struct sem_undo {
   * that may be shared among all a CLONE_SYSVSEM task group.
   */ 
  struct sem_undo_list {
-       atomic_t        refcnt;
-       spinlock_t      lock;
-       struct sem_undo *proc_list;
+       atomic_t                refcnt;
+       spinlock_t              lock;
+       struct list_head        list_proc;
  };
  
  struct sysv_sem {
diff --git a/include/linux/sm501.h b/include/linux/sm501.h

index b530fa6a1d341c91c28d8992e61185a6b3a5f7e4..214f93209b8c0fb847971b41f0b5ea242b241e8a 100644 (file)
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -46,24 +46,6 @@ extern unsigned long sm501_modify_reg(struct device *dev,
                                       unsigned long set,
                                       unsigned long clear);
  
-/* sm501_gpio_set
- *
- * set the state of the given GPIO line
-*/
-
-extern void sm501_gpio_set(struct device *dev,
-                          unsigned long gpio,
-                          unsigned int to,
-                          unsigned int dir);
-
-/* sm501_gpio_get
- *
- * get the state of the given GPIO line
-*/
-
-extern unsigned long sm501_gpio_get(struct device *dev,
-                                   unsigned long gpio);
-
  
  /* Platform data definitions */
  
@@ -104,11 +86,19 @@ struct sm501_platdata_fb {
         struct sm501_platdata_fbsub     *fb_pnl;
  };
  
-/* gpio i2c */
+/* gpio i2c
+ *
+ * Note, we have to pass in the bus number, as the number used will be
+ * passed to the i2c-gpio driver's platform_device.id, subsequently used
+ * to register the i2c bus.
+*/
  
  struct sm501_platdata_gpio_i2c {
+       unsigned int            bus_num;
         unsigned int            pin_sda;
         unsigned int            pin_scl;
+       int                     udelay;
+       int                     timeout;
  };
  
  /* sm501_initdata
@@ -131,6 +121,7 @@ struct sm501_reg_init {
  #define SM501_USE_FBACCEL      (1<<6)
  #define SM501_USE_AC97         (1<<7)
  #define SM501_USE_I2S          (1<<8)
+#define SM501_USE_GPIO         (1<<9)
  
  #define SM501_USE_ALL          (0xffffffff)
  
@@ -157,6 +148,8 @@ struct sm501_init_gpio {
         struct sm501_reg_init   gpio_ddr_high;
  };
  
+#define SM501_FLAG_SUSPEND_OFF         (1<<4)
+
  /* sm501_platdata
   *
   * This is passed with the platform device to allow the board
@@ -170,6 +163,12 @@ struct sm501_platdata {
         struct sm501_init_gpio          *init_gpiop;
         struct sm501_platdata_fb        *fb;
  
+       int                              flags;
+       int                              gpio_base;
+
+       int     (*get_power)(struct device *dev);
+       int     (*set_power)(struct device *dev, unsigned int on);
+
         struct sm501_platdata_gpio_i2c  *gpio_i2c;
         unsigned int                     gpio_i2c_nr;
  };
diff --git a/include/linux/smb_fs.h b/include/linux/smb_fs.h

index 2c5cd55f44fff4ab96bda8b48cd439d4bcf47099..923cd8a247b103bfeae72cc2680b55db0636d8ad 100644 (file)
--- a/include/linux/smb_fs.h
+++ b/include/linux/smb_fs.h
@@ -43,18 +43,13 @@ static inline struct smb_inode_info *SMB_I(struct inode *inode)
  }
  
  /* macro names are short for word, double-word, long value (?) */
-#define WVAL(buf,pos) \
-       (le16_to_cpu(get_unaligned((__le16 *)((u8 *)(buf) + (pos)))))
-#define DVAL(buf,pos) \
-       (le32_to_cpu(get_unaligned((__le32 *)((u8 *)(buf) + (pos)))))
-#define LVAL(buf,pos) \
-       (le64_to_cpu(get_unaligned((__le64 *)((u8 *)(buf) + (pos)))))
-#define WSET(buf,pos,val) \
-       put_unaligned(cpu_to_le16((u16)(val)), (__le16 *)((u8 *)(buf) + (pos)))
-#define DSET(buf,pos,val) \
-       put_unaligned(cpu_to_le32((u32)(val)), (__le32 *)((u8 *)(buf) + (pos)))
-#define LSET(buf,pos,val) \
-       put_unaligned(cpu_to_le64((u64)(val)), (__le64 *)((u8 *)(buf) + (pos)))
+#define WVAL(buf, pos) (get_unaligned_le16((u8 *)(buf) + (pos)))
+#define DVAL(buf, pos) (get_unaligned_le32((u8 *)(buf) + (pos)))
+#define LVAL(buf, pos) (get_unaligned_le64((u8 *)(buf) + (pos)))
+
+#define WSET(buf, pos, val) put_unaligned_le16((val), (u8 *)(buf) + (pos))
+#define DSET(buf, pos, val) put_unaligned_le32((val), (u8 *)(buf) + (pos))
+#define LSET(buf, pos, val) put_unaligned_le64((val), (u8 *)(buf) + (pos))
  
  /* where to find the base of the SMB packet proper */
  #define smb_base(buf) ((u8 *)(((u8 *)(buf))+4))
diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h

index 835ddf47d45caa571e14e410ba4ed003d9ca172a..22ef107d7704432b52287fd9fd4df95945c23aef 100644 (file)
--- a/include/linux/spi/mcp23s08.h
+++ b/include/linux/spi/mcp23s08.h
@@ -1,18 +1,25 @@
  
-/* FIXME driver should be able to handle all four slaves that
- * can be hooked up to each chipselect, as well as IRQs...
- */
+/* FIXME driver should be able to handle IRQs...  */
+
+struct mcp23s08_chip_info {
+       bool    is_present;             /* true iff populated */
+       u8      pullups;                /* BIT(x) means enable pullup x */
+};
  
  struct mcp23s08_platform_data {
-       /* four slaves can share one SPI chipselect */
-       u8              slave;
+       /* Four slaves (numbered 0..3) can share one SPI chipselect, and
+        * will provide 8..32 GPIOs using 1..4 gpio_chip instances.
+        */
+       struct mcp23s08_chip_info       chip[4];
  
-       /* number assigned to the first GPIO */
+       /* "base" is the number of the first GPIO.  Dynamic assignment is
+        * not currently supported, and even if there are gaps in chip
+        * addressing the GPIO numbers are sequential .. so for example
+        * if only slaves 0 and 3 are present, their GPIOs range from
+        * base to base+15.
+        */
         unsigned        base;
  
-       /* pins with pullups */
-       u8              pullups;
-
         void            *context;       /* param to setup/teardown */
  
         int             (*setup)(struct spi_device *spi,
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h

index d311a090fae7f38d999904d22e909700d86e3849..61e5610ad165592c0605eb68330ff50d9e0b77f8 100644 (file)
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -46,6 +46,7 @@
   *  linux/spinlock.h:     builds the final spin_*() APIs.
   */
  
+#include <linux/typecheck.h>
  #include <linux/preempt.h>
  #include <linux/linkage.h>
  #include <linux/compiler.h>
@@ -191,23 +192,53 @@ do {                                                              \
  
  #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
  
-#define spin_lock_irqsave(lock, flags) flags = _spin_lock_irqsave(lock)
-#define read_lock_irqsave(lock, flags) flags = _read_lock_irqsave(lock)
-#define write_lock_irqsave(lock, flags)        flags = _write_lock_irqsave(lock)
+#define spin_lock_irqsave(lock, flags)                 \
+       do {                                            \
+               typecheck(unsigned long, flags);        \
+               flags = _spin_lock_irqsave(lock);       \
+       } while (0)
+#define read_lock_irqsave(lock, flags)                 \
+       do {                                            \
+               typecheck(unsigned long, flags);        \
+               flags = _read_lock_irqsave(lock);       \
+       } while (0)
+#define write_lock_irqsave(lock, flags)                        \
+       do {                                            \
+               typecheck(unsigned long, flags);        \
+               flags = _write_lock_irqsave(lock);      \
+       } while (0)
  
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
-#define spin_lock_irqsave_nested(lock, flags, subclass) \
-       flags = _spin_lock_irqsave_nested(lock, subclass)
+#define spin_lock_irqsave_nested(lock, flags, subclass)                        \
+       do {                                                            \
+               typecheck(unsigned long, flags);                        \
+               flags = _spin_lock_irqsave_nested(lock, subclass);      \
+       } while (0)
  #else
-#define spin_lock_irqsave_nested(lock, flags, subclass) \
-       flags = _spin_lock_irqsave(lock)
+#define spin_lock_irqsave_nested(lock, flags, subclass)                        \
+       do {                                                            \
+               typecheck(unsigned long, flags);                        \
+               flags = _spin_lock_irqsave(lock);                       \
+       } while (0)
  #endif
  
  #else
  
-#define spin_lock_irqsave(lock, flags) _spin_lock_irqsave(lock, flags)
-#define read_lock_irqsave(lock, flags) _read_lock_irqsave(lock, flags)
-#define write_lock_irqsave(lock, flags)        _write_lock_irqsave(lock, flags)
+#define spin_lock_irqsave(lock, flags)                 \
+       do {                                            \
+               typecheck(unsigned long, flags);        \
+               _spin_lock_irqsave(lock, flags);        \
+       } while (0)
+#define read_lock_irqsave(lock, flags)                 \
+       do {                                            \
+               typecheck(unsigned long, flags);        \
+               _read_lock_irqsave(lock, flags);        \
+       } while (0)
+#define write_lock_irqsave(lock, flags)                        \
+       do {                                            \
+               typecheck(unsigned long, flags);        \
+               _write_lock_irqsave(lock, flags);       \
+       } while (0)
  #define spin_lock_irqsave_nested(lock, flags, subclass)        \
         spin_lock_irqsave(lock, flags)
  
@@ -260,16 +291,25 @@ do {                                              \
  } while (0)
  #endif
  
-#define spin_unlock_irqrestore(lock, flags) \
-                                       _spin_unlock_irqrestore(lock, flags)
+#define spin_unlock_irqrestore(lock, flags)            \
+       do {                                            \
+               typecheck(unsigned long, flags);        \
+               _spin_unlock_irqrestore(lock, flags);   \
+       } while (0)
  #define spin_unlock_bh(lock)           _spin_unlock_bh(lock)
  
-#define read_unlock_irqrestore(lock, flags) \
-                                       _read_unlock_irqrestore(lock, flags)
+#define read_unlock_irqrestore(lock, flags)            \
+       do {                                            \
+               typecheck(unsigned long, flags);        \
+               _read_unlock_irqrestore(lock, flags);   \
+       } while (0)
  #define read_unlock_bh(lock)           _read_unlock_bh(lock)
  
-#define write_unlock_irqrestore(lock, flags) \
-                                       _write_unlock_irqrestore(lock, flags)
+#define write_unlock_irqrestore(lock, flags)           \
+       do {                                            \
+               typecheck(unsigned long, flags);        \
+               _write_unlock_irqrestore(lock, flags);  \
+       } while (0)
  #define write_unlock_bh(lock)          _write_unlock_bh(lock)
  
  #define spin_trylock_bh(lock)  __cond_lock(lock, _spin_trylock_bh(lock))
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h

index 06f2bf76c0306d1965d5d70c0613f1e1cc481975..d6ff145919ca3d3db7a01cf53c93ff628b0d29a6 100644 (file)
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -411,7 +411,7 @@ asmlinkage long sys_bind(int, struct sockaddr __user *, int);
  asmlinkage long sys_connect(int, struct sockaddr __user *, int);
  asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
  asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *,
-                           const sigset_t *, size_t, int);
+                           const __user sigset_t *, size_t, int);
  asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *);
  asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *);
  asmlinkage long sys_send(int, void __user *, size_t, unsigned);
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h

index 5d69c0744fff60283c74fbd4ca7a8c28ee4715cf..18269e956a7189d600387a12f70e7bf9c2996d55 100644 (file)
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -31,7 +31,7 @@
   */
  
  
-#define TASKSTATS_VERSION      6
+#define TASKSTATS_VERSION      7
  #define TS_COMM_LEN            32      /* should be >= TASK_COMM_LEN
                                          * in linux/sched.h */
  
@@ -157,6 +157,10 @@ struct taskstats {
         __u64   ac_utimescaled;         /* utime scaled on frequency etc */
         __u64   ac_stimescaled;         /* stime scaled on frequency etc */
         __u64   cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
+
+       /* Delay waiting for memory reclaim */
+       __u64   freepages_count;
+       __u64   freepages_delay_total;
  };
  
  
diff --git a/include/linux/typecheck.h b/include/linux/typecheck.h

new file mode 100644 (file)

index 0000000..eb5b74a
--- /dev/null
+++ b/include/linux/typecheck.h
@@ -0,0 +1,24 @@
+#ifndef TYPECHECK_H_INCLUDED
+#define TYPECHECK_H_INCLUDED
+
+/*
+ * Check at compile time that something is of a particular type.
+ * Always evaluates to 1 so you may use it easily in comparisons.
+ */
+#define typecheck(type,x) \
+({     type __dummy; \
+       typeof(x) __dummy2; \
+       (void)(&__dummy == &__dummy2); \
+       1; \
+})
+
+/*
+ * Check at compile time that 'function' is a certain type, or is a pointer
+ * to that type (needs to use typedef for the function type.)
+ */
+#define typecheck_fn(type,function) \
+({     typeof(type) __tmp = function; \
+       (void)__tmp; \
+})
+
+#endif         /* TYPECHECK_H_INCLUDED */
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h

index 747c3a49cdc9489ca791460794266553174246e3..c932390c6da08c2715b39db9eb19ecea75709023 100644 (file)
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -330,7 +330,7 @@ extern int usb_string_id(struct usb_composite_dev *c);
         dev_vdbg(&(d)->gadget->dev , fmt , ## args)
  #define ERROR(d, fmt, args...) \
         dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
+#define WARNING(d, fmt, args...) \
         dev_warn(&(d)->gadget->dev , fmt , ## args)
  #define INFO(d, fmt, args...) \
         dev_info(&(d)->gadget->dev , fmt , ## args)
diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h

index 8eff0b53910b961ac76b662267c620a92a693a6f..b3c4a60ceeb305ffc9a670e57c08a580ab36798c 100644 (file)
--- a/include/linux/virtio_9p.h
+++ b/include/linux/virtio_9p.h
@@ -1,5 +1,7 @@
  #ifndef _LINUX_VIRTIO_9P_H
  #define _LINUX_VIRTIO_9P_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
  #include <linux/virtio_config.h>
  
  /* The ID for virtio console */
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h

index 979524ee75b7eeec0fac53aeb01c0fa1592911fc..c30c7bfbf39bcb0f26507b88a9e82ec336d6eff6 100644 (file)
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h
@@ -1,5 +1,7 @@
  #ifndef _LINUX_VIRTIO_BALLOON_H
  #define _LINUX_VIRTIO_BALLOON_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
  #include <linux/virtio_config.h>
  
  /* The ID for virtio_balloon */
diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h

index 5f79a5f9de796b297c86f2647acfce713a533826..c1aef85243bf468522364425b6e06c022d9c8071 100644 (file)
--- a/include/linux/virtio_blk.h
+++ b/include/linux/virtio_blk.h
@@ -1,5 +1,7 @@
  #ifndef _LINUX_VIRTIO_BLK_H
  #define _LINUX_VIRTIO_BLK_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
  #include <linux/virtio_config.h>
  
  /* The ID for virtio_block */
@@ -11,6 +13,7 @@
  #define VIRTIO_BLK_F_SEG_MAX   2       /* Indicates maximum # of segments */
  #define VIRTIO_BLK_F_GEOMETRY  4       /* Legacy geometry available  */
  #define VIRTIO_BLK_F_RO                5       /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE  6       /* Block size of disk is available*/
  
  struct virtio_blk_config
  {
@@ -26,6 +29,8 @@ struct virtio_blk_config
                 __u8 heads;
                 __u8 sectors;
         } geometry;
+       /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
+       __u32 blk_size;
  } __attribute__((packed));
  
  /* These two define direction. */
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h

index f364bbf63c3413547738ae4028b399577bc76650..bf8ec283b232af65116af4fa338ad602f1b8d798 100644 (file)
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -1,5 +1,8 @@
  #ifndef _LINUX_VIRTIO_CONFIG_H
  #define _LINUX_VIRTIO_CONFIG_H
+/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
+ * anyone can use the definitions to implement compatible drivers/servers. */
+
  /* Virtio devices use a standardized configuration space to define their
   * features and pass configuration information, but each implementation can
   * store and access that space differently. */
@@ -15,6 +18,12 @@
  /* We've given up on this device. */
  #define VIRTIO_CONFIG_S_FAILED         0x80
  
+/* Some virtio feature bits (currently bits 28 through 31) are reserved for the
+ * transport being used (eg. virtio_ring), the rest are per-device feature
+ * bits. */
+#define VIRTIO_TRANSPORT_F_START       28
+#define VIRTIO_TRANSPORT_F_END         32
+
  /* Do we get callbacks when the ring is completely used, even if we've
   * suppressed them? */
  #define VIRTIO_F_NOTIFY_ON_EMPTY       24
@@ -52,9 +61,10 @@
   * @get_features: get the array of feature bits for this device.
   *     vdev: the virtio_device
   *     Returns the first 32 feature bits (all we currently need).
- * @set_features: confirm what device features we'll be using.
+ * @finalize_features: confirm what device features we'll be using.
   *     vdev: the virtio_device
- *     feature: the first 32 feature bits
+ *     This gives the final feature bits for the device: it can change
+ *     the dev->feature bits if it wants.
   */
  struct virtio_config_ops
  {
@@ -70,7 +80,7 @@ struct virtio_config_ops
                                      void (*callback)(struct virtqueue *));
         void (*del_vq)(struct virtqueue *vq);
         u32 (*get_features)(struct virtio_device *vdev);
-       void (*set_features)(struct virtio_device *vdev, u32 features);
+       void (*finalize_features)(struct virtio_device *vdev);
  };
  
  /* If driver didn't advertise the feature, it will never appear. */
diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h

index ed2d4ead7eb7ad67f0531f299e2076be201a1e92..19a0da0dba41203a59d5d2bce2762d2c2b78a240 100644 (file)
--- a/include/linux/virtio_console.h
+++ b/include/linux/virtio_console.h
@@ -1,6 +1,8 @@
  #ifndef _LINUX_VIRTIO_CONSOLE_H
  #define _LINUX_VIRTIO_CONSOLE_H
  #include <linux/virtio_config.h>
+/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
+ * anyone can use the definitions to implement compatible drivers/servers. */
  
  /* The ID for virtio console */
  #define VIRTIO_ID_CONSOLE      3
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h

index 38c0571820fbffa6851fb1dd2707a925d94e6b81..5e33761b9b8a9d7c038d6e062221f5f68a0def3b 100644 (file)
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -1,5 +1,7 @@
  #ifndef _LINUX_VIRTIO_NET_H
  #define _LINUX_VIRTIO_NET_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
  #include <linux/virtio_config.h>
  
  /* The ID for virtio_net */
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h

index b3151659cf497872578d0f4b37da6577cf7d050b..cdef3574293274eb7fa61d7a34c9315cfbe6ab88 100644 (file)
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -9,9 +9,8 @@
   * Authors:
   *  Anthony Liguori  <aliguori@us.ibm.com>
   *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
   */
  
  #ifndef _LINUX_VIRTIO_PCI_H
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h

index abe481ed990e328bd6f5ec1f5212deeed07b9fea..c4a598fb3826f1de5f2fe6f62a536c148d3f77a9 100644 (file)
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -120,6 +120,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
                                       void (*notify)(struct virtqueue *vq),
                                       void (*callback)(struct virtqueue *vq));
  void vring_del_virtqueue(struct virtqueue *vq);
+/* Filter out transport-specific feature bits. */
+void vring_transport_features(struct virtio_device *vdev);
  
  irqreturn_t vring_interrupt(int irq, void *_vq);
  #endif /* __KERNEL__ */
diff --git a/include/linux/virtio_rng.h b/include/linux/virtio_rng.h

index 331afb6c9f6282a21b9efb7972a161563122974e..1a85dab8a940c31b66cb1a5a36de381418b20739 100644 (file)
--- a/include/linux/virtio_rng.h
+++ b/include/linux/virtio_rng.h
@@ -1,5 +1,7 @@
  #ifndef _LINUX_VIRTIO_RNG_H
  #define _LINUX_VIRTIO_RNG_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
  #include <linux/virtio_config.h>
  
  /* The ID for virtio_rng */
diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h

index 9448ffbdcbf62937047a69f1281116f133d5c079..14c0e91be9b5121f811a2375382d0096270b4486 100644 (file)
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -12,6 +12,7 @@
  #include <linux/mutex.h>
  #include <linux/console_struct.h>
  #include <linux/mm.h>
+#include <linux/consolemap.h>
  
  /*
   * Presently, a lot of graphics programs do not restore the contents of
@@ -54,6 +55,7 @@ void redraw_screen(struct vc_data *vc, int is_switch);
  struct tty_struct;
  int tioclinux(struct tty_struct *tty, unsigned long arg);
  
+#ifdef CONFIG_CONSOLE_TRANSLATIONS
  /* consolemap.c */
  
  struct unimapinit;
@@ -71,6 +73,23 @@ void con_free_unimap(struct vc_data *vc);
  void con_protect_unimap(struct vc_data *vc, int rdonly);
  int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc);
  
+#define vc_translate(vc, c) ((vc)->vc_translate[(c) |                  \
+        ((vc)->vc_toggle_meta ? 0x80 : 0)]) /* '|' binds tighter than ?: */
+#else
+#define con_set_trans_old(arg) (0)
+#define con_get_trans_old(arg) (-EINVAL)
+#define con_set_trans_new(arg) (0)
+#define con_get_trans_new(arg) (-EINVAL)
+#define con_clear_unimap(vc, ui) (0)
+#define con_set_unimap(vc, ct, list) (0)
+#define con_set_default_unimap(vc) (0)
+#define con_copy_unimap(d, s) (0)
+#define con_get_unimap(vc, ct, uct, list) (-EINVAL)
+#define con_free_unimap(vc) do { ; } while (0)
+
+#define vc_translate(vc, c) (c)
+#endif
+
  /* vt.c */
  int vt_waitactive(int vt);
  void change_console(struct vc_data *new_vc);
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h

index 14d47120682b81211ac6a86b2e2841cd09c01778..5c158c477ac76c688cc53fa17bc61bae4e04dd43 100644 (file)
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -201,6 +201,8 @@ extern int keventd_up(void);
  extern void init_workqueues(void);
  int execute_in_process_context(work_func_t fn, struct execute_work *);
  
+extern int flush_work(struct work_struct *work);
+
  extern int cancel_work_sync(struct work_struct *work);
  
  /*
diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h

index a7421f130cc019a3e4144090a3848bf338f3be4b..ccdc562e444e316ec08ba15dc24da4685ed86766 100644 (file)
--- a/include/mtd/ubi-user.h
+++ b/include/mtd/ubi-user.h
@@ -58,6 +58,13 @@
   * device should be used. A &struct ubi_rsvol_req object has to be properly
   * filled and a pointer to it has to be passed to the IOCTL.
   *
+ * UBI volumes re-name
+ * ~~~~~~~~~~~~~~~~~~~
+ *
+ * To re-name several volumes atomically at one go, the %UBI_IOCRNVOL command
+ * of the UBI character device should be used. A &struct ubi_rnvol_req object
+ * has to be properly filled and a pointer to it has to be passed to the IOCTL.
+ *
   * UBI volume update
   * ~~~~~~~~~~~~~~~~~
   *
@@ -104,6 +111,8 @@
  #define UBI_IOCRMVOL _IOW(UBI_IOC_MAGIC, 1, int32_t)
  /* Re-size an UBI volume */
  #define UBI_IOCRSVOL _IOW(UBI_IOC_MAGIC, 2, struct ubi_rsvol_req)
+/* Re-name volumes */
+#define UBI_IOCRNVOL _IOW(UBI_IOC_MAGIC, 3, struct ubi_rnvol_req)
  
  /* IOCTL commands of the UBI control character device */
  
@@ -128,6 +137,9 @@
  /* Maximum MTD device name length supported by UBI */
  #define MAX_UBI_MTD_NAME_LEN 127
  
+/* Maximum amount of UBI volumes that can be re-named at one go */
+#define UBI_MAX_RNVOL 32
+
  /*
   * UBI data type hint constants.
   *
@@ -176,20 +188,20 @@ enum {
   * it will be 512 in case of a 2KiB page NAND flash with 4 512-byte sub-pages.
   *
   * But in rare cases, if this optimizes things, the VID header may be placed to
- * a different offset. For example, the boot-loader might do things faster if the
- * VID header sits at the end of the first 2KiB NAND page with 4 sub-pages. As
- * the boot-loader would not normally need to read EC headers (unless it needs
- * UBI in RW mode), it might be faster to calculate ECC. This is weird example,
- * but it real-life example. So, in this example, @vid_hdr_offer would be
- * 2KiB-64 bytes = 1984. Note, that this position is not even 512-bytes
- * aligned, which is OK, as UBI is clever enough to realize this is 4th sub-page
- * of the first page and add needed padding.
+ * a different offset. For example, the boot-loader might do things faster if
+ * the VID header sits at the end of the first 2KiB NAND page with 4 sub-pages.
+ * As the boot-loader would not normally need to read EC headers (unless it
+ * needs UBI in RW mode), it might be faster to calculate ECC. This is a weird
+ * example, but a real-life one. So, in this example, @vid_hdr_offset would
+ * be 2KiB-64 bytes = 1984. Note, that this position is not even 512-bytes
+ * aligned, which is OK, as UBI is clever enough to realize this is 4th
+ * sub-page of the first page and add needed padding.
   */
  struct ubi_attach_req {
         int32_t ubi_num;
         int32_t mtd_num;
         int32_t vid_hdr_offset;
-       uint8_t padding[12];
+       int8_t padding[12];
  };
  
  /**
@@ -250,6 +262,48 @@ struct ubi_rsvol_req {
         int32_t vol_id;
  } __attribute__ ((packed));
  
+/**
+ * struct ubi_rnvol_req - volumes re-name request.
+ * @count: count of volumes to re-name
+ * @padding1:  reserved for future, not used, has to be zeroed
+ * @vol_id: ID of the volume to re-name
+ * @name_len: name length
+ * @padding2:  reserved for future, not used, has to be zeroed
+ * @name: new volume name
+ *
+ * UBI allows re-naming up to %UBI_MAX_RNVOL volumes in one go. The count of
+ * volumes to re-name is specified in the @count field. The IDs of the volumes
+ * to re-name and the new names are specified in the @vol_id and @name fields.
+ *
+ * The UBI volume re-name operation is atomic, which means that should a power
+ * cut happen, the volumes will have either the old or the new names. So a
+ * possible use-case of this command is atomic upgrade. To upgrade, say, volumes
+ * A and B one may create temporary volumes %A1 and %B1 with the new contents,
+ * then atomically re-name A1->A and B1->B, in which case old %A and %B will
+ * be removed.
+ *
+ * If it is not desirable to remove old A and B, the re-name request has to
+ * contain 4 entries: A1->A, A->A1, B1->B, B->B1, in which case old A1 and B1
+ * become A and B, and old A and B will become A1 and B1.
+ *
+ * It is also OK to request: A1->A, A->X, B1->B, B->Y, in which case old A1
+ * and B1 become A and B, and old A and B become X and Y.
+ *
+ * In other words, in case of re-naming into an existing volume name, the
+ * existing volume is removed, unless it is re-named as well at the same
+ * re-name request.
+ */
+struct ubi_rnvol_req {
+       int32_t count;
+       int8_t padding1[12];
+       struct {
+               int32_t vol_id;
+               int16_t name_len;
+               int8_t  padding2[2];
+               char    name[UBI_MAX_VOLUME_NAME + 1];
+       } ents[UBI_MAX_RNVOL];
+} __attribute__ ((packed));
+
  /**
   * struct ubi_leb_change_req - a data structure used in atomic logical
   *                             eraseblock change requests.
@@ -261,8 +315,8 @@ struct ubi_rsvol_req {
  struct ubi_leb_change_req {
         int32_t lnum;
         int32_t bytes;
-       uint8_t dtype;
-       uint8_t padding[7];
+       int8_t  dtype;
+       int8_t  padding[7];
  } __attribute__ ((packed));
  
  #endif /* __UBI_USER_H__ */
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h

index dfd8bf66ce274f99e486cf80794980f255b398ae..d364fd594ea4d7f2b3f088f460e2e2bc09ae6697 100644 (file)
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -262,7 +262,7 @@ static inline int ieee80211_get_radiotap_len(unsigned char *data)
         struct ieee80211_radiotap_header *hdr =
                 (struct ieee80211_radiotap_header *)data;
  
-       return le16_to_cpu(get_unaligned(&hdr->it_len));
+       return get_unaligned_le16(&hdr->it_len);
  }
  
  #endif                         /* IEEE80211_RADIOTAP_H */
diff --git a/init/do_mounts.c b/init/do_mounts.c

index a1de1bf3d6b9d5694cf71b4c4a9dc03193636b96..f769fac4f4c0de2c1904de7dd3e2e152e196a33a 100644 (file)
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -12,6 +12,7 @@
  #include <linux/device.h>
  #include <linux/init.h>
  #include <linux/fs.h>
+#include <linux/initrd.h>
  
  #include <linux/nfs_fs.h>
  #include <linux/nfs_fs_sb.h>
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c

index 46dfd64ae8fb027cf74e1252b91b2919f81a1923..fedef93b586fad4146bf1f1cfd04dffbb012c0e1 100644 (file)
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -10,8 +10,6 @@
  
  #include "do_mounts.h"
  
-#define BUILD_CRAMDISK
-
  int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */
  
  static int __init prompt_ramdisk(char *str)
@@ -162,14 +160,8 @@ int __init rd_load_image(char *from)
                 goto done;
  
         if (nblocks == 0) {
-#ifdef BUILD_CRAMDISK
                 if (crd_load(in_fd, out_fd) == 0)
                         goto successful_load;
-#else
-               printk(KERN_NOTICE
-                      "RAMDISK: Kernel does not support compressed "
-                      "RAM disk images\n");
-#endif
                 goto done;
         }
  
@@ -267,8 +259,6 @@ int __init rd_load_disk(int n)
         return rd_load_image("/dev/root");
  }
  
-#ifdef BUILD_CRAMDISK
-
  /*
   * gzip declarations
   */
@@ -313,32 +303,11 @@ static int crd_infd, crd_outfd;
  
  static int  __init fill_inbuf(void);
  static void __init flush_window(void);
-static void __init *malloc(size_t size);
-static void __init free(void *where);
  static void __init error(char *m);
-static void __init gzip_mark(void **);
-static void __init gzip_release(void **);
-
-#include "../lib/inflate.c"
  
-static void __init *malloc(size_t size)
-{
-       return kmalloc(size, GFP_KERNEL);
-}
-
-static void __init free(void *where)
-{
-       kfree(where);
-}
-
-static void __init gzip_mark(void **ptr)
-{
-}
-
-static void __init gzip_release(void **ptr)
-{
-}
+#define NO_INFLATE_MALLOC
  
+#include "../lib/inflate.c"
  
  /* ===========================================================================
   * Fill the input buffer. This is called only when the buffer is empty
@@ -425,5 +394,3 @@ static int __init crd_load(int in_fd, int out_fd)
         kfree(window);
         return result;
  }
-
-#endif  /* BUILD_CRAMDISK */
diff --git a/init/initramfs.c b/init/initramfs.c

index 8eeeccb328c9f984812941592258ebd72e5d96e6..644fc01ad5f05244f3f3012becffda2326b2b65d 100644 (file)
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -14,16 +14,6 @@ static void __init error(char *x)
                 message = x;
  }
  
-static void __init *malloc(size_t size)
-{
-       return kmalloc(size, GFP_KERNEL);
-}
-
-static void __init free(void *where)
-{
-       kfree(where);
-}
-
  /* link hash */
  
  #define N_ALIGN(len) ((((len) + 1) & ~3) + 2)
@@ -407,18 +397,10 @@ static long bytes_out;
  
  static void __init flush_window(void);
  static void __init error(char *m);
-static void __init gzip_mark(void **);
-static void __init gzip_release(void **);
  
-#include "../lib/inflate.c"
+#define NO_INFLATE_MALLOC
  
-static void __init gzip_mark(void **ptr)
-{
-}
-
-static void __init gzip_release(void **ptr)
-{
-}
+#include "../lib/inflate.c"
  
  /* ===========================================================================
   * Write the output window window[0..outcnt-1] and update crc and bytes_out.
diff --git a/init/main.c b/init/main.c

index 2769dc031c6222d077206619f755727f7433d3dd..0604cbcaf1e4e2030f487e663966cba66517101b 100644 (file)
--- a/init/main.c
+++ b/init/main.c
@@ -87,8 +87,6 @@ extern void init_IRQ(void);
  extern void fork_init(unsigned long);
  extern void mca_init(void);
  extern void sbus_init(void);
-extern void pidhash_init(void);
-extern void pidmap_init(void);
  extern void prio_tree_init(void);
  extern void radix_tree_init(void);
  extern void free_initmem(void);
diff --git a/init/version.c b/init/version.c

index 9d17d70ee02d0165e4f94e5eacda6ea7c808a469..52a8b98642b8fb78343dfd63e20d1f448acb5866 100644 (file)
--- a/init/version.c
+++ b/init/version.c
@@ -13,10 +13,13 @@
  #include <linux/utsrelease.h>
  #include <linux/version.h>
  
+#ifndef CONFIG_KALLSYMS
  #define version(a) Version_ ## a
  #define version_string(a) version(a)
  
+extern int version_string(LINUX_VERSION_CODE);
  int version_string(LINUX_VERSION_CODE);
+#endif
  
  struct uts_namespace init_uts_ns = {
         .kref = {
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c

index d3497465cc0a5aa461dabce18b0f7af4dc537cea..69bc85978ba0bb04f0f77745cfa842aea69b7c76 100644 (file)
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -27,15 +27,17 @@ static void *get_ipc(ctl_table *table)
  }
  
  /*
- * Routine that is called when a tunable has successfully been changed by
- * hand and it has a callback routine registered on the ipc namespace notifier
- * chain: we don't want such tunables to be recomputed anymore upon memory
- * add/remove or ipc namespace creation/removal.
- * They can come back to a recomputable state by being set to a <0 value.
+ * Routine that is called when the file "auto_msgmni" has successfully been
+ * written.
+ * Two values are allowed:
+ * 0: unregister msgmni's callback routine from the ipc namespace notifier
+ *    chain. This means that msgmni won't be recomputed anymore upon memory
+ *    add/remove or ipc namespace creation/removal.
+ * 1: register back the callback routine.
   */
-static void tunable_set_callback(int val)
+static void ipc_auto_callback(int val)
  {
-       if (val >= 0)
+       if (!val)
                 unregister_ipcns_notifier(current->nsproxy->ipc_ns);
         else {
                 /*
@@ -71,7 +73,12 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write,
         rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos);
  
         if (write && !rc && lenp_bef == *lenp)
-               tunable_set_callback(*((int *)(ipc_table.data)));
+               /*
+                * Tunable has successfully been changed by hand. Disable its
+                * automatic adjustment. This simply requires unregistering
+                * the notifiers that trigger recalculation.
+                */
+               unregister_ipcns_notifier(current->nsproxy->ipc_ns);
  
         return rc;
  }
@@ -87,10 +94,39 @@ static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
                                         lenp, ppos);
  }
  
+static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
+       struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       struct ctl_table ipc_table;
+       size_t lenp_bef = *lenp;
+       int oldval;
+       int rc;
+
+       memcpy(&ipc_table, table, sizeof(ipc_table));
+       ipc_table.data = get_ipc(table);
+       oldval = *((int *)(ipc_table.data));
+
+       rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos);
+
+       if (write && !rc && lenp_bef == *lenp) {
+               int newval = *((int *)(ipc_table.data));
+               /*
+                * The file "auto_msgmni" has correctly been set.
+                * React by (un)registering the corresponding tunable, if the
+                * value has changed.
+                */
+               if (newval != oldval)
+                       ipc_auto_callback(newval);
+       }
+
+       return rc;
+}
+
  #else
  #define proc_ipc_doulongvec_minmax NULL
  #define proc_ipc_dointvec         NULL
  #define proc_ipc_callback_dointvec NULL
+#define proc_ipcauto_dointvec_minmax NULL
  #endif
  
  #ifdef CONFIG_SYSCTL_SYSCALL
@@ -142,14 +178,11 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name,
         rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval,
                 newlen);
  
-       if (newval && newlen && rc > 0) {
+       if (newval && newlen && rc > 0)
                 /*
                  * Tunable has successfully been changed from userland
                  */
-               int *data = get_ipc(table);
-
-               tunable_set_callback(*data);
-       }
+               unregister_ipcns_notifier(current->nsproxy->ipc_ns);
  
         return rc;
  }
@@ -158,6 +191,9 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name,
  #define sysctl_ipc_registered_data NULL
  #endif
  
+static int zero;
+static int one = 1;
+
  static struct ctl_table ipc_kern_table[] = {
         {
                 .ctl_name       = KERN_SHMMAX,
@@ -222,6 +258,16 @@ static struct ctl_table ipc_kern_table[] = {
                 .proc_handler   = proc_ipc_dointvec,
                 .strategy       = sysctl_ipc_data,
         },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "auto_msgmni",
+               .data           = &init_ipc_ns.auto_msgmni,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_ipcauto_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
         {}
  };
  
diff --git a/ipc/ipcns_notifier.c b/ipc/ipcns_notifier.c

index 70ff09183f7ba73529842fbd9eabda0e3de72269..b9b31a4f77e114cff79eef3cd25e075a3dc8d871 100644 (file)
--- a/ipc/ipcns_notifier.c
+++ b/ipc/ipcns_notifier.c
@@ -55,25 +55,35 @@ static int ipcns_callback(struct notifier_block *self,
  
  int register_ipcns_notifier(struct ipc_namespace *ns)
  {
+       int rc;
+
         memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
         ns->ipcns_nb.notifier_call = ipcns_callback;
         ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
-       return blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb);
+       rc = blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb);
+       if (!rc)
+               ns->auto_msgmni = 1;
+       return rc;
  }
  
  int cond_register_ipcns_notifier(struct ipc_namespace *ns)
  {
+       int rc;
+
         memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
         ns->ipcns_nb.notifier_call = ipcns_callback;
         ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
-       return blocking_notifier_chain_cond_register(&ipcns_chain,
+       rc = blocking_notifier_chain_cond_register(&ipcns_chain,
                                                         &ns->ipcns_nb);
+       if (!rc)
+               ns->auto_msgmni = 1;
+       return rc;
  }
  
-int unregister_ipcns_notifier(struct ipc_namespace *ns)
+void unregister_ipcns_notifier(struct ipc_namespace *ns)
  {
-       return blocking_notifier_chain_unregister(&ipcns_chain,
-                                               &ns->ipcns_nb);
+       blocking_notifier_chain_unregister(&ipcns_chain, &ns->ipcns_nb);
+       ns->auto_msgmni = 0;
  }
  
  int ipcns_notify(unsigned long val)
diff --git a/ipc/mqueue.c b/ipc/mqueue.c

index 3e84b958186b5d043ef34c706ff4036433f93c8b..1fdc2eb2f6d89a233170c1701a15e96bf70a139a 100644 (file)
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -314,15 +314,11 @@ static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
  *      through std routines)
  */
  static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
-                               size_t count, loff_t * off)
+                               size_t count, loff_t *off)
  {
         struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode);
         char buffer[FILENT_SIZE];
-       size_t slen;
-       loff_t o;
-
-       if (!count)
-               return 0;
+       ssize_t ret;
  
         spin_lock(&info->lock);
         snprintf(buffer, sizeof(buffer),
@@ -335,21 +331,14 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
                         pid_vnr(info->notify_owner));
         spin_unlock(&info->lock);
         buffer[sizeof(buffer)-1] = '\0';
-       slen = strlen(buffer)+1;
-
-       o = *off;
-       if (o > slen)
-               return 0;
-
-       if (o + count > slen)
-               count = slen - o;
  
-       if (copy_to_user(u_data, buffer + o, count))
-               return -EFAULT;
+       ret = simple_read_from_buffer(u_data, count, off, buffer,
+                               strlen(buffer));
+       if (ret <= 0)
+               return ret;
  
-       *off = o + count;
         filp->f_path.dentry->d_inode->i_atime = filp->f_path.dentry->d_inode->i_ctime = CURRENT_TIME;
-       return count;
+       return ret;
  }
  
  static int mqueue_flush_file(struct file *filp, fl_owner_t id)
diff --git a/ipc/sem.c b/ipc/sem.c

index e9418df5ff3ecf0c3cb3bb6e6fbac9de5fcac828..bf1bc36cb7ee62bc95021ba62253d5b667802952 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -272,9 +272,8 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
         ns->used_sems += nsems;
  
         sma->sem_base = (struct sem *) &sma[1];
-       /* sma->sem_pending = NULL; */
-       sma->sem_pending_last = &sma->sem_pending;
-       /* sma->undo = NULL; */
+       INIT_LIST_HEAD(&sma->sem_pending);
+       INIT_LIST_HEAD(&sma->list_id);
         sma->sem_nsems = nsems;
         sma->sem_ctime = get_seconds();
         sem_unlock(sma);
@@ -331,38 +330,6 @@ asmlinkage long sys_semget(key_t key, int nsems, int semflg)
         return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
  }
  
-/* Manage the doubly linked list sma->sem_pending as a FIFO:
- * insert new queue elements at the tail sma->sem_pending_last.
- */
-static inline void append_to_queue (struct sem_array * sma,
-                                   struct sem_queue * q)
-{
-       *(q->prev = sma->sem_pending_last) = q;
-       *(sma->sem_pending_last = &q->next) = NULL;
-}
-
-static inline void prepend_to_queue (struct sem_array * sma,
-                                    struct sem_queue * q)
-{
-       q->next = sma->sem_pending;
-       *(q->prev = &sma->sem_pending) = q;
-       if (q->next)
-               q->next->prev = &q->next;
-       else /* sma->sem_pending_last == &sma->sem_pending */
-               sma->sem_pending_last = &q->next;
-}
-
-static inline void remove_from_queue (struct sem_array * sma,
-                                     struct sem_queue * q)
-{
-       *(q->prev) = q->next;
-       if (q->next)
-               q->next->prev = q->prev;
-       else /* sma->sem_pending_last == &q->next */
-               sma->sem_pending_last = q->prev;
-       q->prev = NULL; /* mark as removed */
-}
-
  /*
   * Determine whether a sequence of semaphore operations would succeed
   * all at once. Return 0 if yes, 1 if need to sleep, else return error code.
@@ -438,16 +405,15 @@ static void update_queue (struct sem_array * sma)
         int error;
         struct sem_queue * q;
  
-       q = sma->sem_pending;
-       while(q) {
+       q = list_entry(sma->sem_pending.next, struct sem_queue, list);
+       while (&q->list != &sma->sem_pending) {
                 error = try_atomic_semop(sma, q->sops, q->nsops,
                                          q->undo, q->pid);
  
                 /* Does q->sleeper still need to sleep? */
                 if (error <= 0) {
                         struct sem_queue *n;
-                       remove_from_queue(sma,q);
-                       q->status = IN_WAKEUP;
+
                         /*
                          * Continue scanning. The next operation
                          * that must be checked depends on the type of the
@@ -458,11 +424,26 @@ static void update_queue (struct sem_array * sma)
                          *   for semaphore values to become 0.
                          * - if the operation didn't modify the array,
                          *   then just continue.
+                        * The order of list_del() and reading ->next
+                        * is crucial: In the former case, the list_del()
+                        * must be done first [because we might be the
+                        * first entry in ->sem_pending], in the latter
+                        * case the list_del() must be done last
+                        * [because the list is invalid after the list_del()]
                          */
-                       if (q->alter)
-                               n = sma->sem_pending;
-                       else
-                               n = q->next;
+                       if (q->alter) {
+                               list_del(&q->list);
+                               n = list_entry(sma->sem_pending.next,
+                                               struct sem_queue, list);
+                       } else {
+                               n = list_entry(q->list.next, struct sem_queue,
+                                               list);
+                               list_del(&q->list);
+                       }
+
+                       /* wake up the waiting thread */
+                       q->status = IN_WAKEUP;
+
                         wake_up_process(q->sleeper);
                         /* hands-off: q will disappear immediately after
                          * writing q->status.
@@ -471,7 +452,7 @@ static void update_queue (struct sem_array * sma)
                         q->status = error;
                         q = n;
                 } else {
-                       q = q->next;
+                       q = list_entry(q->list.next, struct sem_queue, list);
                 }
         }
  }
@@ -491,7 +472,7 @@ static int count_semncnt (struct sem_array * sma, ushort semnum)
         struct sem_queue * q;
  
         semncnt = 0;
-       for (q = sma->sem_pending; q; q = q->next) {
+       list_for_each_entry(q, &sma->sem_pending, list) {
                 struct sembuf * sops = q->sops;
                 int nsops = q->nsops;
                 int i;
@@ -503,13 +484,14 @@ static int count_semncnt (struct sem_array * sma, ushort semnum)
         }
         return semncnt;
  }
+
  static int count_semzcnt (struct sem_array * sma, ushort semnum)
  {
         int semzcnt;
         struct sem_queue * q;
  
         semzcnt = 0;
-       for (q = sma->sem_pending; q; q = q->next) {
+       list_for_each_entry(q, &sma->sem_pending, list) {
                 struct sembuf * sops = q->sops;
                 int nsops = q->nsops;
                 int i;
@@ -522,35 +504,41 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
         return semzcnt;
  }
  
+void free_un(struct rcu_head *head)
+{
+       struct sem_undo *un = container_of(head, struct sem_undo, rcu);
+       kfree(un);
+}
+
  /* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
   * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
   * remains locked on exit.
   */
  static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
  {
-       struct sem_undo *un;
-       struct sem_queue *q;
+       struct sem_undo *un, *tu;
+       struct sem_queue *q, *tq;
         struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
  
-       /* Invalidate the existing undo structures for this semaphore set.
-        * (They will be freed without any further action in exit_sem()
-        * or during the next semop.)
-        */
-       for (un = sma->undo; un; un = un->id_next)
+       /* Free the existing undo structures for this semaphore set.  */
+       assert_spin_locked(&sma->sem_perm.lock);
+       list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
+               list_del(&un->list_id);
+               spin_lock(&un->ulp->lock);
                 un->semid = -1;
+               list_del_rcu(&un->list_proc);
+               spin_unlock(&un->ulp->lock);
+               call_rcu(&un->rcu, free_un);
+       }
  
         /* Wake up all pending processes and let them fail with EIDRM. */
-       q = sma->sem_pending;
-       while(q) {
-               struct sem_queue *n;
-               /* lazy remove_from_queue: we are killing the whole queue */
-               q->prev = NULL;
-               n = q->next;
+       list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
+               list_del(&q->list);
+
                 q->status = IN_WAKEUP;
                 wake_up_process(q->sleeper); /* doesn't sleep */
                 smp_wmb();
                 q->status = -EIDRM;     /* hands-off q */
-               q = n;
         }
  
         /* Remove the semaphore set from the IDR */
@@ -763,9 +751,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
  
                 for (i = 0; i < nsems; i++)
                         sma->sem_base[i].semval = sem_io[i];
-               for (un = sma->undo; un; un = un->id_next)
+
+               assert_spin_locked(&sma->sem_perm.lock);
+               list_for_each_entry(un, &sma->list_id, list_id) {
                         for (i = 0; i < nsems; i++)
                                 un->semadj[i] = 0;
+               }
                 sma->sem_ctime = get_seconds();
                 /* maybe some queued-up processes were waiting for this */
                 update_queue(sma);
@@ -797,12 +788,15 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
         {
                 int val = arg.val;
                 struct sem_undo *un;
+
                 err = -ERANGE;
                 if (val > SEMVMX || val < 0)
                         goto out_unlock;
  
-               for (un = sma->undo; un; un = un->id_next)
+               assert_spin_locked(&sma->sem_perm.lock);
+               list_for_each_entry(un, &sma->list_id, list_id)
                         un->semadj[semnum] = 0;
+
                 curr->semval = val;
                 curr->sempid = task_tgid_vnr(current);
                 sma->sem_ctime = get_seconds();
@@ -952,6 +946,8 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
                         return -ENOMEM;
                 spin_lock_init(&undo_list->lock);
                 atomic_set(&undo_list->refcnt, 1);
+               INIT_LIST_HEAD(&undo_list->list_proc);
+
                 current->sysvsem.undo_list = undo_list;
         }
         *undo_listp = undo_list;
@@ -960,25 +956,27 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
  
  static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
  {
-       struct sem_undo **last, *un;
+       struct sem_undo *walk;
  
-       last = &ulp->proc_list;
-       un = *last;
-       while(un != NULL) {
-               if(un->semid==semid)
-                       break;
-               if(un->semid==-1) {
-                       *last=un->proc_next;
-                       kfree(un);
-               } else {
-                       last=&un->proc_next;
-               }
-               un=*last;
+       list_for_each_entry_rcu(walk, &ulp->list_proc, list_proc) {
+               if (walk->semid == semid)
+                       return walk;
         }
-       return un;
+       return NULL;
  }
  
-static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
+/**
+ * find_alloc_undo - Lookup (and if not present create) undo array
+ * @ns: namespace
+ * @semid: semaphore array id
+ *
+ * The function looks up (and if not present creates) the undo structure.
+ * The size of the undo structure depends on the size of the semaphore
+ * array, thus the alloc path is not that straightforward.
+ * Lifetime rules: sem_undo is rcu-protected; on success, the function
+ * returns with rcu_read_lock() held.
+ */
+static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
  {
         struct sem_array *sma;
         struct sem_undo_list *ulp;
@@ -990,13 +988,16 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
         if (error)
                 return ERR_PTR(error);
  
+       rcu_read_lock();
         spin_lock(&ulp->lock);
         un = lookup_undo(ulp, semid);
         spin_unlock(&ulp->lock);
         if (likely(un!=NULL))
                 goto out;
+       rcu_read_unlock();
  
         /* no undo structure around - allocate one. */
+       /* step 1: figure out the size of the semaphore array */
         sma = sem_lock_check(ns, semid);
         if (IS_ERR(sma))
                 return ERR_PTR(PTR_ERR(sma));
@@ -1004,37 +1005,45 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
         nsems = sma->sem_nsems;
         sem_getref_and_unlock(sma);
  
+       /* step 2: allocate new undo structure */
         new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
         if (!new) {
                 sem_putref(sma);
                 return ERR_PTR(-ENOMEM);
         }
-       new->semadj = (short *) &new[1];
-       new->semid = semid;
  
-       spin_lock(&ulp->lock);
-       un = lookup_undo(ulp, semid);
-       if (un) {
-               spin_unlock(&ulp->lock);
-               kfree(new);
-               sem_putref(sma);
-               goto out;
-       }
+       /* step 3: Acquire the lock on semaphore array */
         sem_lock_and_putref(sma);
         if (sma->sem_perm.deleted) {
                 sem_unlock(sma);
-               spin_unlock(&ulp->lock);
                 kfree(new);
                 un = ERR_PTR(-EIDRM);
                 goto out;
         }
-       new->proc_next = ulp->proc_list;
-       ulp->proc_list = new;
-       new->id_next = sma->undo;
-       sma->undo = new;
-       sem_unlock(sma);
+       spin_lock(&ulp->lock);
+
+       /*
+        * step 4: check for races: did someone else allocate the undo struct?
+        */
+       un = lookup_undo(ulp, semid);
+       if (un) {
+               kfree(new);
+               goto success;
+       }
+       /* step 5: initialize & link new undo structure */
+       new->semadj = (short *) &new[1];
+       new->ulp = ulp;
+       new->semid = semid;
+       assert_spin_locked(&ulp->lock);
+       list_add_rcu(&new->list_proc, &ulp->list_proc);
+       assert_spin_locked(&sma->sem_perm.lock);
+       list_add(&new->list_id, &sma->list_id);
         un = new;
+
+success:
         spin_unlock(&ulp->lock);
+       rcu_read_lock();
+       sem_unlock(sma);
  out:
         return un;
  }
@@ -1090,9 +1099,8 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
                         alter = 1;
         }
  
-retry_undos:
         if (undos) {
-               un = find_undo(ns, semid);
+               un = find_alloc_undo(ns, semid);
                 if (IS_ERR(un)) {
                         error = PTR_ERR(un);
                         goto out_free;
@@ -1102,19 +1110,37 @@ retry_undos:
  
         sma = sem_lock_check(ns, semid);
         if (IS_ERR(sma)) {
+               if (un)
+                       rcu_read_unlock();
                 error = PTR_ERR(sma);
                 goto out_free;
         }
  
         /*
-        * semid identifiers are not unique - find_undo may have
+        * semid identifiers are not unique - find_alloc_undo may have
          * allocated an undo structure, it was invalidated by an RMID
-        * and now a new array with received the same id. Check and retry.
+        * and now a new array has received the same id. Check and fail.
+        * This case can be detected by checking un->semid. The existence of
+        * "un" itself is guaranteed by rcu.
          */
-       if (un && un->semid == -1) {
-               sem_unlock(sma);
-               goto retry_undos;
+       error = -EIDRM;
+       if (un) {
+               if (un->semid == -1) {
+                       rcu_read_unlock();
+                       goto out_unlock_free;
+               } else {
+                       /*
+                        * rcu lock can be released, "un" cannot disappear:
+                        * - sem_lock is acquired, thus IPC_RMID is
+                        *   impossible.
+                        * - exit_sem is impossible, it always operates on
+                        *   current (or a dead task).
+                        */
+
+                       rcu_read_unlock();
+               }
         }
+
         error = -EFBIG;
         if (max >= sma->sem_nsems)
                 goto out_unlock_free;
@@ -1138,17 +1164,15 @@ retry_undos:
          * task into the pending queue and go to sleep.
          */
                 
-       queue.sma = sma;
         queue.sops = sops;
         queue.nsops = nsops;
         queue.undo = un;
         queue.pid = task_tgid_vnr(current);
-       queue.id = semid;
         queue.alter = alter;
         if (alter)
-               append_to_queue(sma ,&queue);
+               list_add_tail(&queue.list, &sma->sem_pending);
         else
-               prepend_to_queue(sma ,&queue);
+               list_add(&queue.list, &sma->sem_pending);
  
         queue.status = -EINTR;
         queue.sleeper = current;
@@ -1174,7 +1198,6 @@ retry_undos:
  
         sma = sem_lock(ns, semid);
         if (IS_ERR(sma)) {
-               BUG_ON(queue.prev != NULL);
                 error = -EIDRM;
                 goto out_free;
         }
@@ -1192,7 +1215,7 @@ retry_undos:
          */
         if (timeout && jiffies_left == 0)
                 error = -EAGAIN;
-       remove_from_queue(sma,&queue);
+       list_del(&queue.list);
         goto out_unlock_free;
  
  out_unlock_free:
@@ -1243,56 +1266,62 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
   */
  void exit_sem(struct task_struct *tsk)
  {
-       struct sem_undo_list *undo_list;
-       struct sem_undo *u, **up;
-       struct ipc_namespace *ns;
+       struct sem_undo_list *ulp;
  
-       undo_list = tsk->sysvsem.undo_list;
-       if (!undo_list)
+       ulp = tsk->sysvsem.undo_list;
+       if (!ulp)
                 return;
         tsk->sysvsem.undo_list = NULL;
  
-       if (!atomic_dec_and_test(&undo_list->refcnt))
+       if (!atomic_dec_and_test(&ulp->refcnt))
                 return;
  
-       ns = tsk->nsproxy->ipc_ns;
-       /* There's no need to hold the semundo list lock, as current
-         * is the last task exiting for this undo list.
-        */
-       for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) {
+       for (;;) {
                 struct sem_array *sma;
-               int nsems, i;
-               struct sem_undo *un, **unp;
+               struct sem_undo *un;
                 int semid;
-              
-               semid = u->semid;
+               int i;
  
-               if(semid == -1)
-                       continue;
-               sma = sem_lock(ns, semid);
+               rcu_read_lock();
+               un = list_entry(rcu_dereference(ulp->list_proc.next),
+                                       struct sem_undo, list_proc);
+               if (&un->list_proc == &ulp->list_proc)
+                       semid = -1;
+                else
+                       semid = un->semid;
+               rcu_read_unlock();
+
+               if (semid == -1)
+                       break;
+
+               sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid);
+
+               /* exit_sem raced with IPC_RMID, nothing to do */
                 if (IS_ERR(sma))
                         continue;
  
-               if (u->semid == -1)
-                       goto next_entry;
+               un = lookup_undo(ulp, semid);
+               if (un == NULL) {
+                       /* exit_sem raced with IPC_RMID+semget() that created
+                        * exactly the same semid. Nothing to do.
+                        */
+                       sem_unlock(sma);
+                       continue;
+               }
  
-               BUG_ON(sem_checkid(sma, u->semid));
+               /* remove un from the linked lists */
+               assert_spin_locked(&sma->sem_perm.lock);
+               list_del(&un->list_id);
  
-               /* remove u from the sma->undo list */
-               for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
-                       if (u == un)
-                               goto found;
-               }
-               printk ("exit_sem undo list error id=%d\n", u->semid);
-               goto next_entry;
-found:
-               *unp = un->id_next;
-               /* perform adjustments registered in u */
-               nsems = sma->sem_nsems;
-               for (i = 0; i < nsems; i++) {
+               spin_lock(&ulp->lock);
+               list_del_rcu(&un->list_proc);
+               spin_unlock(&ulp->lock);
+
+               /* perform adjustments registered in un */
+               for (i = 0; i < sma->sem_nsems; i++) {
                         struct sem * semaphore = &sma->sem_base[i];
-                       if (u->semadj[i]) {
-                               semaphore->semval += u->semadj[i];
+                       if (un->semadj[i]) {
+                               semaphore->semval += un->semadj[i];
                                 /*
                                  * Range checks of the new semaphore value,
                                  * not defined by sus:
@@ -1316,10 +1345,11 @@ found:
                 sma->sem_otime = get_seconds();
                 /* maybe some queued-up processes were waiting for this */
                 update_queue(sma);
-next_entry:
                 sem_unlock(sma);
+
+               call_rcu(&un->rcu, free_un);
         }
-       kfree(undo_list);
+       kfree(ulp);
  }
  
  #ifdef CONFIG_PROC_FS
diff --git a/ipc/shm.c b/ipc/shm.c

index a726aebce7d7f9c679f468343aaf62a4354d805c..e77ec698cf408c2344d302ac693814d44d411180 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -111,24 +111,9 @@ void __init shm_init (void)
                                 IPC_SHM_IDS, sysvipc_shm_proc_show);
  }
  
-/*
- * shm_lock_(check_)down routines are called in the paths where the rw_mutex
- * is held to protect access to the idr tree.
- */
-static inline struct shmid_kernel *shm_lock_down(struct ipc_namespace *ns,
-                                               int id)
-{
-       struct kern_ipc_perm *ipcp = ipc_lock_down(&shm_ids(ns), id);
-
-       if (IS_ERR(ipcp))
-               return (struct shmid_kernel *)ipcp;
-
-       return container_of(ipcp, struct shmid_kernel, shm_perm);
-}
-
  /*
   * shm_lock_(check_) routines are called in the paths where the rw_mutex
- * is not held.
+ * is not necessarily held.
   */
  static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
  {
@@ -211,7 +196,7 @@ static void shm_close(struct vm_area_struct *vma)
  
         down_write(&shm_ids(ns).rw_mutex);
         /* remove from the list of attaches of the shm segment */
-       shp = shm_lock_down(ns, sfd->id);
+       shp = shm_lock(ns, sfd->id);
         BUG_ON(IS_ERR(shp));
         shp->shm_lprid = task_tgid_vnr(current);
         shp->shm_dtim = get_seconds();
@@ -932,7 +917,7 @@ invalid:
  
  out_nattch:
         down_write(&shm_ids(ns).rw_mutex);
-       shp = shm_lock_down(ns, shmid);
+       shp = shm_lock(ns, shmid);
         BUG_ON(IS_ERR(shp));
         shp->shm_nattch--;
         if(shp->shm_nattch == 0 &&
diff --git a/ipc/util.c b/ipc/util.c

index 3339177b336cae51c0b5a8085ad9a087891bb0b2..49b3ea615dc5fa991d921667f44172da02331a55 100644 (file)
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -688,10 +688,6 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
   * Look for an id in the ipc ids idr and lock the associated ipc object.
   *
   * The ipc object is locked on exit.
- *
- * This is the routine that should be called when the rw_mutex is not already
- * held, i.e. idr tree not protected: it protects the idr tree in read mode
- * during the idr_find().
   */
  
  struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
@@ -699,18 +695,13 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
         struct kern_ipc_perm *out;
         int lid = ipcid_to_idx(id);
  
-       down_read(&ids->rw_mutex);
-
         rcu_read_lock();
         out = idr_find(&ids->ipcs_idr, lid);
         if (out == NULL) {
                 rcu_read_unlock();
-               up_read(&ids->rw_mutex);
                 return ERR_PTR(-EINVAL);
         }
  
-       up_read(&ids->rw_mutex);
-
         spin_lock(&out->lock);
         
         /* ipc_rmid() may have already freed the ID while ipc_lock
@@ -725,56 +716,6 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
         return out;
  }
  
-/**
- * ipc_lock_down - Lock an ipc structure with rw_sem held
- * @ids: IPC identifier set
- * @id: ipc id to look for
- *
- * Look for an id in the ipc ids idr and lock the associated ipc object.
- *
- * The ipc object is locked on exit.
- *
- * This is the routine that should be called when the rw_mutex is already
- * held, i.e. idr tree protected.
- */
-
-struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *ids, int id)
-{
-       struct kern_ipc_perm *out;
-       int lid = ipcid_to_idx(id);
-
-       rcu_read_lock();
-       out = idr_find(&ids->ipcs_idr, lid);
-       if (out == NULL) {
-               rcu_read_unlock();
-               return ERR_PTR(-EINVAL);
-       }
-
-       spin_lock(&out->lock);
-
-       /*
-        * No need to verify that the structure is still valid since the
-        * rw_mutex is held.
-        */
-       return out;
-}
-
-struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids, int id)
-{
-       struct kern_ipc_perm *out;
-
-       out = ipc_lock_down(ids, id);
-       if (IS_ERR(out))
-               return out;
-
-       if (ipc_checkid(out, id)) {
-               ipc_unlock(out);
-               return ERR_PTR(-EIDRM);
-       }
-
-       return out;
-}
-
  struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id)
  {
         struct kern_ipc_perm *out;
@@ -846,7 +787,7 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd,
         int err;
  
         down_write(&ids->rw_mutex);
-       ipcp = ipc_lock_check_down(ids, id);
+       ipcp = ipc_lock_check(ids, id);
         if (IS_ERR(ipcp)) {
                 err = PTR_ERR(ipcp);
                 goto out_up;
diff --git a/ipc/util.h b/ipc/util.h

index cdb966aebe0716e8375617431d5734ed45aa639f..3646b45a03c9366d0c1e9b25c950e0edcbaedeef 100644 (file)
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -102,11 +102,6 @@ void* ipc_rcu_alloc(int size);
  void ipc_rcu_getref(void *ptr);
  void ipc_rcu_putref(void *ptr);
  
-/*
- * ipc_lock_down: called with rw_mutex held
- * ipc_lock: called without that lock held
- */
-struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *, int);
  struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
  
  void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
@@ -155,7 +150,6 @@ static inline void ipc_unlock(struct kern_ipc_perm *perm)
         rcu_read_unlock();
  }
  
-struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids, int id);
  struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id);
  int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
                         struct ipc_ops *ops, struct ipc_params *params);
diff --git a/kernel/Makefile b/kernel/Makefile

index 15ab63ffe64d31634d59e1caeff8909043827e97..54f69837d35a3fa26bfa94c798d9b48332d2d3ac 100644 (file)
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -2,7 +2,7 @@
  # Makefile for the linux kernel.
  #
  
-obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
+obj-y     = sched.o fork.o exec_domain.o panic.o printk.o \
             cpu.o exit.o itimer.o time.o softirq.o resource.o \
             sysctl.o capability.o ptrace.o timer.o user.o \
             signal.o sys.o kmod.o workqueue.o pid.o \
@@ -24,6 +24,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg
  CFLAGS_REMOVE_sched.o = -mno-spe -pg
  endif
  
+obj-$(CONFIG_PROFILING) += profile.o
  obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
  obj-$(CONFIG_STACKTRACE) += stacktrace.o
  obj-y += time/
diff --git a/kernel/acct.c b/kernel/acct.c

index 91e1cfd734d286f922597108e54065ef691d31b2..dd68b905941818df3253e6f60558a49b9fbf2ef4 100644 (file)
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -75,37 +75,39 @@ int acct_parm[3] = {4, 2, 30};
  /*
   * External references and all of the globals.
   */
-static void do_acct_process(struct pid_namespace *ns, struct file *);
+static void do_acct_process(struct bsd_acct_struct *acct,
+               struct pid_namespace *ns, struct file *);
  
  /*
   * This structure is used so that all the data protected by lock
   * can be placed in the same cache line as the lock.  This primes
   * the cache line to have the data after getting the lock.
   */
-struct acct_glbs {
-       spinlock_t              lock;
+struct bsd_acct_struct {
         volatile int            active;
         volatile int            needcheck;
         struct file             *file;
         struct pid_namespace    *ns;
         struct timer_list       timer;
+       struct list_head        list;
  };
  
-static struct acct_glbs acct_globals __cacheline_aligned =
-       {__SPIN_LOCK_UNLOCKED(acct_globals.lock)};
+static DEFINE_SPINLOCK(acct_lock);
+static LIST_HEAD(acct_list);
  
  /*
   * Called whenever the timer says to check the free space.
   */
-static void acct_timeout(unsigned long unused)
+static void acct_timeout(unsigned long x)
  {
-       acct_globals.needcheck = 1;
+       struct bsd_acct_struct *acct = (struct bsd_acct_struct *)x;
+       acct->needcheck = 1;
  }
  
  /*
   * Check the amount of free space and suspend/resume accordingly.
   */
-static int check_free_space(struct file *file)
+static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
  {
         struct kstatfs sbuf;
         int res;
@@ -113,11 +115,11 @@ static int check_free_space(struct file *file)
         sector_t resume;
         sector_t suspend;
  
-       spin_lock(&acct_globals.lock);
-       res = acct_globals.active;
-       if (!file || !acct_globals.needcheck)
+       spin_lock(&acct_lock);
+       res = acct->active;
+       if (!file || !acct->needcheck)
                 goto out;
-       spin_unlock(&acct_globals.lock);
+       spin_unlock(&acct_lock);
  
         /* May block */
         if (vfs_statfs(file->f_path.dentry, &sbuf))
@@ -136,35 +138,35 @@ static int check_free_space(struct file *file)
                 act = 0;
  
         /*
-        * If some joker switched acct_globals.file under us we'ld better be
+        * If some joker switched acct->file under us we'd better be
          * silent and _not_ touch anything.
          */
-       spin_lock(&acct_globals.lock);
-       if (file != acct_globals.file) {
+       spin_lock(&acct_lock);
+       if (file != acct->file) {
                 if (act)
                         res = act>0;
                 goto out;
         }
  
-       if (acct_globals.active) {
+       if (acct->active) {
                 if (act < 0) {
-                       acct_globals.active = 0;
+                       acct->active = 0;
                         printk(KERN_INFO "Process accounting paused\n");
                 }
         } else {
                 if (act > 0) {
-                       acct_globals.active = 1;
+                       acct->active = 1;
                         printk(KERN_INFO "Process accounting resumed\n");
                 }
         }
  
-       del_timer(&acct_globals.timer);
-       acct_globals.needcheck = 0;
-       acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
-       add_timer(&acct_globals.timer);
-       res = acct_globals.active;
+       del_timer(&acct->timer);
+       acct->needcheck = 0;
+       acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+       add_timer(&acct->timer);
+       res = acct->active;
  out:
-       spin_unlock(&acct_globals.lock);
+       spin_unlock(&acct_lock);
         return res;
  }
  
@@ -172,39 +174,41 @@ out:
   * Close the old accounting file (if currently open) and then replace
   * it with file (if non-NULL).
   *
- * NOTE: acct_globals.lock MUST be held on entry and exit.
+ * NOTE: acct_lock MUST be held on entry and exit.
   */
-static void acct_file_reopen(struct file *file)
+static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
+               struct pid_namespace *ns)
  {
         struct file *old_acct = NULL;
         struct pid_namespace *old_ns = NULL;
  
-       if (acct_globals.file) {
-               old_acct = acct_globals.file;
-               old_ns = acct_globals.ns;
-               del_timer(&acct_globals.timer);
-               acct_globals.active = 0;
-               acct_globals.needcheck = 0;
-               acct_globals.file = NULL;
+       if (acct->file) {
+               old_acct = acct->file;
+               old_ns = acct->ns;
+               del_timer(&acct->timer);
+               acct->active = 0;
+               acct->needcheck = 0;
+               acct->file = NULL;
+               acct->ns = NULL;
+               list_del(&acct->list);
         }
         if (file) {
-               acct_globals.file = file;
-               acct_globals.ns = get_pid_ns(task_active_pid_ns(current));
-               acct_globals.needcheck = 0;
-               acct_globals.active = 1;
+               acct->file = file;
+               acct->ns = ns;
+               acct->needcheck = 0;
+               acct->active = 1;
+               list_add(&acct->list, &acct_list);
                 /* It's been deleted if it was used before so this is safe */
-               init_timer(&acct_globals.timer);
-               acct_globals.timer.function = acct_timeout;
-               acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
-               add_timer(&acct_globals.timer);
+               setup_timer(&acct->timer, acct_timeout, (unsigned long)acct);
+               acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+               add_timer(&acct->timer);
         }
         if (old_acct) {
                 mnt_unpin(old_acct->f_path.mnt);
-               spin_unlock(&acct_globals.lock);
-               do_acct_process(old_ns, old_acct);
+               spin_unlock(&acct_lock);
+               do_acct_process(acct, old_ns, old_acct);
                 filp_close(old_acct, NULL);
-               put_pid_ns(old_ns);
-               spin_lock(&acct_globals.lock);
+               spin_lock(&acct_lock);
         }
  }
  
@@ -212,6 +216,8 @@ static int acct_on(char *name)
  {
         struct file *file;
         int error;
+       struct pid_namespace *ns;
+       struct bsd_acct_struct *acct = NULL;
  
         /* Difference from BSD - they don't do O_APPEND */
         file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
@@ -228,18 +234,34 @@ static int acct_on(char *name)
                 return -EIO;
         }
  
+       ns = task_active_pid_ns(current);
+       if (ns->bacct == NULL) {
+               acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
+               if (acct == NULL) {
+                       filp_close(file, NULL);
+                       return -ENOMEM;
+               }
+       }
+
         error = security_acct(file);
         if (error) {
+               kfree(acct);
                 filp_close(file, NULL);
                 return error;
         }
  
-       spin_lock(&acct_globals.lock);
+       spin_lock(&acct_lock);
+       if (ns->bacct == NULL) {
+               ns->bacct = acct;
+               acct = NULL;
+       }
+
         mnt_pin(file->f_path.mnt);
-       acct_file_reopen(file);
-       spin_unlock(&acct_globals.lock);
+       acct_file_reopen(ns->bacct, file, ns);
+       spin_unlock(&acct_lock);
  
         mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
+       kfree(acct);
  
         return 0;
  }
@@ -269,11 +291,17 @@ asmlinkage long sys_acct(const char __user *name)
                 error = acct_on(tmp);
                 putname(tmp);
         } else {
+               struct bsd_acct_struct *acct;
+
+               acct = task_active_pid_ns(current)->bacct;
+               if (acct == NULL)
+                       return 0;
+
                 error = security_acct(NULL);
                 if (!error) {
-                       spin_lock(&acct_globals.lock);
-                       acct_file_reopen(NULL);
-                       spin_unlock(&acct_globals.lock);
+                       spin_lock(&acct_lock);
+                       acct_file_reopen(acct, NULL, NULL);
+                       spin_unlock(&acct_lock);
                 }
         }
         return error;
@@ -288,10 +316,16 @@ asmlinkage long sys_acct(const char __user *name)
   */
  void acct_auto_close_mnt(struct vfsmount *m)
  {
-       spin_lock(&acct_globals.lock);
-       if (acct_globals.file && acct_globals.file->f_path.mnt == m)
-               acct_file_reopen(NULL);
-       spin_unlock(&acct_globals.lock);
+       struct bsd_acct_struct *acct;
+
+       spin_lock(&acct_lock);
+restart:
+       list_for_each_entry(acct, &acct_list, list)
+               if (acct->file && acct->file->f_path.mnt == m) {
+                       acct_file_reopen(acct, NULL, NULL);
+                       goto restart;
+               }
+       spin_unlock(&acct_lock);
  }
  
  /**
@@ -303,12 +337,31 @@ void acct_auto_close_mnt(struct vfsmount *m)
   */
  void acct_auto_close(struct super_block *sb)
  {
-       spin_lock(&acct_globals.lock);
-       if (acct_globals.file &&
-           acct_globals.file->f_path.mnt->mnt_sb == sb) {
-               acct_file_reopen(NULL);
+       struct bsd_acct_struct *acct;
+
+       spin_lock(&acct_lock);
+restart:
+       list_for_each_entry(acct, &acct_list, list)
+               if (acct->file && acct->file->f_path.mnt->mnt_sb == sb) {
+                       acct_file_reopen(acct, NULL, NULL);
+                       goto restart;
+               }
+       spin_unlock(&acct_lock);
+}
+
+void acct_exit_ns(struct pid_namespace *ns)
+{
+       struct bsd_acct_struct *acct;
+
+       spin_lock(&acct_lock);
+       acct = ns->bacct;
+       if (acct != NULL) {
+               if (acct->file != NULL)
+                       acct_file_reopen(acct, NULL, NULL);
+
+               kfree(acct);
         }
-       spin_unlock(&acct_globals.lock);
+       spin_unlock(&acct_lock);
  }
  
  /*
@@ -425,7 +478,8 @@ static u32 encode_float(u64 value)
  /*
   *  do_acct_process does all actual work. Caller holds the reference to file.
   */
-static void do_acct_process(struct pid_namespace *ns, struct file *file)
+static void do_acct_process(struct bsd_acct_struct *acct,
+               struct pid_namespace *ns, struct file *file)
  {
         struct pacct_struct *pacct = &current->signal->pacct;
         acct_t ac;
@@ -440,7 +494,7 @@ static void do_acct_process(struct pid_namespace *ns, struct file *file)
          * First check to see if there is enough free_space to continue
          * the process accounting system.
          */
-       if (!check_free_space(file))
+       if (!check_free_space(acct, file))
                 return;
  
         /*
@@ -577,34 +631,46 @@ void acct_collect(long exitcode, int group_dead)
         spin_unlock_irq(&current->sighand->siglock);
  }
  
-/**
- * acct_process - now just a wrapper around do_acct_process
- * @exitcode: task exit code
- *
- * handles process accounting for an exiting task
- */
-void acct_process(void)
+static void acct_process_in_ns(struct pid_namespace *ns)
  {
         struct file *file = NULL;
-       struct pid_namespace *ns;
+       struct bsd_acct_struct *acct;
  
+       acct = ns->bacct;
         /*
          * accelerate the common fastpath:
          */
-       if (!acct_globals.file)
+       if (!acct || !acct->file)
                 return;
  
-       spin_lock(&acct_globals.lock);
-       file = acct_globals.file;
+       spin_lock(&acct_lock);
+       file = acct->file;
         if (unlikely(!file)) {
-               spin_unlock(&acct_globals.lock);
+               spin_unlock(&acct_lock);
                 return;
         }
         get_file(file);
-       ns = get_pid_ns(acct_globals.ns);
-       spin_unlock(&acct_globals.lock);
+       spin_unlock(&acct_lock);
  
-       do_acct_process(ns, file);
+       do_acct_process(acct, ns, file);
         fput(file);
-       put_pid_ns(ns);
+}
+
+/**
+ * acct_process - now just a wrapper around acct_process_in_ns,
+ * which in turn is a wrapper around do_acct_process.
+ *
+ * handles process accounting for an exiting task
+ */
+void acct_process(void)
+{
+       struct pid_namespace *ns;
+
+       /*
+        * This loop is safe lockless, since current is still
+        * alive and holds its namespace, which in turn holds
+        * its parent.
+        */
+       for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent)
+               acct_process_in_ns(ns);
  }
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index 15ac0e1e4f4de9dde14a6fe17e9b3d6e059a4baf..66ec9fd21e0c8206b1f89e16119b513fdc3a8896 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -89,11 +89,7 @@ struct cgroupfs_root {
         /* Hierarchy-specific flags */
         unsigned long flags;
  
-       /* The path to use for release notifications. No locking
-        * between setting and use - so if userspace updates this
-        * while child cgroups exist, you could miss a
-        * notification. We ensure that it's always a valid
-        * NUL-terminated string */
+       /* The path to use for release notifications. */
         char release_agent_path[PATH_MAX];
  };
  
@@ -118,7 +114,7 @@ static int root_count;
   * extra work in the fork/exit path if none of the subsystems need to
   * be called.
   */
-static int need_forkexit_callback;
+static int need_forkexit_callback __read_mostly;
  static int need_mm_owner_callback __read_mostly;
  
  /* convenient tests for these bits */
@@ -220,7 +216,7 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
   * task until after the first call to cgroup_iter_start(). This
   * reduces the fork()/exit() overhead for people who have cgroups
   * compiled into their kernel but not actually in use */
-static int use_task_css_set_links;
+static int use_task_css_set_links __read_mostly;
  
  /* When we create or destroy a css_set, the operation simply
   * takes/releases a reference count on all the cgroups referenced
@@ -241,17 +237,20 @@ static int use_task_css_set_links;
   */
  static void unlink_css_set(struct css_set *cg)
  {
+       struct cg_cgroup_link *link;
+       struct cg_cgroup_link *saved_link;
+
         write_lock(&css_set_lock);
         hlist_del(&cg->hlist);
         css_set_count--;
-       while (!list_empty(&cg->cg_links)) {
-               struct cg_cgroup_link *link;
-               link = list_entry(cg->cg_links.next,
-                                 struct cg_cgroup_link, cg_link_list);
+
+       list_for_each_entry_safe(link, saved_link, &cg->cg_links,
+                                cg_link_list) {
                 list_del(&link->cg_link_list);
                 list_del(&link->cgrp_link_list);
                 kfree(link);
         }
+
         write_unlock(&css_set_lock);
  }
  
@@ -363,15 +362,14 @@ static struct css_set *find_existing_css_set(
  static int allocate_cg_links(int count, struct list_head *tmp)
  {
         struct cg_cgroup_link *link;
+       struct cg_cgroup_link *saved_link;
         int i;
         INIT_LIST_HEAD(tmp);
         for (i = 0; i < count; i++) {
                 link = kmalloc(sizeof(*link), GFP_KERNEL);
                 if (!link) {
-                       while (!list_empty(tmp)) {
-                               link = list_entry(tmp->next,
-                                                 struct cg_cgroup_link,
-                                                 cgrp_link_list);
+                       list_for_each_entry_safe(link, saved_link, tmp,
+                                                cgrp_link_list) {
                                 list_del(&link->cgrp_link_list);
                                 kfree(link);
                         }
@@ -384,11 +382,10 @@ static int allocate_cg_links(int count, struct list_head *tmp)
  
  static void free_cg_links(struct list_head *tmp)
  {
-       while (!list_empty(tmp)) {
-               struct cg_cgroup_link *link;
-               link = list_entry(tmp->next,
-                                 struct cg_cgroup_link,
-                                 cgrp_link_list);
+       struct cg_cgroup_link *link;
+       struct cg_cgroup_link *saved_link;
+
+       list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
                 list_del(&link->cgrp_link_list);
                 kfree(link);
         }
@@ -415,11 +412,11 @@ static struct css_set *find_css_set(
  
         /* First see if we already have a cgroup group that matches
          * the desired set */
-       write_lock(&css_set_lock);
+       read_lock(&css_set_lock);
         res = find_existing_css_set(oldcg, cgrp, template);
         if (res)
                 get_css_set(res);
-       write_unlock(&css_set_lock);
+       read_unlock(&css_set_lock);
  
         if (res)
                 return res;
@@ -507,10 +504,6 @@ static struct css_set *find_css_set(
   * knows that the cgroup won't be removed, as cgroup_rmdir()
   * needs that mutex.
   *
- * The cgroup_common_file_write handler for operations that modify
- * the cgroup hierarchy holds cgroup_mutex across the entire operation,
- * single threading all such cgroup modifications across the system.
- *
   * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
   * (usually) take cgroup_mutex.  These are the two most performance
   * critical pieces of code here.  The exception occurs on cgroup_exit(),
@@ -1093,6 +1086,8 @@ static void cgroup_kill_sb(struct super_block *sb) {
         struct cgroupfs_root *root = sb->s_fs_info;
         struct cgroup *cgrp = &root->top_cgroup;
         int ret;
+       struct cg_cgroup_link *link;
+       struct cg_cgroup_link *saved_link;
  
         BUG_ON(!root);
  
@@ -1112,10 +1107,9 @@ static void cgroup_kill_sb(struct super_block *sb) {
          * root cgroup
          */
         write_lock(&css_set_lock);
-       while (!list_empty(&cgrp->css_sets)) {
-               struct cg_cgroup_link *link;
-               link = list_entry(cgrp->css_sets.next,
-                                 struct cg_cgroup_link, cgrp_link_list);
+
+       list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
+                                cgrp_link_list) {
                 list_del(&link->cg_link_list);
                 list_del(&link->cgrp_link_list);
                 kfree(link);
@@ -1281,18 +1275,14 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
  }
  
  /*
- * Attach task with pid 'pid' to cgroup 'cgrp'. Call with
- * cgroup_mutex, may take task_lock of task
+ * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
+ * held. May take task_lock of task
   */
-static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
+static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
  {
-       pid_t pid;
         struct task_struct *tsk;
         int ret;
  
-       if (sscanf(pidbuf, "%d", &pid) != 1)
-               return -EIO;
-
         if (pid) {
                 rcu_read_lock();
                 tsk = find_task_by_vpid(pid);
@@ -1318,6 +1308,16 @@ static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
         return ret;
  }
  
+static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
+{
+       int ret;
+       if (!cgroup_lock_live_group(cgrp))
+               return -ENODEV;
+       ret = attach_task_by_pid(cgrp, pid);
+       cgroup_unlock();
+       return ret;
+}
+
  /* The various types of files and directories in a cgroup file system */
  enum cgroup_filetype {
         FILE_ROOT,
@@ -1327,12 +1327,54 @@ enum cgroup_filetype {
         FILE_RELEASE_AGENT,
  };
  
+/**
+ * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
+ * @cgrp: the cgroup to be checked for liveness
+ *
+ * On success, returns true; the lock should be later released with
+ * cgroup_unlock(). On failure returns false with no lock held.
+ */
+bool cgroup_lock_live_group(struct cgroup *cgrp)
+{
+       mutex_lock(&cgroup_mutex);
+       if (cgroup_is_removed(cgrp)) {
+               mutex_unlock(&cgroup_mutex);
+               return false;
+       }
+       return true;
+}
+
+static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
+                                     const char *buffer)
+{
+       BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+       if (!cgroup_lock_live_group(cgrp))
+               return -ENODEV;
+       strcpy(cgrp->root->release_agent_path, buffer);
+       cgroup_unlock();
+       return 0;
+}
+
+static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
+                                    struct seq_file *seq)
+{
+       if (!cgroup_lock_live_group(cgrp))
+               return -ENODEV;
+       seq_puts(seq, cgrp->root->release_agent_path);
+       seq_putc(seq, '\n');
+       cgroup_unlock();
+       return 0;
+}
+
+/* A buffer size big enough for numbers or short strings */
+#define CGROUP_LOCAL_BUFFER_SIZE 64
+
  static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
                                 struct file *file,
                                 const char __user *userbuf,
                                 size_t nbytes, loff_t *unused_ppos)
  {
-       char buffer[64];
+       char buffer[CGROUP_LOCAL_BUFFER_SIZE];
         int retval = 0;
         char *end;
  
@@ -1361,68 +1403,36 @@ static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
         return retval;
  }
  
-static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
-                                          struct cftype *cft,
-                                          struct file *file,
-                                          const char __user *userbuf,
-                                          size_t nbytes, loff_t *unused_ppos)
+static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
+                                  struct file *file,
+                                  const char __user *userbuf,
+                                  size_t nbytes, loff_t *unused_ppos)
  {
-       enum cgroup_filetype type = cft->private;
-       char *buffer;
+       char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
         int retval = 0;
+       size_t max_bytes = cft->max_write_len;
+       char *buffer = local_buffer;
  
-       if (nbytes >= PATH_MAX)
+       if (!max_bytes)
+               max_bytes = sizeof(local_buffer) - 1;
+       if (nbytes >= max_bytes)
                 return -E2BIG;
-
-       /* +1 for nul-terminator */
-       buffer = kmalloc(nbytes + 1, GFP_KERNEL);
-       if (buffer == NULL)
-               return -ENOMEM;
-
-       if (copy_from_user(buffer, userbuf, nbytes)) {
-               retval = -EFAULT;
-               goto out1;
+       /* Allocate a dynamic buffer if we need one */
+       if (nbytes >= sizeof(local_buffer)) {
+               buffer = kmalloc(nbytes + 1, GFP_KERNEL);
+               if (buffer == NULL)
+                       return -ENOMEM;
         }
-       buffer[nbytes] = 0;     /* nul-terminate */
-       strstrip(buffer);       /* strip -just- trailing whitespace */
-
-       mutex_lock(&cgroup_mutex);
+       if (nbytes && copy_from_user(buffer, userbuf, nbytes))
+               return -EFAULT;
  
-       /*
-        * This was already checked for in cgroup_file_write(), but
-        * check again now we're holding cgroup_mutex.
-        */
-       if (cgroup_is_removed(cgrp)) {
-               retval = -ENODEV;
-               goto out2;
-       }
-
-       switch (type) {
-       case FILE_TASKLIST:
-               retval = attach_task_by_pid(cgrp, buffer);
-               break;
-       case FILE_NOTIFY_ON_RELEASE:
-               clear_bit(CGRP_RELEASABLE, &cgrp->flags);
-               if (simple_strtoul(buffer, NULL, 10) != 0)
-                       set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
-               else
-                       clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
-               break;
-       case FILE_RELEASE_AGENT:
-               BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
-               strcpy(cgrp->root->release_agent_path, buffer);
-               break;
-       default:
-               retval = -EINVAL;
-               goto out2;
-       }
-
-       if (retval == 0)
+       buffer[nbytes] = 0;     /* nul-terminate */
+       strstrip(buffer);
+       retval = cft->write_string(cgrp, cft, buffer);
+       if (!retval)
                 retval = nbytes;
-out2:
-       mutex_unlock(&cgroup_mutex);
-out1:
-       kfree(buffer);
+       if (buffer != local_buffer)
+               kfree(buffer);
         return retval;
  }
  
@@ -1438,6 +1448,8 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
                 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
         if (cft->write_u64 || cft->write_s64)
                 return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
+       if (cft->write_string)
+               return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
         if (cft->trigger) {
                 int ret = cft->trigger(cgrp, (unsigned int)cft->private);
                 return ret ? ret : nbytes;
@@ -1450,7 +1462,7 @@ static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
                                char __user *buf, size_t nbytes,
                                loff_t *ppos)
  {
-       char tmp[64];
+       char tmp[CGROUP_LOCAL_BUFFER_SIZE];
         u64 val = cft->read_u64(cgrp, cft);
         int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
  
@@ -1462,56 +1474,13 @@ static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
                                char __user *buf, size_t nbytes,
                                loff_t *ppos)
  {
-       char tmp[64];
+       char tmp[CGROUP_LOCAL_BUFFER_SIZE];
         s64 val = cft->read_s64(cgrp, cft);
         int len = sprintf(tmp, "%lld\n", (long long) val);
  
         return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
  }
  
-static ssize_t cgroup_common_file_read(struct cgroup *cgrp,
-                                         struct cftype *cft,
-                                         struct file *file,
-                                         char __user *buf,
-                                         size_t nbytes, loff_t *ppos)
-{
-       enum cgroup_filetype type = cft->private;
-       char *page;
-       ssize_t retval = 0;
-       char *s;
-
-       if (!(page = (char *)__get_free_page(GFP_KERNEL)))
-               return -ENOMEM;
-
-       s = page;
-
-       switch (type) {
-       case FILE_RELEASE_AGENT:
-       {
-               struct cgroupfs_root *root;
-               size_t n;
-               mutex_lock(&cgroup_mutex);
-               root = cgrp->root;
-               n = strnlen(root->release_agent_path,
-                           sizeof(root->release_agent_path));
-               n = min(n, (size_t) PAGE_SIZE);
-               strncpy(s, root->release_agent_path, n);
-               mutex_unlock(&cgroup_mutex);
-               s += n;
-               break;
-       }
-       default:
-               retval = -EINVAL;
-               goto out;
-       }
-       *s++ = '\n';
-
-       retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
-out:
-       free_page((unsigned long)page);
-       return retval;
-}
-
  static ssize_t cgroup_file_read(struct file *file, char __user *buf,
                                    size_t nbytes, loff_t *ppos)
  {
@@ -1569,6 +1538,7 @@ int cgroup_seqfile_release(struct inode *inode, struct file *file)
  
  static struct file_operations cgroup_seqfile_operations = {
         .read = seq_read,
+       .write = cgroup_file_write,
         .llseek = seq_lseek,
         .release = cgroup_seqfile_release,
  };
@@ -1756,15 +1726,11 @@ int cgroup_add_files(struct cgroup *cgrp,
  int cgroup_task_count(const struct cgroup *cgrp)
  {
         int count = 0;
-       struct list_head *l;
+       struct cg_cgroup_link *link;
  
         read_lock(&css_set_lock);
-       l = cgrp->css_sets.next;
-       while (l != &cgrp->css_sets) {
-               struct cg_cgroup_link *link =
-                       list_entry(l, struct cg_cgroup_link, cgrp_link_list);
+       list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
                 count += atomic_read(&link->cg->ref.refcount);
-               l = l->next;
         }
         read_unlock(&css_set_lock);
         return count;
@@ -2227,6 +2193,18 @@ static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
         return notify_on_release(cgrp);
  }
  
+static int cgroup_write_notify_on_release(struct cgroup *cgrp,
+                                         struct cftype *cft,
+                                         u64 val)
+{
+       clear_bit(CGRP_RELEASABLE, &cgrp->flags);
+       if (val)
+               set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+       else
+               clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+       return 0;
+}
+
  /*
   * for the common functions, 'private' gives the type of file
   */
@@ -2235,7 +2213,7 @@ static struct cftype files[] = {
                 .name = "tasks",
                 .open = cgroup_tasks_open,
                 .read = cgroup_tasks_read,
-               .write = cgroup_common_file_write,
+               .write_u64 = cgroup_tasks_write,
                 .release = cgroup_tasks_release,
                 .private = FILE_TASKLIST,
         },
@@ -2243,15 +2221,16 @@ static struct cftype files[] = {
         {
                 .name = "notify_on_release",
                 .read_u64 = cgroup_read_notify_on_release,
-               .write = cgroup_common_file_write,
+               .write_u64 = cgroup_write_notify_on_release,
                 .private = FILE_NOTIFY_ON_RELEASE,
         },
  };
  
  static struct cftype cft_release_agent = {
         .name = "release_agent",
-       .read = cgroup_common_file_read,
-       .write = cgroup_common_file_write,
+       .read_seq_string = cgroup_release_agent_show,
+       .write_string = cgroup_release_agent_write,
+       .max_write_len = PATH_MAX,
         .private = FILE_RELEASE_AGENT,
  };
  
@@ -2869,16 +2848,17 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
   * cgroup_clone - clone the cgroup the given subsystem is attached to
   * @tsk: the task to be moved
   * @subsys: the given subsystem
+ * @nodename: the name for the new cgroup
   *
   * Duplicate the current cgroup in the hierarchy that the given
   * subsystem is attached to, and move this task into the new
   * child.
   */
-int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
+                                                       char *nodename)
  {
         struct dentry *dentry;
         int ret = 0;
-       char nodename[MAX_CGROUP_TYPE_NAMELEN];
         struct cgroup *parent, *child;
         struct inode *inode;
         struct css_set *cg;
@@ -2903,8 +2883,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
         cg = tsk->cgroups;
         parent = task_cgroup(tsk, subsys->subsys_id);
  
-       snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "%d", tsk->pid);
-
         /* Pin the hierarchy */
         atomic_inc(&parent->root->sb->s_active);
  
@@ -3078,27 +3056,24 @@ static void cgroup_release_agent(struct work_struct *work)
         while (!list_empty(&release_list)) {
                 char *argv[3], *envp[3];
                 int i;
-               char *pathbuf;
+               char *pathbuf = NULL, *agentbuf = NULL;
                 struct cgroup *cgrp = list_entry(release_list.next,
                                                     struct cgroup,
                                                     release_list);
                 list_del_init(&cgrp->release_list);
                 spin_unlock(&release_list_lock);
                 pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
-               if (!pathbuf) {
-                       spin_lock(&release_list_lock);
-                       continue;
-               }
-
-               if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) {
-                       kfree(pathbuf);
-                       spin_lock(&release_list_lock);
-                       continue;
-               }
+               if (!pathbuf)
+                       goto continue_free;
+               if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
+                       goto continue_free;
+               agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
+               if (!agentbuf)
+                       goto continue_free;
  
                 i = 0;
-               argv[i++] = cgrp->root->release_agent_path;
-               argv[i++] = (char *)pathbuf;
+               argv[i++] = agentbuf;
+               argv[i++] = pathbuf;
                 argv[i] = NULL;
  
                 i = 0;
@@ -3112,8 +3087,10 @@ static void cgroup_release_agent(struct work_struct *work)
                  * be a slow process */
                 mutex_unlock(&cgroup_mutex);
                 call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
-               kfree(pathbuf);
                 mutex_lock(&cgroup_mutex);
+ continue_free:
+               kfree(pathbuf);
+               kfree(agentbuf);
                 spin_lock(&release_list_lock);
         }
         spin_unlock(&release_list_lock);
diff --git a/kernel/cpu.c b/kernel/cpu.c

index 2cc409ce0a8f93012ff93432f4effe6884b0b01c..10ba5f1004a5646bd9960bbe2156291edcb09fb0 100644 (file)
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -285,6 +285,11 @@ out_allowed:
         set_cpus_allowed_ptr(current, &old_allowed);
  out_release:
         cpu_hotplug_done();
+       if (!err) {
+               if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
+                                           hcpu) == NOTIFY_BAD)
+                       BUG();
+       }
         return err;
  }
  
diff --git a/kernel/cpuset.c b/kernel/cpuset.c

index d5738910c34cfbad58d4a90777a2c242ff01ea14..91cf85b36dd577c2a231498575fcea1805ba44b7 100644 (file)
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -227,10 +227,6 @@ static struct cpuset top_cpuset = {
   * The task_struct fields mems_allowed and mems_generation may only
   * be accessed in the context of that task, so require no locks.
   *
- * The cpuset_common_file_write handler for operations that modify
- * the cpuset hierarchy holds cgroup_mutex across the entire operation,
- * single threading all such cpuset modifications across the system.
- *
   * The cpuset_common_file_read() handlers only hold callback_mutex across
   * small pieces of code, such as when reading out possibly multi-word
   * cpumasks and nodemasks.
@@ -369,7 +365,7 @@ void cpuset_update_task_memory_state(void)
                 my_cpusets_mem_gen = top_cpuset.mems_generation;
         } else {
                 rcu_read_lock();
-               my_cpusets_mem_gen = task_cs(current)->mems_generation;
+               my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
                 rcu_read_unlock();
         }
  
@@ -500,11 +496,16 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
  /*
   * rebuild_sched_domains()
   *
- * If the flag 'sched_load_balance' of any cpuset with non-empty
- * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
- * which has that flag enabled, or if any cpuset with a non-empty
- * 'cpus' is removed, then call this routine to rebuild the
- * scheduler's dynamic sched domains.
+ * This routine will be called to rebuild the scheduler's dynamic
+ * sched domains:
+ * - if the flag 'sched_load_balance' of any cpuset with non-empty
+ *   'cpus' changes,
+ * - or if the 'cpus' allowed changes in any cpuset which has that
+ *   flag enabled,
+ * - or if the 'sched_relax_domain_level' of any cpuset which has
+ *   that flag enabled and with non-empty 'cpus' changes,
+ * - or if any cpuset with non-empty 'cpus' is removed,
+ * - or if a cpu gets offlined.
   *
   * This routine builds a partial partition of the systems CPUs
   * (the set of non-overlappping cpumask_t's in the array 'part'
@@ -609,8 +610,13 @@ void rebuild_sched_domains(void)
         while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
                 struct cgroup *cont;
                 struct cpuset *child;   /* scans child cpusets of cp */
+
+               if (cpus_empty(cp->cpus_allowed))
+                       continue;
+
                 if (is_sched_load_balance(cp))
                         csa[csn++] = cp;
+
                 list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
                         child = cgroup_cs(cont);
                         __kfifo_put(q, (void *)&child, sizeof(cp));
@@ -703,36 +709,6 @@ done:
         /* Don't kfree(dattr) -- partition_sched_domains() does that. */
  }
  
-static inline int started_after_time(struct task_struct *t1,
-                                    struct timespec *time,
-                                    struct task_struct *t2)
-{
-       int start_diff = timespec_compare(&t1->start_time, time);
-       if (start_diff > 0) {
-               return 1;
-       } else if (start_diff < 0) {
-               return 0;
-       } else {
-               /*
-                * Arbitrarily, if two processes started at the same
-                * time, we'll say that the lower pointer value
-                * started first. Note that t2 may have exited by now
-                * so this may not be a valid pointer any longer, but
-                * that's fine - it still serves to distinguish
-                * between two tasks started (effectively)
-                * simultaneously.
-                */
-               return t1 > t2;
-       }
-}
-
-static inline int started_after(void *p1, void *p2)
-{
-       struct task_struct *t1 = p1;
-       struct task_struct *t2 = p2;
-       return started_after_time(t1, &t2->start_time, t2);
-}
-
  /**
   * cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's
   * @tsk: task to test
@@ -767,16 +743,50 @@ static void cpuset_change_cpumask(struct task_struct *tsk,
         set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed));
  }
  
+/**
+ * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
+ * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
+ *
+ * Called with cgroup_mutex held
+ *
+ * The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
+ * calling callback functions for each.
+ *
+ * Return 0 if successful, -errno if not.
+ */
+static int update_tasks_cpumask(struct cpuset *cs)
+{
+       struct cgroup_scanner scan;
+       struct ptr_heap heap;
+       int retval;
+
+       /*
+        * cgroup_scan_tasks() will initialize heap->gt for us.
+        * heap_init() is still needed here for we should not change
+        * cs->cpus_allowed when heap_init() fails.
+        */
+       retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
+       if (retval)
+               return retval;
+
+       scan.cg = cs->css.cgroup;
+       scan.test_task = cpuset_test_cpumask;
+       scan.process_task = cpuset_change_cpumask;
+       scan.heap = &heap;
+       retval = cgroup_scan_tasks(&scan);
+
+       heap_free(&heap);
+       return retval;
+}
+
  /**
   * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
   * @cs: the cpuset to consider
   * @buf: buffer of cpu numbers written to this cpuset
   */
-static int update_cpumask(struct cpuset *cs, char *buf)
+static int update_cpumask(struct cpuset *cs, const char *buf)
  {
         struct cpuset trialcs;
-       struct cgroup_scanner scan;
-       struct ptr_heap heap;
         int retval;
         int is_load_balanced;
  
@@ -792,7 +802,6 @@ static int update_cpumask(struct cpuset *cs, char *buf)
          * that parsing.  The validate_change() call ensures that cpusets
          * with tasks have cpus.
          */
-       buf = strstrip(buf);
         if (!*buf) {
                 cpus_clear(trialcs.cpus_allowed);
         } else {
@@ -811,10 +820,6 @@ static int update_cpumask(struct cpuset *cs, char *buf)
         if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
                 return 0;
  
-       retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
-       if (retval)
-               return retval;
-
         is_load_balanced = is_sched_load_balance(&trialcs);
  
         mutex_lock(&callback_mutex);
@@ -825,12 +830,9 @@ static int update_cpumask(struct cpuset *cs, char *buf)
          * Scan tasks in the cpuset, and update the cpumasks of any
          * that need an update.
          */
-       scan.cg = cs->css.cgroup;
-       scan.test_task = cpuset_test_cpumask;
-       scan.process_task = cpuset_change_cpumask;
-       scan.heap = &heap;
-       cgroup_scan_tasks(&scan);
-       heap_free(&heap);
+       retval = update_tasks_cpumask(cs);
+       if (retval < 0)
+               return retval;
  
         if (is_load_balanced)
                 rebuild_sched_domains();
@@ -886,74 +888,25 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
         mutex_unlock(&callback_mutex);
  }
  
-/*
- * Handle user request to change the 'mems' memory placement
- * of a cpuset.  Needs to validate the request, update the
- * cpusets mems_allowed and mems_generation, and for each
- * task in the cpuset, rebind any vma mempolicies and if
- * the cpuset is marked 'memory_migrate', migrate the tasks
- * pages to the new memory.
- *
- * Call with cgroup_mutex held.  May take callback_mutex during call.
- * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
- * lock each such tasks mm->mmap_sem, scan its vma's and rebind
- * their mempolicies to the cpusets new mems_allowed.
- */
-
  static void *cpuset_being_rebound;
  
-static int update_nodemask(struct cpuset *cs, char *buf)
+/**
+ * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
+ * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
+ * @oldmem: old mems_allowed of cpuset cs
+ *
+ * Called with cgroup_mutex held
+ * Return 0 if successful, -errno if not.
+ */
+static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
  {
-       struct cpuset trialcs;
-       nodemask_t oldmem;
         struct task_struct *p;
         struct mm_struct **mmarray;
         int i, n, ntasks;
         int migrate;
         int fudge;
-       int retval;
         struct cgroup_iter it;
-
-       /*
-        * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
-        * it's read-only
-        */
-       if (cs == &top_cpuset)
-               return -EACCES;
-
-       trialcs = *cs;
-
-       /*
-        * An empty mems_allowed is ok iff there are no tasks in the cpuset.
-        * Since nodelist_parse() fails on an empty mask, we special case
-        * that parsing.  The validate_change() call ensures that cpusets
-        * with tasks have memory.
-        */
-       buf = strstrip(buf);
-       if (!*buf) {
-               nodes_clear(trialcs.mems_allowed);
-       } else {
-               retval = nodelist_parse(buf, trialcs.mems_allowed);
-               if (retval < 0)
-                       goto done;
-
-               if (!nodes_subset(trialcs.mems_allowed,
-                               node_states[N_HIGH_MEMORY]))
-                       return -EINVAL;
-       }
-       oldmem = cs->mems_allowed;
-       if (nodes_equal(oldmem, trialcs.mems_allowed)) {
-               retval = 0;             /* Too easy - nothing to do */
-               goto done;
-       }
-       retval = validate_change(cs, &trialcs);
-       if (retval < 0)
-               goto done;
-
-       mutex_lock(&callback_mutex);
-       cs->mems_allowed = trialcs.mems_allowed;
-       cs->mems_generation = cpuset_mems_generation++;
-       mutex_unlock(&callback_mutex);
+       int retval;
  
         cpuset_being_rebound = cs;              /* causes mpol_dup() rebind */
  
@@ -1020,7 +973,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
  
                 mpol_rebind_mm(mm, &cs->mems_allowed);
                 if (migrate)
-                       cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed);
+                       cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
                 mmput(mm);
         }
  
@@ -1032,6 +985,70 @@ done:
         return retval;
  }
  
+/*
+ * Handle user request to change the 'mems' memory placement
+ * of a cpuset.  Needs to validate the request, update the
+ * cpusets mems_allowed and mems_generation, and for each
+ * task in the cpuset, rebind any vma mempolicies and if
+ * the cpuset is marked 'memory_migrate', migrate the tasks
+ * pages to the new memory.
+ *
+ * Call with cgroup_mutex held.  May take callback_mutex during call.
+ * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
+ * lock each such tasks mm->mmap_sem, scan its vma's and rebind
+ * their mempolicies to the cpusets new mems_allowed.
+ */
+static int update_nodemask(struct cpuset *cs, const char *buf)
+{
+       struct cpuset trialcs;
+       nodemask_t oldmem;
+       int retval;
+
+       /*
+        * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY];
+        * it's read-only
+        */
+       if (cs == &top_cpuset)
+               return -EACCES;
+
+       trialcs = *cs;
+
+       /*
+        * An empty mems_allowed is ok iff there are no tasks in the cpuset.
+        * Since nodelist_parse() fails on an empty mask, we special case
+        * that parsing.  The validate_change() call ensures that cpusets
+        * with tasks have memory.
+        */
+       if (!*buf) {
+               nodes_clear(trialcs.mems_allowed);
+       } else {
+               retval = nodelist_parse(buf, trialcs.mems_allowed);
+               if (retval < 0)
+                       goto done;
+
+               if (!nodes_subset(trialcs.mems_allowed,
+                               node_states[N_HIGH_MEMORY]))
+                       return -EINVAL;
+       }
+       oldmem = cs->mems_allowed;
+       if (nodes_equal(oldmem, trialcs.mems_allowed)) {
+               retval = 0;             /* Too easy - nothing to do */
+               goto done;
+       }
+       retval = validate_change(cs, &trialcs);
+       if (retval < 0)
+               goto done;
+
+       mutex_lock(&callback_mutex);
+       cs->mems_allowed = trialcs.mems_allowed;
+       cs->mems_generation = cpuset_mems_generation++;
+       mutex_unlock(&callback_mutex);
+
+       retval = update_tasks_nodemask(cs, &oldmem);
+done:
+       return retval;
+}
+
  int current_cpuset_is_being_rebound(void)
  {
         return task_cs(current) == cpuset_being_rebound;
@@ -1044,7 +1061,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
  
         if (val != cs->relax_domain_level) {
                 cs->relax_domain_level = val;
-               rebuild_sched_domains();
+               if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
+                       rebuild_sched_domains();
         }
  
         return 0;
@@ -1256,72 +1274,14 @@ typedef enum {
         FILE_SPREAD_SLAB,
  } cpuset_filetype_t;
  
-static ssize_t cpuset_common_file_write(struct cgroup *cont,
-                                       struct cftype *cft,
-                                       struct file *file,
-                                       const char __user *userbuf,
-                                       size_t nbytes, loff_t *unused_ppos)
-{
-       struct cpuset *cs = cgroup_cs(cont);
-       cpuset_filetype_t type = cft->private;
-       char *buffer;
-       int retval = 0;
-
-       /* Crude upper limit on largest legitimate cpulist user might write. */
-       if (nbytes > 100U + 6 * max(NR_CPUS, MAX_NUMNODES))
-               return -E2BIG;
-
-       /* +1 for nul-terminator */
-       buffer = kmalloc(nbytes + 1, GFP_KERNEL);
-       if (!buffer)
-               return -ENOMEM;
-
-       if (copy_from_user(buffer, userbuf, nbytes)) {
-               retval = -EFAULT;
-               goto out1;
-       }
-       buffer[nbytes] = 0;     /* nul-terminate */
-
-       cgroup_lock();
-
-       if (cgroup_is_removed(cont)) {
-               retval = -ENODEV;
-               goto out2;
-       }
-
-       switch (type) {
-       case FILE_CPULIST:
-               retval = update_cpumask(cs, buffer);
-               break;
-       case FILE_MEMLIST:
-               retval = update_nodemask(cs, buffer);
-               break;
-       default:
-               retval = -EINVAL;
-               goto out2;
-       }
-
-       if (retval == 0)
-               retval = nbytes;
-out2:
-       cgroup_unlock();
-out1:
-       kfree(buffer);
-       return retval;
-}
-
  static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
  {
         int retval = 0;
         struct cpuset *cs = cgroup_cs(cgrp);
         cpuset_filetype_t type = cft->private;
  
-       cgroup_lock();
-
-       if (cgroup_is_removed(cgrp)) {
-               cgroup_unlock();
+       if (!cgroup_lock_live_group(cgrp))
                 return -ENODEV;
-       }
  
         switch (type) {
         case FILE_CPU_EXCLUSIVE:
@@ -1367,12 +1327,9 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
         struct cpuset *cs = cgroup_cs(cgrp);
         cpuset_filetype_t type = cft->private;
  
-       cgroup_lock();
-
-       if (cgroup_is_removed(cgrp)) {
-               cgroup_unlock();
+       if (!cgroup_lock_live_group(cgrp))
                 return -ENODEV;
-       }
+
         switch (type) {
         case FILE_SCHED_RELAX_DOMAIN_LEVEL:
                 retval = update_relax_domain_level(cs, val);
@@ -1385,6 +1342,32 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
         return retval;
  }
  
+/*
+ * Common handling for a write to a "cpus" or "mems" file.
+ */
+static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
+                               const char *buf)
+{
+       int retval = 0;
+
+       if (!cgroup_lock_live_group(cgrp))
+               return -ENODEV;
+
+       switch (cft->private) {
+       case FILE_CPULIST:
+               retval = update_cpumask(cgroup_cs(cgrp), buf);
+               break;
+       case FILE_MEMLIST:
+               retval = update_nodemask(cgroup_cs(cgrp), buf);
+               break;
+       default:
+               retval = -EINVAL;
+               break;
+       }
+       cgroup_unlock();
+       return retval;
+}
+
  /*
   * These ascii lists should be read in a single call, by using a user
   * buffer large enough to hold the entire map.  If read in smaller
@@ -1504,14 +1487,16 @@ static struct cftype files[] = {
         {
                 .name = "cpus",
                 .read = cpuset_common_file_read,
-               .write = cpuset_common_file_write,
+               .write_string = cpuset_write_resmask,
+               .max_write_len = (100U + 6 * NR_CPUS),
                 .private = FILE_CPULIST,
         },
  
         {
                 .name = "mems",
                 .read = cpuset_common_file_read,
-               .write = cpuset_common_file_write,
+               .write_string = cpuset_write_resmask,
+               .max_write_len = (100U + 6 * MAX_NUMNODES),
                 .private = FILE_MEMLIST,
         },
  
@@ -1792,7 +1777,7 @@ static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
         scan.scan.heap = NULL;
         scan.to = to->css.cgroup;
  
-       if (cgroup_scan_tasks((struct cgroup_scanner *)&scan))
+       if (cgroup_scan_tasks(&scan.scan))
                 printk(KERN_ERR "move_member_tasks_to_cpuset: "
                                 "cgroup_scan_tasks failed\n");
  }
@@ -1852,6 +1837,7 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
         struct cpuset *child;   /* scans child cpusets of cp */
         struct list_head queue;
         struct cgroup *cont;
+       nodemask_t oldmems;
  
         INIT_LIST_HEAD(&queue);
  
@@ -1871,6 +1857,8 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
                     nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
                         continue;
  
+               oldmems = cp->mems_allowed;
+
                 /* Remove offline cpus and mems from this cpuset. */
                 mutex_lock(&callback_mutex);
                 cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map);
@@ -1882,6 +1870,10 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
                 if (cpus_empty(cp->cpus_allowed) ||
                      nodes_empty(cp->mems_allowed))
                         remove_tasks_in_empty_cpuset(cp);
+               else {
+                       update_tasks_cpumask(cp);
+                       update_tasks_nodemask(cp, &oldmems);
+               }
         }
  }
  
@@ -1974,7 +1966,6 @@ void __init cpuset_init_smp(void)
  }
  
  /**
-
   * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
   * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
   * @pmask: pointer to cpumask_t variable to receive cpus_allowed set.
diff --git a/kernel/delayacct.c b/kernel/delayacct.c

index 10e43fd8b721a7c28f44dbb60515efa9d0d4042d..b3179dad71be87d1f0d17258f82286abce3d9c47 100644 (file)
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -145,8 +145,11 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
         d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
         tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
         d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
+       tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
+       d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
         d->blkio_count += tsk->delays->blkio_count;
         d->swapin_count += tsk->delays->swapin_count;
+       d->freepages_count += tsk->delays->freepages_count;
         spin_unlock_irqrestore(&tsk->delays->lock, flags);
  
  done:
@@ -165,3 +168,16 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
         return ret;
  }
  
+void __delayacct_freepages_start(void)
+{
+       delayacct_start(&current->delays->freepages_start);
+}
+
+void __delayacct_freepages_end(void)
+{
+       delayacct_end(&current->delays->freepages_start,
+                       &current->delays->freepages_end,
+                       &current->delays->freepages_delay,
+                       &current->delays->freepages_count);
+}
+
diff --git a/kernel/exit.c b/kernel/exit.c

index 93d2711b938123d34f1c0011e8f91a589b465615..ad933bb29ec765888f4994db299ffc179d7bf4e8 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -85,7 +85,6 @@ static void __exit_signal(struct task_struct *tsk)
         BUG_ON(!sig);
         BUG_ON(!atomic_read(&sig->count));
  
-       rcu_read_lock();
         sighand = rcu_dereference(tsk->sighand);
         spin_lock(&sighand->siglock);
  
@@ -121,6 +120,18 @@ static void __exit_signal(struct task_struct *tsk)
                 sig->nivcsw += tsk->nivcsw;
                 sig->inblock += task_io_get_inblock(tsk);
                 sig->oublock += task_io_get_oublock(tsk);
+#ifdef CONFIG_TASK_XACCT
+               sig->rchar += tsk->rchar;
+               sig->wchar += tsk->wchar;
+               sig->syscr += tsk->syscr;
+               sig->syscw += tsk->syscw;
+#endif /* CONFIG_TASK_XACCT */
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+               sig->ioac.read_bytes += tsk->ioac.read_bytes;
+               sig->ioac.write_bytes += tsk->ioac.write_bytes;
+               sig->ioac.cancelled_write_bytes +=
+                                       tsk->ioac.cancelled_write_bytes;
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
                 sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
                 sig = NULL; /* Marker for below. */
         }
@@ -136,7 +147,6 @@ static void __exit_signal(struct task_struct *tsk)
         tsk->signal = NULL;
         tsk->sighand = NULL;
         spin_unlock(&sighand->siglock);
-       rcu_read_unlock();
  
         __cleanup_sighand(sighand);
         clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
@@ -432,7 +442,7 @@ void daemonize(const char *name, ...)
          * We don't want to have TIF_FREEZE set if the system-wide hibernation
          * or suspend transition begins right now.
          */
-       current->flags |= PF_NOFREEZE;
+       current->flags |= (PF_NOFREEZE | PF_KTHREAD);
  
         if (current->nsproxy != &init_nsproxy) {
                 get_nsproxy(&init_nsproxy);
@@ -666,26 +676,40 @@ assign_new_owner:
  static void exit_mm(struct task_struct * tsk)
  {
         struct mm_struct *mm = tsk->mm;
+       struct core_state *core_state;
  
         mm_release(tsk, mm);
         if (!mm)
                 return;
         /*
          * Serialize with any possible pending coredump.
-        * We must hold mmap_sem around checking core_waiters
+        * We must hold mmap_sem around checking core_state
          * and clearing tsk->mm.  The core-inducing thread
-        * will increment core_waiters for each thread in the
+        * will increment ->nr_threads for each thread in the
          * group with ->mm != NULL.
          */
         down_read(&mm->mmap_sem);
-       if (mm->core_waiters) {
+       core_state = mm->core_state;
+       if (core_state) {
+               struct core_thread self;
                 up_read(&mm->mmap_sem);
-               down_write(&mm->mmap_sem);
-               if (!--mm->core_waiters)
-                       complete(mm->core_startup_done);
-               up_write(&mm->mmap_sem);
  
-               wait_for_completion(&mm->core_done);
+               self.task = tsk;
+               self.next = xchg(&core_state->dumper.next, &self);
+               /*
+                * Implies mb(), the result of xchg() must be visible
+                * to core_state->dumper.
+                */
+               if (atomic_dec_and_test(&core_state->nr_threads))
+                       complete(&core_state->startup);
+
+               for (;;) {
+                       set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+                       if (!self.task) /* see coredump_finish() */
+                               break;
+                       schedule();
+               }
+               __set_task_state(tsk, TASK_RUNNING);
                 down_read(&mm->mmap_sem);
         }
         atomic_inc(&mm->mm_count);
@@ -1354,6 +1378,21 @@ static int wait_task_zombie(struct task_struct *p, int options,
                 psig->coublock +=
                         task_io_get_oublock(p) +
                         sig->oublock + sig->coublock;
+#ifdef CONFIG_TASK_XACCT
+               psig->rchar += p->rchar + sig->rchar;
+               psig->wchar += p->wchar + sig->wchar;
+               psig->syscr += p->syscr + sig->syscr;
+               psig->syscw += p->syscw + sig->syscw;
+#endif /* CONFIG_TASK_XACCT */
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+               psig->ioac.read_bytes +=
+                       p->ioac.read_bytes + sig->ioac.read_bytes;
+               psig->ioac.write_bytes +=
+                       p->ioac.write_bytes + sig->ioac.write_bytes;
+               psig->ioac.cancelled_write_bytes +=
+                               p->ioac.cancelled_write_bytes +
+                               sig->ioac.cancelled_write_bytes;
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
                 spin_unlock_irq(&p->parent->sighand->siglock);
         }
  
diff --git a/kernel/fork.c b/kernel/fork.c

index 552c8d8e77ad5e3fb43f606081c57023bb25e4c6..b99d73e971a4f0f8f1e5c9d61654023e999bc8e1 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -93,6 +93,23 @@ int nr_processes(void)
  static struct kmem_cache *task_struct_cachep;
  #endif
  
+#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
+{
+#ifdef CONFIG_DEBUG_STACK_USAGE
+       gfp_t mask = GFP_KERNEL | __GFP_ZERO;
+#else
+       gfp_t mask = GFP_KERNEL;
+#endif
+       return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
+}
+
+static inline void free_thread_info(struct thread_info *ti)
+{
+       free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+}
+#endif
+
  /* SLAB cache for signal_struct structures (tsk->signal) */
  static struct kmem_cache *signal_cachep;
  
@@ -383,7 +400,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
         INIT_LIST_HEAD(&mm->mmlist);
         mm->flags = (current->mm) ? current->mm->flags
                                   : MMF_DUMP_FILTER_DEFAULT;
-       mm->core_waiters = 0;
+       mm->core_state = NULL;
         mm->nr_ptes = 0;
         set_mm_counter(mm, file_rss, 0);
         set_mm_counter(mm, anon_rss, 0);
@@ -457,7 +474,7 @@ EXPORT_SYMBOL_GPL(mmput);
  /**
   * get_task_mm - acquire a reference to the task's mm
   *
- * Returns %NULL if the task has no mm.  Checks PF_BORROWED_MM (meaning
+ * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
   * this kernel workthread has transiently adopted a user mm with use_mm,
   * to do its AIO) is not set and if so returns a reference to it, after
   * bumping up the use count.  User must release the mm via mmput()
@@ -470,7 +487,7 @@ struct mm_struct *get_task_mm(struct task_struct *task)
         task_lock(task);
         mm = task->mm;
         if (mm) {
-               if (task->flags & PF_BORROWED_MM)
+               if (task->flags & PF_KTHREAD)
                         mm = NULL;
                 else
                         atomic_inc(&mm->mm_users);
@@ -795,6 +812,12 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
         sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
         sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
         sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
+#ifdef CONFIG_TASK_XACCT
+       sig->rchar = sig->wchar = sig->syscr = sig->syscw = 0;
+#endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+       memset(&sig->ioac, 0, sizeof(sig->ioac));
+#endif
         sig->sum_sched_runtime = 0;
         INIT_LIST_HEAD(&sig->cpu_timers[0]);
         INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@ -1090,6 +1113,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         if (clone_flags & CLONE_THREAD)
                 p->tgid = current->tgid;
  
+       if (current->nsproxy != p->nsproxy) {
+               retval = ns_cgroup_clone(p, pid);
+               if (retval)
+                       goto bad_fork_free_pid;
+       }
+
         p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
         /*
          * Clear TID on mm_release()?
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c

index 5bc6e5ecc493bcf3d35e54cdcffca0fb79ac1303..f8914b92b664dc64e3b3dd0642a9aa30a9a72efe 100644 (file)
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -260,9 +260,7 @@ int set_irq_wake(unsigned int irq, unsigned int on)
                 }
         } else {
                 if (desc->wake_depth == 0) {
-                       printk(KERN_WARNING "Unbalanced IRQ %d "
-                                       "wake disable\n", irq);
-                       WARN_ON(1);
+                       WARN(1, "Unbalanced IRQ %d wake disable\n", irq);
                 } else if (--desc->wake_depth == 0) {
                         ret = set_irq_wake_real(irq, on);
                         if (ret)
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c

index 6fc0040f3e3abacade62c7d63f74d240974d80fe..38fc10ac75415a8ff4da69b68d90facf61eb5e41 100644 (file)
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -176,7 +176,7 @@ static unsigned long get_symbol_pos(unsigned long addr,
         high = kallsyms_num_syms;
  
         while (high - low > 1) {
-               mid = (low + high) / 2;
+               mid = low + (high - low) / 2;
                 if (kallsyms_addresses[mid] <= addr)
                         low = mid;
                 else
diff --git a/kernel/kmod.c b/kernel/kmod.c

index 2989f67c4446ab1638dc57134a4f8811cd47bc26..2456d1a0befbd45b91cd7cb7a97ae6d2fb270c4c 100644 (file)
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -352,16 +352,17 @@ static inline void register_pm_notifier_callback(void) {}
   * @path: path to usermode executable
   * @argv: arg vector for process
   * @envp: environment for process
+ * @gfp_mask: gfp mask for memory allocation
   *
   * Returns either %NULL on allocation failure, or a subprocess_info
   * structure.  This should be passed to call_usermodehelper_exec to
   * exec the process and free the structure.
   */
-struct subprocess_info *call_usermodehelper_setup(char *path,
-                                                 char **argv, char **envp)
+struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
+                                                 char **envp, gfp_t gfp_mask)
  {
         struct subprocess_info *sub_info;
-       sub_info = kzalloc(sizeof(struct subprocess_info),  GFP_ATOMIC);
+       sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask);
         if (!sub_info)
                 goto out;
  
@@ -494,7 +495,7 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
         struct subprocess_info *sub_info;
         int ret;
  
-       sub_info = call_usermodehelper_setup(path, argv, envp);
+       sub_info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL);
         if (sub_info == NULL)
                 return -ENOMEM;
  
diff --git a/kernel/kprobes.c b/kernel/kprobes.c

index 1485ca8d0e00503104e81248a7c5e30b58edfb79..75bc2cd9ebc6a7307aa15c404a914559bcab917b 100644 (file)
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -62,6 +62,7 @@
         addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name)))
  #endif
  
+static int kprobes_initialized;
  static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
  static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
  
@@ -69,8 +70,15 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
  static bool kprobe_enabled;
  
  DEFINE_MUTEX(kprobe_mutex);            /* Protects kprobe_table */
-DEFINE_SPINLOCK(kretprobe_lock);       /* Protects kretprobe_inst_table */
  static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
+static struct {
+       spinlock_t lock ____cacheline_aligned;
+} kretprobe_table_locks[KPROBE_TABLE_SIZE];
+
+static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
+{
+       return &(kretprobe_table_locks[hash].lock);
+}
  
  /*
   * Normally, functions that we'd want to prohibit kprobes in, are marked
@@ -368,26 +376,53 @@ void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
         return;
  }
  
-/* Called with kretprobe_lock held */
  void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
                                 struct hlist_head *head)
  {
+       struct kretprobe *rp = ri->rp;
+
         /* remove rp inst off the rprobe_inst_table */
         hlist_del(&ri->hlist);
-       if (ri->rp) {
-               /* remove rp inst off the used list */
-               hlist_del(&ri->uflist);
-               /* put rp inst back onto the free list */
-               INIT_HLIST_NODE(&ri->uflist);
-               hlist_add_head(&ri->uflist, &ri->rp->free_instances);
+       INIT_HLIST_NODE(&ri->hlist);
+       if (likely(rp)) {
+               spin_lock(&rp->lock);
+               hlist_add_head(&ri->hlist, &rp->free_instances);
+               spin_unlock(&rp->lock);
         } else
                 /* Unregistering */
                 hlist_add_head(&ri->hlist, head);
  }
  
-struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
+void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
+                        struct hlist_head **head, unsigned long *flags)
  {
-       return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
+       unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
+       spinlock_t *hlist_lock;
+
+       *head = &kretprobe_inst_table[hash];
+       hlist_lock = kretprobe_table_lock_ptr(hash);
+       spin_lock_irqsave(hlist_lock, *flags);
+}
+
+void __kprobes kretprobe_table_lock(unsigned long hash, unsigned long *flags)
+{
+       spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
+       spin_lock_irqsave(hlist_lock, *flags);
+}
+
+void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
+                                    unsigned long *flags)
+{
+       unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
+       spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
+
+       spin_unlock_irqrestore(hlist_lock, *flags);
+}
+
+void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
+{
+       spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
+       spin_unlock_irqrestore(hlist_lock, *flags);
  }
  
  /*
@@ -401,17 +436,21 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
         struct kretprobe_instance *ri;
         struct hlist_head *head, empty_rp;
         struct hlist_node *node, *tmp;
-       unsigned long flags = 0;
+       unsigned long hash, flags = 0;
  
-       INIT_HLIST_HEAD(&empty_rp);
-       spin_lock_irqsave(&kretprobe_lock, flags);
-       head = kretprobe_inst_table_head(tk);
+       if (unlikely(!kprobes_initialized))
+               /* Early boot.  kretprobe_table_locks not yet initialized. */
+               return;
+
+       hash = hash_ptr(tk, KPROBE_HASH_BITS);
+       head = &kretprobe_inst_table[hash];
+       kretprobe_table_lock(hash, &flags);
         hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
                 if (ri->task == tk)
                         recycle_rp_inst(ri, &empty_rp);
         }
-       spin_unlock_irqrestore(&kretprobe_lock, flags);
-
+       kretprobe_table_unlock(hash, &flags);
+       INIT_HLIST_HEAD(&empty_rp);
         hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
                 hlist_del(&ri->hlist);
                 kfree(ri);
@@ -423,24 +462,29 @@ static inline void free_rp_inst(struct kretprobe *rp)
         struct kretprobe_instance *ri;
         struct hlist_node *pos, *next;
  
-       hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, uflist) {
-               hlist_del(&ri->uflist);
+       hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) {
+               hlist_del(&ri->hlist);
                 kfree(ri);
         }
  }
  
  static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
  {
-       unsigned long flags;
+       unsigned long flags, hash;
         struct kretprobe_instance *ri;
         struct hlist_node *pos, *next;
+       struct hlist_head *head;
+
         /* No race here */
-       spin_lock_irqsave(&kretprobe_lock, flags);
-       hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) {
-               ri->rp = NULL;
-               hlist_del(&ri->uflist);
+       for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
+               kretprobe_table_lock(hash, &flags);
+               head = &kretprobe_inst_table[hash];
+               hlist_for_each_entry_safe(ri, pos, next, head, hlist) {
+                       if (ri->rp == rp)
+                               ri->rp = NULL;
+               }
+               kretprobe_table_unlock(hash, &flags);
         }
-       spin_unlock_irqrestore(&kretprobe_lock, flags);
         free_rp_inst(rp);
  }
  
@@ -831,32 +875,37 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
                                            struct pt_regs *regs)
  {
         struct kretprobe *rp = container_of(p, struct kretprobe, kp);
-       unsigned long flags = 0;
+       unsigned long hash, flags = 0;
+       struct kretprobe_instance *ri;
  
         /*TODO: consider to only swap the RA after the last pre_handler fired */
-       spin_lock_irqsave(&kretprobe_lock, flags);
+       hash = hash_ptr(current, KPROBE_HASH_BITS);
+       spin_lock_irqsave(&rp->lock, flags);
         if (!hlist_empty(&rp->free_instances)) {
-               struct kretprobe_instance *ri;
-
                 ri = hlist_entry(rp->free_instances.first,
-                                struct kretprobe_instance, uflist);
+                               struct kretprobe_instance, hlist);
+               hlist_del(&ri->hlist);
+               spin_unlock_irqrestore(&rp->lock, flags);
+
                 ri->rp = rp;
                 ri->task = current;
  
                 if (rp->entry_handler && rp->entry_handler(ri, regs)) {
-                       spin_unlock_irqrestore(&kretprobe_lock, flags);
+                       spin_unlock_irqrestore(&rp->lock, flags);
                         return 0;
                 }
  
                 arch_prepare_kretprobe(ri, regs);
  
                 /* XXX(hch): why is there no hlist_move_head? */
-               hlist_del(&ri->uflist);
-               hlist_add_head(&ri->uflist, &ri->rp->used_instances);
-               hlist_add_head(&ri->hlist, kretprobe_inst_table_head(ri->task));
-       } else
+               INIT_HLIST_NODE(&ri->hlist);
+               kretprobe_table_lock(hash, &flags);
+               hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
+               kretprobe_table_unlock(hash, &flags);
+       } else {
                 rp->nmissed++;
-       spin_unlock_irqrestore(&kretprobe_lock, flags);
+               spin_unlock_irqrestore(&rp->lock, flags);
+       }
         return 0;
  }
  
@@ -892,7 +941,7 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp,
                 rp->maxactive = NR_CPUS;
  #endif
         }
-       INIT_HLIST_HEAD(&rp->used_instances);
+       spin_lock_init(&rp->lock);
         INIT_HLIST_HEAD(&rp->free_instances);
         for (i = 0; i < rp->maxactive; i++) {
                 inst = kmalloc(sizeof(struct kretprobe_instance) +
@@ -901,8 +950,8 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp,
                         free_rp_inst(rp);
                         return -ENOMEM;
                 }
-               INIT_HLIST_NODE(&inst->uflist);
-               hlist_add_head(&inst->uflist, &rp->free_instances);
+               INIT_HLIST_NODE(&inst->hlist);
+               hlist_add_head(&inst->hlist, &rp->free_instances);
         }
  
         rp->nmissed = 0;
@@ -1009,6 +1058,7 @@ static int __init init_kprobes(void)
         for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
                 INIT_HLIST_HEAD(&kprobe_table[i]);
                 INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
+               spin_lock_init(&(kretprobe_table_locks[i].lock));
         }
  
         /*
@@ -1050,6 +1100,7 @@ static int __init init_kprobes(void)
         err = arch_init_kprobes();
         if (!err)
                 err = register_die_notifier(&kprobe_exceptions_nb);
+       kprobes_initialized = (err == 0);
  
         if (!err)
                 init_test_probes();
@@ -1286,13 +1337,8 @@ EXPORT_SYMBOL_GPL(register_jprobe);
  EXPORT_SYMBOL_GPL(unregister_jprobe);
  EXPORT_SYMBOL_GPL(register_jprobes);
  EXPORT_SYMBOL_GPL(unregister_jprobes);
-#ifdef CONFIG_KPROBES
  EXPORT_SYMBOL_GPL(jprobe_return);
-#endif
-
-#ifdef CONFIG_KPROBES
  EXPORT_SYMBOL_GPL(register_kretprobe);
  EXPORT_SYMBOL_GPL(unregister_kretprobe);
  EXPORT_SYMBOL_GPL(register_kretprobes);
  EXPORT_SYMBOL_GPL(unregister_kretprobes);
-#endif
diff --git a/kernel/marker.c b/kernel/marker.c

index 1abfb923b761f46b3266ec471924ade4fc593ef2..971da531790316bbe4bff400d7dec77e46c76a42 100644 (file)
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -441,7 +441,7 @@ static int remove_marker(const char *name)
         hlist_del(&e->hlist);
         /* Make sure the call_rcu has been executed */
         if (e->rcu_pending)
-               rcu_barrier();
+               rcu_barrier_sched();
         kfree(e);
         return 0;
  }
@@ -476,7 +476,7 @@ static int marker_set_format(struct marker_entry **entry, const char *format)
         hlist_del(&(*entry)->hlist);
         /* Make sure the call_rcu has been executed */
         if ((*entry)->rcu_pending)
-               rcu_barrier();
+               rcu_barrier_sched();
         kfree(*entry);
         *entry = e;
         trace_mark(core_marker_format, "name %s format %s",
@@ -655,7 +655,7 @@ int marker_probe_register(const char *name, const char *format,
          * make sure it's executed now.
          */
         if (entry->rcu_pending)
-               rcu_barrier();
+               rcu_barrier_sched();
         old = marker_entry_add_probe(entry, probe, probe_private);
         if (IS_ERR(old)) {
                 ret = PTR_ERR(old);
@@ -670,10 +670,7 @@ int marker_probe_register(const char *name, const char *format,
         entry->rcu_pending = 1;
         /* write rcu_pending before calling the RCU callback */
         smp_wmb();
-#ifdef CONFIG_PREEMPT_RCU
-       synchronize_sched();    /* Until we have the call_rcu_sched() */
-#endif
-       call_rcu(&entry->rcu, free_old_closure);
+       call_rcu_sched(&entry->rcu, free_old_closure);
  end:
         mutex_unlock(&markers_mutex);
         return ret;
@@ -704,7 +701,7 @@ int marker_probe_unregister(const char *name,
         if (!entry)
                 goto end;
         if (entry->rcu_pending)
-               rcu_barrier();
+               rcu_barrier_sched();
         old = marker_entry_remove_probe(entry, probe, probe_private);
         mutex_unlock(&markers_mutex);
         marker_update_probes();         /* may update entry */
@@ -716,10 +713,7 @@ int marker_probe_unregister(const char *name,
         entry->rcu_pending = 1;
         /* write rcu_pending before calling the RCU callback */
         smp_wmb();
-#ifdef CONFIG_PREEMPT_RCU
-       synchronize_sched();    /* Until we have the call_rcu_sched() */
-#endif
-       call_rcu(&entry->rcu, free_old_closure);
+       call_rcu_sched(&entry->rcu, free_old_closure);
         remove_marker(name);    /* Ignore busy error message */
         ret = 0;
  end:
@@ -786,7 +780,7 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
                 goto end;
         }
         if (entry->rcu_pending)
-               rcu_barrier();
+               rcu_barrier_sched();
         old = marker_entry_remove_probe(entry, NULL, probe_private);
         mutex_unlock(&markers_mutex);
         marker_update_probes();         /* may update entry */
@@ -797,10 +791,7 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
         entry->rcu_pending = 1;
         /* write rcu_pending before calling the RCU callback */
         smp_wmb();
-#ifdef CONFIG_PREEMPT_RCU
-       synchronize_sched();    /* Until we have the call_rcu_sched() */
-#endif
-       call_rcu(&entry->rcu, free_old_closure);
+       call_rcu_sched(&entry->rcu, free_old_closure);
         remove_marker(entry->name);     /* Ignore busy error message */
  end:
         mutex_unlock(&markers_mutex);
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c

index 48d7ed6fc3a4d2dcc206d5eb304f55ae7436ac70..43c2111cd54de719917c0cdc9ace9e92445f4513 100644 (file)
--- a/kernel/ns_cgroup.c
+++ b/kernel/ns_cgroup.c
@@ -7,6 +7,7 @@
  #include <linux/module.h>
  #include <linux/cgroup.h>
  #include <linux/fs.h>
+#include <linux/proc_fs.h>
  #include <linux/slab.h>
  #include <linux/nsproxy.h>
  
@@ -24,9 +25,12 @@ static inline struct ns_cgroup *cgroup_to_ns(
                             struct ns_cgroup, css);
  }
  
-int ns_cgroup_clone(struct task_struct *task)
+int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
  {
-       return cgroup_clone(task, &ns_subsys);
+       char name[PROC_NUMBUF];
+
+       snprintf(name, PROC_NUMBUF, "%d", pid_vnr(pid));
+       return cgroup_clone(task, &ns_subsys, name);
  }
  
  /*
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c

index adc785146a1cb81d6f7b3d676877921f10bb59e7..21575fc46d0597914d4b92b5749c53f59f9be4e1 100644 (file)
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -157,12 +157,6 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
                 goto out;
         }
  
-       err = ns_cgroup_clone(tsk);
-       if (err) {
-               put_nsproxy(new_ns);
-               goto out;
-       }
-
         tsk->nsproxy = new_ns;
  
  out:
@@ -209,7 +203,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
                 goto out;
         }
  
-       err = ns_cgroup_clone(current);
+       err = ns_cgroup_clone(current, task_pid(current));
         if (err)
                 put_nsproxy(*new_nsp);
  
diff --git a/kernel/panic.c b/kernel/panic.c

index 425567f45b9f776148f1767b2f4af2d357a0c2fd..12c5a0a6c89bee42e44c65ee41eb4695ae180983 100644 (file)
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -318,6 +318,28 @@ void warn_on_slowpath(const char *file, int line)
         add_taint(TAINT_WARN);
  }
  EXPORT_SYMBOL(warn_on_slowpath);
+
+
+void warn_slowpath(const char *file, int line, const char *fmt, ...)
+{
+       va_list args;
+       char function[KSYM_SYMBOL_LEN];
+       unsigned long caller = (unsigned long)__builtin_return_address(0);
+       sprint_symbol(function, caller);
+
+       printk(KERN_WARNING "------------[ cut here ]------------\n");
+       printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file,
+               line, function);
+       va_start(args, fmt);
+       vprintk(fmt, args);
+       va_end(args);
+
+       print_modules();
+       dump_stack();
+       print_oops_end_marker();
+       add_taint(TAINT_WARN);
+}
+EXPORT_SYMBOL(warn_slowpath);
  #endif
  
  #ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/kernel/pid.c b/kernel/pid.c

index 30bd5d4b2ac7b7f2f822ba2b69d2726395974df6..064e76afa507a252f6f4de8aec046f03c6cc62a7 100644 (file)
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -309,12 +309,6 @@ struct pid *find_vpid(int nr)
  }
  EXPORT_SYMBOL_GPL(find_vpid);
  
-struct pid *find_pid(int nr)
-{
-       return find_pid_ns(nr, &init_pid_ns);
-}
-EXPORT_SYMBOL_GPL(find_pid);
-
  /*
   * attach_pid() must be called with the tasklist_lock write-held.
   */
@@ -435,6 +429,7 @@ struct pid *find_get_pid(pid_t nr)
  
         return pid;
  }
+EXPORT_SYMBOL_GPL(find_get_pid);
  
  pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
  {
@@ -482,7 +477,7 @@ EXPORT_SYMBOL(task_session_nr_ns);
  /*
   * Used by proc to find the first pid that is greater then or equal to nr.
   *
- * If there is a pid at nr this function is exactly the same as find_pid.
+ * If there is a pid at nr this function is exactly the same as find_pid_ns.
   */
  struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
  {
@@ -497,7 +492,6 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
  
         return pid;
  }
-EXPORT_SYMBOL_GPL(find_get_pid);
  
  /*
   * The pid hash table is scaled according to the amount of memory in the
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c

index 98702b4b8851762969d9ddad76505e5739e8166c..ea567b78d1aa6111bf207ac0556ff1e35b088794 100644 (file)
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -12,6 +12,7 @@
  #include <linux/pid_namespace.h>
  #include <linux/syscalls.h>
  #include <linux/err.h>
+#include <linux/acct.h>
  
  #define BITS_PER_PAGE          (PAGE_SIZE*8)
  
@@ -71,7 +72,7 @@ static struct pid_namespace *create_pid_namespace(unsigned int level)
         struct pid_namespace *ns;
         int i;
  
-       ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
+       ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
         if (ns == NULL)
                 goto out;
  
@@ -84,17 +85,13 @@ static struct pid_namespace *create_pid_namespace(unsigned int level)
                 goto out_free_map;
  
         kref_init(&ns->kref);
-       ns->last_pid = 0;
-       ns->child_reaper = NULL;
         ns->level = level;
  
         set_bit(0, ns->pidmap[0].page);
         atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
  
-       for (i = 1; i < PIDMAP_ENTRIES; i++) {
-               ns->pidmap[i].page = NULL;
+       for (i = 1; i < PIDMAP_ENTRIES; i++)
                 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
-       }
  
         return ns;
  
@@ -185,6 +182,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
  
         /* Child reaper for the pid namespace is going away */
         pid_ns->child_reaper = NULL;
+       acct_exit_ns(pid_ns);
         return;
  }
  
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c

index dbd8398ddb0b41fa1f391849b82199078e72de70..9a21681aa80f82f0123917725f341a526d484c89 100644 (file)
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -449,9 +449,6 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
                 spin_unlock_irqrestore(&idr_lock, flags);
         }
         sigqueue_free(tmr->sigq);
-       if (unlikely(tmr->it_process) &&
-           tmr->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-               put_task_struct(tmr->it_process);
         kmem_cache_free(posix_timers_cache, tmr);
  }
  
@@ -856,11 +853,10 @@ retry_delete:
          * This keeps any tasks waiting on the spin lock from thinking
          * they got something (see the lock code above).
          */
-       if (timer->it_process) {
-               if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-                       put_task_struct(timer->it_process);
-               timer->it_process = NULL;
-       }
+       if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
+               put_task_struct(timer->it_process);
+       timer->it_process = NULL;
+
         unlock_timer(timer, flags);
         release_posix_timer(timer, IT_ID_SET);
         return 0;
@@ -885,11 +881,10 @@ retry_delete:
          * This keeps any tasks waiting on the spin lock from thinking
          * they got something (see the lock code above).
          */
-       if (timer->it_process) {
-               if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
-                       put_task_struct(timer->it_process);
-               timer->it_process = NULL;
-       }
+       if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
+               put_task_struct(timer->it_process);
+       timer->it_process = NULL;
+
         unlock_timer(timer, flags);
         release_posix_timer(timer, IT_ID_SET);
  }
diff --git a/kernel/printk.c b/kernel/printk.c

index 3f7a2a94583bdd1a2d1af1e14d18eaea16a2c52f..a7f7559c5f6cbc2a06413622a09dd3a0bc21782d 100644 (file)
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1308,6 +1308,8 @@ void tty_write_message(struct tty_struct *tty, char *msg)
  }
  
  #if defined CONFIG_PRINTK
+
+DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
  /*
   * printk rate limiting, lifted from the networking subsystem.
   *
@@ -1315,22 +1317,9 @@ void tty_write_message(struct tty_struct *tty, char *msg)
   * every printk_ratelimit_jiffies to make a denial-of-service
   * attack impossible.
   */
-int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst)
-{
-       return __ratelimit(ratelimit_jiffies, ratelimit_burst);
-}
-EXPORT_SYMBOL(__printk_ratelimit);
-
-/* minimum time in jiffies between messages */
-int printk_ratelimit_jiffies = 5 * HZ;
-
-/* number of messages we send before ratelimiting */
-int printk_ratelimit_burst = 10;
-
  int printk_ratelimit(void)
  {
-       return __printk_ratelimit(printk_ratelimit_jiffies,
-                               printk_ratelimit_burst);
+       return __ratelimit(&printk_ratelimit_state);
  }
  EXPORT_SYMBOL(printk_ratelimit);
  
diff --git a/kernel/profile.c b/kernel/profile.c

index 58926411eb2a5c6ca43bb6fd18b140ddeb6d9515..cd26bed4cc26535ca7398eae07fb9b800b762f2e 100644 (file)
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -112,8 +112,6 @@ void __init profile_init(void)
  
  /* Profile event notifications */
  
-#ifdef CONFIG_PROFILING
-
  static BLOCKING_NOTIFIER_HEAD(task_exit_notifier);
  static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
  static BLOCKING_NOTIFIER_HEAD(munmap_notifier);
@@ -203,8 +201,6 @@ void unregister_timer_hook(int (*hook)(struct pt_regs *))
  }
  EXPORT_SYMBOL_GPL(unregister_timer_hook);
  
-#endif /* CONFIG_PROFILING */
-
  
  #ifdef CONFIG_SMP
  /*
diff --git a/kernel/res_counter.c b/kernel/res_counter.c

index d3c61b4ebef238c110ae3994e0d0fdc078fabfc8..f275c8eca772c4dfe6bcdda6b1253a65fefe3e83 100644 (file)
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -13,6 +13,7 @@
  #include <linux/slab.h>
  #include <linux/res_counter.h>
  #include <linux/uaccess.h>
+#include <linux/mm.h>
  
  void res_counter_init(struct res_counter *counter)
  {
@@ -102,44 +103,37 @@ u64 res_counter_read_u64(struct res_counter *counter, int member)
         return *res_counter_member(counter, member);
  }
  
-ssize_t res_counter_write(struct res_counter *counter, int member,
-               const char __user *userbuf, size_t nbytes, loff_t *pos,
-               int (*write_strategy)(char *st_buf, unsigned long long *val))
+int res_counter_memparse_write_strategy(const char *buf,
+                                       unsigned long long *res)
  {
-       int ret;
-       char *buf, *end;
-       unsigned long flags;
-       unsigned long long tmp, *val;
-
-       buf = kmalloc(nbytes + 1, GFP_KERNEL);
-       ret = -ENOMEM;
-       if (buf == NULL)
-               goto out;
+       char *end;
+       /* FIXME - make memparse() take const char* args */
+       *res = memparse((char *)buf, &end);
+       if (*end != '\0')
+               return -EINVAL;
  
-       buf[nbytes] = '\0';
-       ret = -EFAULT;
-       if (copy_from_user(buf, userbuf, nbytes))
-               goto out_free;
+       *res = PAGE_ALIGN(*res);
+       return 0;
+}
  
-       ret = -EINVAL;
+int res_counter_write(struct res_counter *counter, int member,
+                     const char *buf, write_strategy_fn write_strategy)
+{
+       char *end;
+       unsigned long flags;
+       unsigned long long tmp, *val;
  
-       strstrip(buf);
         if (write_strategy) {
-               if (write_strategy(buf, &tmp)) {
-                       goto out_free;
-               }
+               if (write_strategy(buf, &tmp))
+                       return -EINVAL;
         } else {
                 tmp = simple_strtoull(buf, &end, 10);
                 if (*end != '\0')
-                       goto out_free;
+                       return -EINVAL;
         }
         spin_lock_irqsave(&counter->lock, flags);
         val = res_counter_member(counter, member);
         *val = tmp;
         spin_unlock_irqrestore(&counter->lock, flags);
-       ret = nbytes;
-out_free:
-       kfree(buf);
-out:
-       return ret;
+       return 0;
  }
diff --git a/kernel/sched.c b/kernel/sched.c

index 6acf749d33369545310a94ff0fd52341d821bfd0..0047bd9b96aae2d0d86b0e1f528a29e6f2197b08 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4046,6 +4046,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
                 cpustat->nice = cputime64_add(cpustat->nice, tmp);
         else
                 cpustat->user = cputime64_add(cpustat->user, tmp);
+       /* Account for user time used */
+       acct_update_integrals(p);
  }
  
  /*
diff --git a/kernel/signal.c b/kernel/signal.c

index 6c0958e52ea7bdac212353c2b42bbd59e702ce45..82c3545596c54a6e39bfd5e6120c5c4b2fbe3562 100644 (file)
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -338,13 +338,9 @@ unblock_all_signals(void)
         spin_unlock_irqrestore(&current->sighand->siglock, flags);
  }
  
-static int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
+static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
  {
         struct sigqueue *q, *first = NULL;
-       int still_pending = 0;
-
-       if (unlikely(!sigismember(&list->signal, sig)))
-               return 0;
  
         /*
          * Collect the siginfo appropriate to this signal.  Check if
@@ -352,33 +348,30 @@ static int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
         */
         list_for_each_entry(q, &list->list, list) {
                 if (q->info.si_signo == sig) {
-                       if (first) {
-                               still_pending = 1;
-                               break;
-                       }
+                       if (first)
+                               goto still_pending;
                         first = q;
                 }
         }
+
+       sigdelset(&list->signal, sig);
+
         if (first) {
+still_pending:
                 list_del_init(&first->list);
                 copy_siginfo(info, &first->info);
                 __sigqueue_free(first);
-               if (!still_pending)
-                       sigdelset(&list->signal, sig);
         } else {
-
                 /* Ok, it wasn't in the queue.  This must be
                    a fast-pathed signal or we must have been
                    out of queue space.  So zero out the info.
                  */
-               sigdelset(&list->signal, sig);
                 info->si_signo = sig;
                 info->si_errno = 0;
                 info->si_code = 0;
                 info->si_pid = 0;
                 info->si_uid = 0;
         }
-       return 1;
  }
  
  static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
@@ -396,8 +389,7 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
                         }
                 }
  
-               if (!collect_signal(sig, pending, info))
-                       sig = 0;
+               collect_signal(sig, pending, info);
         }
  
         return sig;
@@ -462,8 +454,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
                  * is to alert stop-signal processing code when another
                  * processor has come along and cleared the flag.
                  */
-               if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT))
-                       tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
+               tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
         }
         if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
                 /*
@@ -1125,7 +1116,7 @@ EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
   * is probably wrong.  Should make it like BSD or SYSV.
   */
  
-static int kill_something_info(int sig, struct siginfo *info, int pid)
+static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
  {
         int ret;
  
@@ -1237,17 +1228,6 @@ int kill_pid(struct pid *pid, int sig, int priv)
  }
  EXPORT_SYMBOL(kill_pid);
  
-int
-kill_proc(pid_t pid, int sig, int priv)
-{
-       int ret;
-
-       rcu_read_lock();
-       ret = kill_pid_info(sig, __si_special(priv), find_pid(pid));
-       rcu_read_unlock();
-       return ret;
-}
-
  /*
   * These functions support sending signals using preallocated sigqueue
   * structures.  This is needed "because realtime applications cannot
@@ -1379,10 +1359,9 @@ void do_notify_parent(struct task_struct *tsk, int sig)
  
         info.si_uid = tsk->uid;
  
-       /* FIXME: find out whether or not this is supposed to be c*time. */
-       info.si_utime = cputime_to_jiffies(cputime_add(tsk->utime,
+       info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
                                                        tsk->signal->utime));
-       info.si_stime = cputime_to_jiffies(cputime_add(tsk->stime,
+       info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
                                                        tsk->signal->stime));
  
         info.si_status = tsk->exit_code & 0x7f;
@@ -1450,9 +1429,8 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
  
         info.si_uid = tsk->uid;
  
-       /* FIXME: find out whether or not this is supposed to be c*time. */
-       info.si_utime = cputime_to_jiffies(tsk->utime);
-       info.si_stime = cputime_to_jiffies(tsk->stime);
+       info.si_utime = cputime_to_clock_t(tsk->utime);
+       info.si_stime = cputime_to_clock_t(tsk->stime);
  
         info.si_code = why;
         switch (why) {
@@ -1491,10 +1469,10 @@ static inline int may_ptrace_stop(void)
          * is a deadlock situation, and pointless because our tracer
          * is dead so don't allow us to stop.
          * If SIGKILL was already sent before the caller unlocked
-        * ->siglock we must see ->core_waiters != 0. Otherwise it
+        * ->siglock we must see ->core_state != NULL. Otherwise it
          * is safe to enter schedule().
          */
-       if (unlikely(current->mm->core_waiters) &&
+       if (unlikely(current->mm->core_state) &&
             unlikely(current->mm == current->parent->mm))
                 return 0;
  
@@ -1507,9 +1485,8 @@ static inline int may_ptrace_stop(void)
   */
  static int sigkill_pending(struct task_struct *tsk)
  {
-       return ((sigismember(&tsk->pending.signal, SIGKILL) ||
-                sigismember(&tsk->signal->shared_pending.signal, SIGKILL)) &&
-               !unlikely(sigismember(&tsk->blocked, SIGKILL)));
+       return  sigismember(&tsk->pending.signal, SIGKILL) ||
+               sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
  }
  
  /*
@@ -1525,8 +1502,6 @@ static int sigkill_pending(struct task_struct *tsk)
   */
  static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
  {
-       int killed = 0;
-
         if (arch_ptrace_stop_needed(exit_code, info)) {
                 /*
                  * The arch code has something special to do before a
@@ -1542,7 +1517,8 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
                 spin_unlock_irq(&current->sighand->siglock);
                 arch_ptrace_stop(exit_code, info);
                 spin_lock_irq(&current->sighand->siglock);
-               killed = sigkill_pending(current);
+               if (sigkill_pending(current))
+                       return;
         }
  
         /*
@@ -1559,7 +1535,7 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
         __set_current_state(TASK_TRACED);
         spin_unlock_irq(&current->sighand->siglock);
         read_lock(&tasklist_lock);
-       if (!unlikely(killed) && may_ptrace_stop()) {
+       if (may_ptrace_stop()) {
                 do_notify_parent_cldstop(current, CLD_TRAPPED);
                 read_unlock(&tasklist_lock);
                 schedule();
@@ -1658,8 +1634,7 @@ static int do_signal_stop(int signr)
         } else {
                 struct task_struct *t;
  
-               if (unlikely((sig->flags & (SIGNAL_STOP_DEQUEUED | SIGNAL_UNKILLABLE))
-                                        != SIGNAL_STOP_DEQUEUED) ||
+               if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
                     unlikely(signal_group_exit(sig)))
                         return 0;
                 /*
@@ -1920,7 +1895,6 @@ EXPORT_SYMBOL(recalc_sigpending);
  EXPORT_SYMBOL_GPL(dequeue_signal);
  EXPORT_SYMBOL(flush_signals);
  EXPORT_SYMBOL(force_sig);
-EXPORT_SYMBOL(kill_proc);
  EXPORT_SYMBOL(ptrace_notify);
  EXPORT_SYMBOL(send_sig);
  EXPORT_SYMBOL(send_sig_info);
@@ -2196,7 +2170,7 @@ sys_rt_sigtimedwait(const sigset_t __user *uthese,
  }
  
  asmlinkage long
-sys_kill(int pid, int sig)
+sys_kill(pid_t pid, int sig)
  {
         struct siginfo info;
  
@@ -2209,7 +2183,7 @@ sys_kill(int pid, int sig)
         return kill_something_info(sig, &info, pid);
  }
  
-static int do_tkill(int tgid, int pid, int sig)
+static int do_tkill(pid_t tgid, pid_t pid, int sig)
  {
         int error;
         struct siginfo info;
@@ -2255,7 +2229,7 @@ static int do_tkill(int tgid, int pid, int sig)
   *  exists but it's not belonging to the target process anymore. This
   *  method solves the problem of threads exiting and PIDs getting reused.
   */
-asmlinkage long sys_tgkill(int tgid, int pid, int sig)
+asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig)
  {
         /* This is only valid for single tasks */
         if (pid <= 0 || tgid <= 0)
@@ -2268,7 +2242,7 @@ asmlinkage long sys_tgkill(int tgid, int pid, int sig)
   *  Send a signal to only one task, even if it's a CLONE_THREAD task.
   */
  asmlinkage long
-sys_tkill(int pid, int sig)
+sys_tkill(pid_t pid, int sig)
  {
         /* This is only valid for single tasks */
         if (pid <= 0)
@@ -2278,7 +2252,7 @@ sys_tkill(int pid, int sig)
  }
  
  asmlinkage long
-sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo)
+sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo)
  {
         siginfo_t info;
  
diff --git a/kernel/sys.c b/kernel/sys.c

index 14e97282eb6c941605913dc68c2d4e2778cbe3b6..0c9d3fa1f5ffc45d3dfe71daf459ff9994cb2f10 100644 (file)
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1343,8 +1343,6 @@ EXPORT_SYMBOL(in_egroup_p);
  
  DECLARE_RWSEM(uts_sem);
  
-EXPORT_SYMBOL(uts_sem);
-
  asmlinkage long sys_newuname(struct new_utsname __user * name)
  {
         int errno = 0;
@@ -1795,7 +1793,7 @@ int orderly_poweroff(bool force)
                 goto out;
         }
  
-       info = call_usermodehelper_setup(argv[0], argv, envp);
+       info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
         if (info == NULL) {
                 argv_free(argv);
                 goto out;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c

index bd66ac5406f398cd0c6544132f9864a3b2dbb3b0..08d6e1bb99ac351f28bfe768acf49c17e04b0593 100644 (file)
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -57,6 +57,7 @@ cond_syscall(compat_sys_set_robust_list);
  cond_syscall(sys_get_robust_list);
  cond_syscall(compat_sys_get_robust_list);
  cond_syscall(sys_epoll_create);
+cond_syscall(sys_epoll_create1);
  cond_syscall(sys_epoll_ctl);
  cond_syscall(sys_epoll_wait);
  cond_syscall(sys_epoll_pwait);
@@ -159,6 +160,7 @@ cond_syscall(sys_ioprio_get);
  cond_syscall(sys_signalfd);
  cond_syscall(sys_signalfd4);
  cond_syscall(compat_sys_signalfd);
+cond_syscall(compat_sys_signalfd4);
  cond_syscall(sys_timerfd_create);
  cond_syscall(sys_timerfd_settime);
  cond_syscall(sys_timerfd_gettime);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c

index 1a8299d1fe5987edbe86665c738162a4e22b16a3..35a50db9b6cee1267b5865f543a8a8d86157a65a 100644 (file)
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -624,7 +624,7 @@ static struct ctl_table kern_table[] = {
         {
                 .ctl_name       = KERN_PRINTK_RATELIMIT,
                 .procname       = "printk_ratelimit",
-               .data           = &printk_ratelimit_jiffies,
+               .data           = &printk_ratelimit_state.interval,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
                 .proc_handler   = &proc_dointvec_jiffies,
@@ -633,7 +633,7 @@ static struct ctl_table kern_table[] = {
         {
                 .ctl_name       = KERN_PRINTK_RATELIMIT_BURST,
                 .procname       = "printk_ratelimit_burst",
-               .data           = &printk_ratelimit_burst,
+               .data           = &printk_ratelimit_state.burst,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
                 .proc_handler   = &proc_dointvec,
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c

index c09350d564f2c4d3baa5b1510ed0d42aeabdc6ba..c35da23ab8fb025f0fd0a03b54f90df3fe246cad 100644 (file)
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -1532,6 +1532,8 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
                         sysctl_check_leaf(namespaces, table, &fail);
                 }
                 sysctl_check_bin_path(table, &fail);
+               if (table->mode > 0777)
+                       set_fail(&fail, table, "bogus .mode");
                 if (fail) {
                         set_fail(&fail, table, NULL);
                         error = -EINVAL;
diff --git a/kernel/taskstats.c b/kernel/taskstats.c

index 06b17547f4e76869f16fedddc00f3a880fc6c364..bd6be76303cf23b4c52b757a93bf1966bdbb4f0a 100644 (file)
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -35,7 +35,7 @@
   */
  #define TASKSTATS_CPUMASK_MAXLEN       (100+6*NR_CPUS)
  
-static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
+static DEFINE_PER_CPU(__u32, taskstats_seqnum);
  static int family_registered;
  struct kmem_cache *taskstats_cache;
  
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c

index 63528086337c0145b6e59e4a77c9e80b40686408..ce2d723c10e19d49495da09c585d122538bbbf89 100644 (file)
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -161,7 +161,7 @@ static void timer_notify(struct pt_regs *regs, int cpu)
                 __trace_special(tr, data, 2, regs->ip, 0);
  
                 while (i < sample_max_depth) {
-                       frame.next_fp = 0;
+                       frame.next_fp = NULL;
                         frame.return_address = 0;
                         if (!copy_stack_frame(fp, &frame))
                                 break;
diff --git a/kernel/tsacct.c b/kernel/tsacct.c

index 4ab1b584961b922afacc61f80750d1f0396ac0ec..3da47ccdc5e594b242dc30f1e3471a27822c53f8 100644 (file)
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -28,14 +28,14 @@
  void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
  {
         struct timespec uptime, ts;
-       s64 ac_etime;
+       u64 ac_etime;
  
         BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
  
         /* calculate task elapsed time in timespec */
         do_posix_clock_monotonic_gettime(&uptime);
         ts = timespec_sub(uptime, tsk->start_time);
-       /* rebase elapsed time to usec */
+       /* rebase elapsed time to usec (should never be negative) */
         ac_etime = timespec_to_ns(&ts);
         do_div(ac_etime, NSEC_PER_USEC);
         stats->ac_etime = ac_etime;
@@ -84,9 +84,9 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
  {
         struct mm_struct *mm;
  
-       /* convert pages-jiffies to Mbyte-usec */
-       stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB;
-       stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB;
+       /* convert pages-usec to Mbyte-usec */
+       stats->coremem = p->acct_rss_mem1 * PAGE_SIZE / MB;
+       stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE / MB;
         mm = get_task_mm(p);
         if (mm) {
                 /* adjust to KB unit */
@@ -118,12 +118,19 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
  void acct_update_integrals(struct task_struct *tsk)
  {
         if (likely(tsk->mm)) {
-               long delta = cputime_to_jiffies(
-                       cputime_sub(tsk->stime, tsk->acct_stimexpd));
+               cputime_t time, dtime;
+               struct timeval value;
+               u64 delta;
+
+               time = tsk->stime + tsk->utime;
+               dtime = cputime_sub(time, tsk->acct_timexpd);
+               jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
+               delta = value.tv_sec;
+               delta = delta * USEC_PER_SEC + value.tv_usec;
  
                 if (delta == 0)
                         return;
-               tsk->acct_stimexpd = tsk->stime;
+               tsk->acct_timexpd = time;
                 tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
                 tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
         }
@@ -135,7 +142,7 @@ void acct_update_integrals(struct task_struct *tsk)
   */
  void acct_clear_integrals(struct task_struct *tsk)
  {
-       tsk->acct_stimexpd = 0;
+       tsk->acct_timexpd = 0;
         tsk->acct_rss_mem1 = 0;
         tsk->acct_vm_mem1 = 0;
  }
diff --git a/kernel/workqueue.c b/kernel/workqueue.c

index 6fd158b2102688c254b4dd23614eb8783c078289..ec7e4f62aaff4e71051eefb7d684e8ff4868a5ad 100644 (file)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -125,7 +125,7 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
  }
  
  static void insert_work(struct cpu_workqueue_struct *cwq,
-                               struct work_struct *work, int tail)
+                       struct work_struct *work, struct list_head *head)
  {
         set_wq_data(work, cwq);
         /*
@@ -133,10 +133,7 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
          * result of list_add() below, see try_to_grab_pending().
          */
         smp_wmb();
-       if (tail)
-               list_add_tail(&work->entry, &cwq->worklist);
-       else
-               list_add(&work->entry, &cwq->worklist);
+       list_add_tail(&work->entry, head);
         wake_up(&cwq->more_work);
  }
  
@@ -146,7 +143,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
         unsigned long flags;
  
         spin_lock_irqsave(&cwq->lock, flags);
-       insert_work(cwq, work, 1);
+       insert_work(cwq, work, &cwq->worklist);
         spin_unlock_irqrestore(&cwq->lock, flags);
  }
  
@@ -162,14 +159,11 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
   */
  int queue_work(struct workqueue_struct *wq, struct work_struct *work)
  {
-       int ret = 0;
+       int ret;
+
+       ret = queue_work_on(get_cpu(), wq, work);
+       put_cpu();
  
-       if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
-               BUG_ON(!list_empty(&work->entry));
-               __queue_work(wq_per_cpu(wq, get_cpu()), work);
-               put_cpu();
-               ret = 1;
-       }
         return ret;
  }
  EXPORT_SYMBOL_GPL(queue_work);
@@ -361,14 +355,14 @@ static void wq_barrier_func(struct work_struct *work)
  }
  
  static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
-                                       struct wq_barrier *barr, int tail)
+                       struct wq_barrier *barr, struct list_head *head)
  {
         INIT_WORK(&barr->work, wq_barrier_func);
         __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
  
         init_completion(&barr->done);
  
-       insert_work(cwq, &barr->work, tail);
+       insert_work(cwq, &barr->work, head);
  }
  
  static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
@@ -388,7 +382,7 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
                 active = 0;
                 spin_lock_irq(&cwq->lock);
                 if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
-                       insert_wq_barrier(cwq, &barr, 1);
+                       insert_wq_barrier(cwq, &barr, &cwq->worklist);
                         active = 1;
                 }
                 spin_unlock_irq(&cwq->lock);
@@ -426,6 +420,57 @@ void flush_workqueue(struct workqueue_struct *wq)
  }
  EXPORT_SYMBOL_GPL(flush_workqueue);
  
+/**
+ * flush_work - block until a work_struct's callback has terminated
+ * @work: the work which is to be flushed
+ *
+ * Returns false if @work has already terminated.
+ *
+ * It is expected that, prior to calling flush_work(), the caller has
+ * arranged for the work to not be requeued, otherwise it doesn't make
+ * sense to use this function.
+ */
+int flush_work(struct work_struct *work)
+{
+       struct cpu_workqueue_struct *cwq;
+       struct list_head *prev;
+       struct wq_barrier barr;
+
+       might_sleep();
+       cwq = get_wq_data(work);
+       if (!cwq)
+               return 0;
+
+       lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+       lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
+
+       prev = NULL;
+       spin_lock_irq(&cwq->lock);
+       if (!list_empty(&work->entry)) {
+               /*
+                * See the comment near try_to_grab_pending()->smp_rmb().
+                * If it was re-queued under us we are not going to wait.
+                */
+               smp_rmb();
+               if (unlikely(cwq != get_wq_data(work)))
+                       goto out;
+               prev = &work->entry;
+       } else {
+               if (cwq->current_work != work)
+                       goto out;
+               prev = &cwq->worklist;
+       }
+       insert_wq_barrier(cwq, &barr, prev->next);
+out:
+       spin_unlock_irq(&cwq->lock);
+       if (!prev)
+               return 0;
+
+       wait_for_completion(&barr.done);
+       return 1;
+}
+EXPORT_SYMBOL_GPL(flush_work);
+
  /*
   * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
   * so this work can't be re-armed in any way.
@@ -473,7 +518,7 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
  
         spin_lock_irq(&cwq->lock);
         if (unlikely(cwq->current_work == work)) {
-               insert_wq_barrier(cwq, &barr, 0);
+               insert_wq_barrier(cwq, &barr, cwq->worklist.next);
                 running = 1;
         }
         spin_unlock_irq(&cwq->lock);
@@ -644,10 +689,10 @@ int schedule_on_each_cpu(work_func_t func)
                 struct work_struct *work = per_cpu_ptr(works, cpu);
  
                 INIT_WORK(work, func);
-               set_bit(WORK_STRUCT_PENDING, work_data_bits(work));
-               __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work);
+               schedule_work_on(cpu, work);
         }
-       flush_workqueue(keventd_wq);
+       for_each_online_cpu(cpu)
+               flush_work(per_cpu_ptr(works, cpu));
         put_online_cpus();
         free_percpu(works);
         return 0;
@@ -784,7 +829,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
                 err = create_workqueue_thread(cwq, singlethread_cpu);
                 start_workqueue_thread(cwq, -1);
         } else {
-               get_online_cpus();
+               cpu_maps_update_begin();
                 spin_lock(&workqueue_lock);
                 list_add(&wq->list, &workqueues);
                 spin_unlock(&workqueue_lock);
@@ -796,7 +841,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
                         err = create_workqueue_thread(cwq, cpu);
                         start_workqueue_thread(cwq, cpu);
                 }
-               put_online_cpus();
+               cpu_maps_update_done();
         }
  
         if (err) {
@@ -810,8 +855,8 @@ EXPORT_SYMBOL_GPL(__create_workqueue_key);
  static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
  {
         /*
-        * Our caller is either destroy_workqueue() or CPU_DEAD,
-        * get_online_cpus() protects cwq->thread.
+        * Our caller is either destroy_workqueue() or CPU_POST_DEAD,
+        * cpu_add_remove_lock protects cwq->thread.
          */
         if (cwq->thread == NULL)
                 return;
@@ -821,7 +866,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
  
         flush_cpu_workqueue(cwq);
         /*
-        * If the caller is CPU_DEAD and cwq->worklist was not empty,
+        * If the caller is CPU_POST_DEAD and cwq->worklist was not empty,
          * a concurrent flush_workqueue() can insert a barrier after us.
          * However, in that case run_workqueue() won't return and check
          * kthread_should_stop() until it flushes all work_struct's.
@@ -845,14 +890,14 @@ void destroy_workqueue(struct workqueue_struct *wq)
         const cpumask_t *cpu_map = wq_cpu_map(wq);
         int cpu;
  
-       get_online_cpus();
+       cpu_maps_update_begin();
         spin_lock(&workqueue_lock);
         list_del(&wq->list);
         spin_unlock(&workqueue_lock);
  
         for_each_cpu_mask_nr(cpu, *cpu_map)
                 cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu));
-       put_online_cpus();
+       cpu_maps_update_done();
  
         free_percpu(wq->cpu_wq);
         kfree(wq);
@@ -866,6 +911,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
         unsigned int cpu = (unsigned long)hcpu;
         struct cpu_workqueue_struct *cwq;
         struct workqueue_struct *wq;
+       int ret = NOTIFY_OK;
  
         action &= ~CPU_TASKS_FROZEN;
  
@@ -873,7 +919,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
         case CPU_UP_PREPARE:
                 cpu_set(cpu, cpu_populated_map);
         }
-
+undo:
         list_for_each_entry(wq, &workqueues, list) {
                 cwq = per_cpu_ptr(wq->cpu_wq, cpu);
  
@@ -883,7 +929,9 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
                                 break;
                         printk(KERN_ERR "workqueue [%s] for %i failed\n",
                                 wq->name, cpu);
-                       return NOTIFY_BAD;
+                       action = CPU_UP_CANCELED;
+                       ret = NOTIFY_BAD;
+                       goto undo;
  
                 case CPU_ONLINE:
                         start_workqueue_thread(cwq, cpu);
@@ -891,7 +939,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
  
                 case CPU_UP_CANCELED:
                         start_workqueue_thread(cwq, -1);
-               case CPU_DEAD:
+               case CPU_POST_DEAD:
                         cleanup_workqueue_thread(cwq);
                         break;
                 }
@@ -899,11 +947,11 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
  
         switch (action) {
         case CPU_UP_CANCELED:
-       case CPU_DEAD:
+       case CPU_POST_DEAD:
                 cpu_clear(cpu, cpu_populated_map);
         }
  
-       return NOTIFY_OK;
+       return ret;
  }
  
  void __init init_workqueues(void)
diff --git a/lib/cmdline.c b/lib/cmdline.c

index f596c08d213a0726c5f395209a3e20d730c3ea2c..5ba8a942a478fe8d75f895ea7f26f876810772a5 100644 (file)
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -116,7 +116,7 @@ char *get_options(const char *str, int nints, int *ints)
  /**
   *     memparse - parse a string with mem suffixes into a number
   *     @ptr: Where parse begins
- *     @retptr: (output) Pointer to next char after parse completes
+ *     @retptr: (output) Optional pointer to next char after parse completes
   *
   *     Parses a string into a number.  The number stored at @ptr is
   *     potentially suffixed with %K (for kilobytes, or 1024 bytes),
@@ -126,11 +126,13 @@ char *get_options(const char *str, int nints, int *ints)
   *     megabyte, or one gigabyte, respectively.
   */
  
-unsigned long long memparse (char *ptr, char **retptr)
+unsigned long long memparse(char *ptr, char **retptr)
  {
-       unsigned long long ret = simple_strtoull (ptr, retptr, 0);
+       char *endptr;   /* local pointer to end of parsed string */
  
-       switch (**retptr) {
+       unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
+
+       switch (*endptr) {
         case 'G':
         case 'g':
                 ret <<= 10;
@@ -140,10 +142,14 @@ unsigned long long memparse (char *ptr, char **retptr)
         case 'K':
         case 'k':
                 ret <<= 10;
-               (*retptr)++;
+               endptr++;
         default:
                 break;
         }
+
+       if (retptr)
+               *retptr = endptr;
+
         return ret;
  }
  
diff --git a/lib/idr.c b/lib/idr.c

index 7a02e173f02773c2bc5fc3bfa763e269ef1b9f7a..3476f8203e975f5fcba7df106ac288aae9524d4c 100644 (file)
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -6,6 +6,8 @@
   * Modified by George Anzinger to reuse immediately and to use
   * find bit instructions.  Also removed _irq on spinlocks.
   *
+ * Modified by Nadia Derbey to make it RCU safe.
+ *
   * Small id to pointer translation service.
   *
   * It uses a radix tree like structure as a sparse array indexed
@@ -35,7 +37,7 @@
  
  static struct kmem_cache *idr_layer_cache;
  
-static struct idr_layer *alloc_layer(struct idr *idp)
+static struct idr_layer *get_from_free_list(struct idr *idp)
  {
         struct idr_layer *p;
         unsigned long flags;
@@ -50,15 +52,28 @@ static struct idr_layer *alloc_layer(struct idr *idp)
         return(p);
  }
  
+static void idr_layer_rcu_free(struct rcu_head *head)
+{
+       struct idr_layer *layer;
+
+       layer = container_of(head, struct idr_layer, rcu_head);
+       kmem_cache_free(idr_layer_cache, layer);
+}
+
+static inline void free_layer(struct idr_layer *p)
+{
+       call_rcu(&p->rcu_head, idr_layer_rcu_free);
+}
+
  /* only called when idp->lock is held */
-static void __free_layer(struct idr *idp, struct idr_layer *p)
+static void __move_to_free_list(struct idr *idp, struct idr_layer *p)
  {
         p->ary[0] = idp->id_free;
         idp->id_free = p;
         idp->id_free_cnt++;
  }
  
-static void free_layer(struct idr *idp, struct idr_layer *p)
+static void move_to_free_list(struct idr *idp, struct idr_layer *p)
  {
         unsigned long flags;
  
@@ -66,7 +81,7 @@ static void free_layer(struct idr *idp, struct idr_layer *p)
          * Depends on the return element being zeroed.
          */
         spin_lock_irqsave(&idp->lock, flags);
-       __free_layer(idp, p);
+       __move_to_free_list(idp, p);
         spin_unlock_irqrestore(&idp->lock, flags);
  }
  
@@ -96,7 +111,7 @@ static void idr_mark_full(struct idr_layer **pa, int id)
   * @gfp_mask:  memory allocation flags
   *
   * This function should be called prior to locking and calling the
- * following function.  It preallocates enough memory to satisfy
+ * idr_get_new* functions. It preallocates enough memory to satisfy
   * the worst possible allocation.
   *
   * If the system is REALLY out of memory this function returns 0,
@@ -109,7 +124,7 @@ int idr_pre_get(struct idr *idp, gfp_t gfp_mask)
                 new = kmem_cache_alloc(idr_layer_cache, gfp_mask);
                 if (new == NULL)
                         return (0);
-               free_layer(idp, new);
+               move_to_free_list(idp, new);
         }
         return 1;
  }
@@ -143,7 +158,7 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
                         /* if already at the top layer, we need to grow */
                         if (!(p = pa[l])) {
                                 *starting_id = id;
-                               return -2;
+                               return IDR_NEED_TO_GROW;
                         }
  
                         /* If we need to go up one layer, continue the
@@ -160,16 +175,17 @@ static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
                         id = ((id >> sh) ^ n ^ m) << sh;
                 }
                 if ((id >= MAX_ID_BIT) || (id < 0))
-                       return -3;
+                       return IDR_NOMORE_SPACE;
                 if (l == 0)
                         break;
                 /*
                  * Create the layer below if it is missing.
                  */
                 if (!p->ary[m]) {
-                       if (!(new = alloc_layer(idp)))
+                       new = get_from_free_list(idp);
+                       if (!new)
                                 return -1;
-                       p->ary[m] = new;
+                       rcu_assign_pointer(p->ary[m], new);
                         p->count++;
                 }
                 pa[l--] = p;
@@ -192,7 +208,7 @@ build_up:
         p = idp->top;
         layers = idp->layers;
         if (unlikely(!p)) {
-               if (!(p = alloc_layer(idp)))
+               if (!(p = get_from_free_list(idp)))
                         return -1;
                 layers = 1;
         }
@@ -204,7 +220,7 @@ build_up:
                 layers++;
                 if (!p->count)
                         continue;
-               if (!(new = alloc_layer(idp))) {
+               if (!(new = get_from_free_list(idp))) {
                         /*
                          * The allocation failed.  If we built part of
                          * the structure tear it down.
@@ -214,7 +230,7 @@ build_up:
                                 p = p->ary[0];
                                 new->ary[0] = NULL;
                                 new->bitmap = new->count = 0;
-                               __free_layer(idp, new);
+                               __move_to_free_list(idp, new);
                         }
                         spin_unlock_irqrestore(&idp->lock, flags);
                         return -1;
@@ -225,10 +241,10 @@ build_up:
                         __set_bit(0, &new->bitmap);
                 p = new;
         }
-       idp->top = p;
+       rcu_assign_pointer(idp->top, p);
         idp->layers = layers;
         v = sub_alloc(idp, &id, pa);
-       if (v == -2)
+       if (v == IDR_NEED_TO_GROW)
                 goto build_up;
         return(v);
  }
@@ -244,7 +260,8 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id)
                  * Successfully found an empty slot.  Install the user
                  * pointer and mark the slot full.
                  */
-               pa[0]->ary[id & IDR_MASK] = (struct idr_layer *)ptr;
+               rcu_assign_pointer(pa[0]->ary[id & IDR_MASK],
+                               (struct idr_layer *)ptr);
                 pa[0]->count++;
                 idr_mark_full(pa, id);
         }
@@ -277,12 +294,8 @@ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
          * This is a cheap hack until the IDR code can be fixed to
          * return proper error values.
          */
-       if (rv < 0) {
-               if (rv == -1)
-                       return -EAGAIN;
-               else /* Will be -3 */
-                       return -ENOSPC;
-       }
+       if (rv < 0)
+               return _idr_rc_to_errno(rv);
         *id = rv;
         return 0;
  }
@@ -312,12 +325,8 @@ int idr_get_new(struct idr *idp, void *ptr, int *id)
          * This is a cheap hack until the IDR code can be fixed to
          * return proper error values.
          */
-       if (rv < 0) {
-               if (rv == -1)
-                       return -EAGAIN;
-               else /* Will be -3 */
-                       return -ENOSPC;
-       }
+       if (rv < 0)
+               return _idr_rc_to_errno(rv);
         *id = rv;
         return 0;
  }
@@ -325,7 +334,8 @@ EXPORT_SYMBOL(idr_get_new);
  
  static void idr_remove_warning(int id)
  {
-       printk("idr_remove called for id=%d which is not allocated.\n", id);
+       printk(KERN_WARNING
+               "idr_remove called for id=%d which is not allocated.\n", id);
         dump_stack();
  }
  
@@ -334,6 +344,7 @@ static void sub_remove(struct idr *idp, int shift, int id)
         struct idr_layer *p = idp->top;
         struct idr_layer **pa[MAX_LEVEL];
         struct idr_layer ***paa = &pa[0];
+       struct idr_layer *to_free;
         int n;
  
         *paa = NULL;
@@ -349,13 +360,18 @@ static void sub_remove(struct idr *idp, int shift, int id)
         n = id & IDR_MASK;
         if (likely(p != NULL && test_bit(n, &p->bitmap))){
                 __clear_bit(n, &p->bitmap);
-               p->ary[n] = NULL;
+               rcu_assign_pointer(p->ary[n], NULL);
+               to_free = NULL;
                 while(*paa && ! --((**paa)->count)){
-                       free_layer(idp, **paa);
+                       if (to_free)
+                               free_layer(to_free);
+                       to_free = **paa;
                         **paa-- = NULL;
                 }
                 if (!*paa)
                         idp->layers = 0;
+               if (to_free)
+                       free_layer(to_free);
         } else
                 idr_remove_warning(id);
  }
@@ -368,22 +384,34 @@ static void sub_remove(struct idr *idp, int shift, int id)
  void idr_remove(struct idr *idp, int id)
  {
         struct idr_layer *p;
+       struct idr_layer *to_free;
  
         /* Mask off upper bits we don't use for the search. */
         id &= MAX_ID_MASK;
  
         sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
         if (idp->top && idp->top->count == 1 && (idp->layers > 1) &&
-           idp->top->ary[0]) {  // We can drop a layer
-
+           idp->top->ary[0]) {
+               /*
+                * Single child at leftmost slot: we can shrink the tree.
+                * This level is not needed anymore since when layers are
+                * inserted, they are inserted at the top of the existing
+                * tree.
+                */
+               to_free = idp->top;
                 p = idp->top->ary[0];
-               idp->top->bitmap = idp->top->count = 0;
-               free_layer(idp, idp->top);
-               idp->top = p;
+               rcu_assign_pointer(idp->top, p);
                 --idp->layers;
+               to_free->bitmap = to_free->count = 0;
+               free_layer(to_free);
         }
         while (idp->id_free_cnt >= IDR_FREE_MAX) {
-               p = alloc_layer(idp);
+               p = get_from_free_list(idp);
+               /*
+                * Note: we don't call the rcu callback here, since the only
+                * layers that fall into the freelist are those that have been
+                * preallocated.
+                */
                 kmem_cache_free(idr_layer_cache, p);
         }
         return;
@@ -424,15 +452,13 @@ void idr_remove_all(struct idr *idp)
  
                 id += 1 << n;
                 while (n < fls(id)) {
-                       if (p) {
-                               memset(p, 0, sizeof *p);
-                               free_layer(idp, p);
-                       }
+                       if (p)
+                               free_layer(p);
                         n += IDR_BITS;
                         p = *--paa;
                 }
         }
-       idp->top = NULL;
+       rcu_assign_pointer(idp->top, NULL);
         idp->layers = 0;
  }
  EXPORT_SYMBOL(idr_remove_all);
@@ -444,7 +470,7 @@ EXPORT_SYMBOL(idr_remove_all);
  void idr_destroy(struct idr *idp)
  {
         while (idp->id_free_cnt) {
-               struct idr_layer *p = alloc_layer(idp);
+               struct idr_layer *p = get_from_free_list(idp);
                 kmem_cache_free(idr_layer_cache, p);
         }
  }
@@ -459,7 +485,8 @@ EXPORT_SYMBOL(idr_destroy);
   * return indicates that @id is not valid or you passed %NULL in
   * idr_get_new().
   *
- * The caller must serialize idr_find() vs idr_get_new() and idr_remove().
+ * This function can be called under rcu_read_lock(), given that the leaf
+ * pointers lifetimes are correctly managed.
   */
  void *idr_find(struct idr *idp, int id)
  {
@@ -467,7 +494,7 @@ void *idr_find(struct idr *idp, int id)
         struct idr_layer *p;
  
         n = idp->layers * IDR_BITS;
-       p = idp->top;
+       p = rcu_dereference(idp->top);
  
         /* Mask off upper bits we don't use for the search. */
         id &= MAX_ID_MASK;
@@ -477,7 +504,7 @@ void *idr_find(struct idr *idp, int id)
  
         while (n > 0 && p) {
                 n -= IDR_BITS;
-               p = p->ary[(id >> n) & IDR_MASK];
+               p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
         }
         return((void *)p);
  }
@@ -510,7 +537,7 @@ int idr_for_each(struct idr *idp,
         struct idr_layer **paa = &pa[0];
  
         n = idp->layers * IDR_BITS;
-       p = idp->top;
+       p = rcu_dereference(idp->top);
         max = 1 << n;
  
         id = 0;
@@ -518,7 +545,7 @@ int idr_for_each(struct idr *idp,
                 while (n > 0 && p) {
                         n -= IDR_BITS;
                         *paa++ = p;
-                       p = p->ary[(id >> n) & IDR_MASK];
+                       p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
                 }
  
                 if (p) {
@@ -548,7 +575,7 @@ EXPORT_SYMBOL(idr_for_each);
   * A -ENOENT return indicates that @id was not found.
   * A -EINVAL return indicates that @id was not within valid constraints.
   *
- * The caller must serialize vs idr_find(), idr_get_new(), and idr_remove().
+ * The caller must serialize with writers.
   */
  void *idr_replace(struct idr *idp, void *ptr, int id)
  {
@@ -574,7 +601,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id)
                 return ERR_PTR(-ENOENT);
  
         old_p = p->ary[n];
-       p->ary[n] = ptr;
+       rcu_assign_pointer(p->ary[n], ptr);
  
         return old_p;
  }
@@ -694,12 +721,8 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
   restart:
         /* get vacant slot */
         t = idr_get_empty_slot(&ida->idr, idr_id, pa);
-       if (t < 0) {
-               if (t == -1)
-                       return -EAGAIN;
-               else /* will be -3 */
-                       return -ENOSPC;
-       }
+       if (t < 0)
+               return _idr_rc_to_errno(t);
  
         if (t * IDA_BITMAP_BITS >= MAX_ID_BIT)
                 return -ENOSPC;
@@ -720,7 +743,8 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
                         return -EAGAIN;
  
                 memset(bitmap, 0, sizeof(struct ida_bitmap));
-               pa[0]->ary[idr_id & IDR_MASK] = (void *)bitmap;
+               rcu_assign_pointer(pa[0]->ary[idr_id & IDR_MASK],
+                               (void *)bitmap);
                 pa[0]->count++;
         }
  
@@ -749,7 +773,7 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
          * allocation.
          */
         if (ida->idr.id_free_cnt || ida->free_bitmap) {
-               struct idr_layer *p = alloc_layer(&ida->idr);
+               struct idr_layer *p = get_from_free_list(&ida->idr);
                 if (p)
                         kmem_cache_free(idr_layer_cache, p);
         }
diff --git a/lib/inflate.c b/lib/inflate.c

index 9762294be06257b6d6e6873dbd6b0503f21029c1..1a8e8a97812817f12460dbc880ce8fd4c53cf1cf 100644 (file)
--- a/lib/inflate.c
+++ b/lib/inflate.c
@@ -230,6 +230,45 @@ STATIC const ush mask_bits[] = {
  #define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<<k;k+=8;}}
  #define DUMPBITS(n) {b>>=(n);k-=(n);}
  
+#ifndef NO_INFLATE_MALLOC
+/* A trivial malloc implementation, adapted from
+ *  malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
+ */
+
+static unsigned long malloc_ptr;
+static int malloc_count;
+
+static void *malloc(int size)
+{
+       void *p;
+
+       if (size < 0)
+               error("Malloc error");
+       if (!malloc_ptr)
+               malloc_ptr = free_mem_ptr;
+
+       malloc_ptr = (malloc_ptr + 3) & ~3;     /* Align */
+
+       p = (void *)malloc_ptr;
+       malloc_ptr += size;
+
+       if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr)
+               error("Out of memory");
+
+       malloc_count++;
+       return p;
+}
+
+static void free(void *where)
+{
+       malloc_count--;
+       if (!malloc_count)
+               malloc_ptr = free_mem_ptr;
+}
+#else
+#define malloc(a) kmalloc(a, GFP_KERNEL)
+#define free(a) kfree(a)
+#endif
  
  /*
     Huffman code decoding is performed using a multi-level table lookup.
@@ -1045,7 +1084,6 @@ STATIC int INIT inflate(void)
    int e;                /* last block flag */
    int r;                /* result code */
    unsigned h;           /* maximum struct huft's malloc'ed */
-  void *ptr;
  
    /* initialize window, bit buffer */
    wp = 0;
@@ -1057,12 +1095,12 @@ STATIC int INIT inflate(void)
    h = 0;
    do {
      hufts = 0;
-    gzip_mark(&ptr);
-    if ((r = inflate_block(&e)) != 0) {
-      gzip_release(&ptr);          
-      return r;
-    }
-    gzip_release(&ptr);
+#ifdef ARCH_HAS_DECOMP_WDOG
+    arch_decomp_wdog();
+#endif
+    r = inflate_block(&e);
+    if (r)
+           return r;
      if (hufts > h)
        h = hufts;
    } while (!e);
diff --git a/lib/kobject.c b/lib/kobject.c

index 744401571ed76cbd48717f1aa1970804fc33cd9a..bd732ffebc85f36cfc30e70fa124f0566701ecba 100644 (file)
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -164,9 +164,8 @@ static int kobject_add_internal(struct kobject *kobj)
                 return -ENOENT;
  
         if (!kobj->name || !kobj->name[0]) {
-               pr_debug("kobject: (%p): attempted to be registered with empty "
+               WARN(1, "kobject: (%p): attempted to be registered with empty "
                          "name!\n", kobj);
-               WARN_ON(1);
                 return -EINVAL;
         }
  
@@ -583,12 +582,10 @@ static void kobject_release(struct kref *kref)
  void kobject_put(struct kobject *kobj)
  {
         if (kobj) {
-               if (!kobj->state_initialized) {
-                       printk(KERN_WARNING "kobject: '%s' (%p): is not "
+               if (!kobj->state_initialized)
+                       WARN(1, KERN_WARNING "kobject: '%s' (%p): is not "
                                "initialized, yet kobject_put() is being "
                                "called.\n", kobject_name(kobj), kobj);
-                       WARN_ON(1);
-               }
                 kref_put(&kobj->kref, kobject_release);
         }
  }
diff --git a/lib/list_debug.c b/lib/list_debug.c

index 4350ba9655bd182c966c1fc40909ace292c8a4a6..1a39f4e3ae1f1a93a8cb87cae13ce77b5c475d96 100644 (file)
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -20,18 +20,14 @@ void __list_add(struct list_head *new,
                               struct list_head *prev,
                               struct list_head *next)
  {
-       if (unlikely(next->prev != prev)) {
-               printk(KERN_ERR "list_add corruption. next->prev should be "
-                       "prev (%p), but was %p. (next=%p).\n",
-                       prev, next->prev, next);
-               BUG();
-       }
-       if (unlikely(prev->next != next)) {
-               printk(KERN_ERR "list_add corruption. prev->next should be "
-                       "next (%p), but was %p. (prev=%p).\n",
-                       next, prev->next, prev);
-               BUG();
-       }
+       WARN(next->prev != prev,
+               "list_add corruption. next->prev should be "
+               "prev (%p), but was %p. (next=%p).\n",
+               prev, next->prev, next);
+       WARN(prev->next != next,
+               "list_add corruption. prev->next should be "
+               "next (%p), but was %p. (prev=%p).\n",
+               next, prev->next, prev);
         next->prev = new;
         new->next = next;
         new->prev = prev;
@@ -39,20 +35,6 @@ void __list_add(struct list_head *new,
  }
  EXPORT_SYMBOL(__list_add);
  
-/**
- * list_add - add a new entry
- * @new: new entry to be added
- * @head: list head to add it after
- *
- * Insert a new entry after the specified head.
- * This is good for implementing stacks.
- */
-void list_add(struct list_head *new, struct list_head *head)
-{
-       __list_add(new, head, head->next);
-}
-EXPORT_SYMBOL(list_add);
-
  /**
   * list_del - deletes entry from list.
   * @entry: the element to delete from the list.
@@ -61,16 +43,12 @@ EXPORT_SYMBOL(list_add);
   */
  void list_del(struct list_head *entry)
  {
-       if (unlikely(entry->prev->next != entry)) {
-               printk(KERN_ERR "list_del corruption. prev->next should be %p, "
-                               "but was %p\n", entry, entry->prev->next);
-               BUG();
-       }
-       if (unlikely(entry->next->prev != entry)) {
-               printk(KERN_ERR "list_del corruption. next->prev should be %p, "
-                               "but was %p\n", entry, entry->next->prev);
-               BUG();
-       }
+       WARN(entry->prev->next != entry,
+               "list_del corruption. prev->next should be %p, "
+               "but was %p\n", entry, entry->prev->next);
+       WARN(entry->next->prev != entry,
+               "list_del corruption. next->prev should be %p, "
+               "but was %p\n", entry, entry->next->prev);
         __list_del(entry->prev, entry->next);
         entry->next = LIST_POISON1;
         entry->prev = LIST_POISON2;
diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c

index 77f0f9b775a9c12ab2c1b42aaac31a0cf883e1d7..5dc6b29c1575f80903a3a9facc55642facbd0462 100644 (file)
--- a/lib/lzo/lzo1x_decompress.c
+++ b/lib/lzo/lzo1x_decompress.c
@@ -138,8 +138,7 @@ match:
                                         t += 31 + *ip++;
                                 }
                                 m_pos = op - 1;
-                               m_pos -= le16_to_cpu(get_unaligned(
-                                       (const unsigned short *)ip)) >> 2;
+                               m_pos -= get_unaligned_le16(ip) >> 2;
                                 ip += 2;
                         } else if (t >= 16) {
                                 m_pos = op;
@@ -157,8 +156,7 @@ match:
                                         }
                                         t += 7 + *ip++;
                                 }
-                               m_pos -= le16_to_cpu(get_unaligned(
-                                       (const unsigned short *)ip)) >> 2;
+                               m_pos -= get_unaligned_le16(ip) >> 2;
                                 ip += 2;
                                 if (m_pos == op)
                                         goto eof_found;
diff --git a/lib/ratelimit.c b/lib/ratelimit.c

index 485e3040dcd4cc7eedc1f5981fd4b8bd572572b6..35136671b215f68222f498e086248fb8df51e64b 100644 (file)
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -3,6 +3,9 @@
   *
   * Isolated from kernel/printk.c by Dave Young <hidave.darkstar@gmail.com>
   *
+ * 2008-05-01 rewrite the function and use a ratelimit_state data struct as
+ * parameter. Now every user can use their own standalone ratelimit_state.
+ *
   * This file is released under the GPLv2.
   *
   */
@@ -11,41 +14,43 @@
  #include <linux/jiffies.h>
  #include <linux/module.h>
  
+static DEFINE_SPINLOCK(ratelimit_lock);
+static unsigned long flags;
+
  /*
   * __ratelimit - rate limiting
- * @ratelimit_jiffies: minimum time in jiffies between two callbacks
- * @ratelimit_burst: number of callbacks we do before ratelimiting
+ * @rs: ratelimit_state data
   *
- * This enforces a rate limit: not more than @ratelimit_burst callbacks
- * in every ratelimit_jiffies
+ * This enforces a rate limit: not more than @rs->burst callbacks
+ * in every @rs->interval jiffies
   */
-int __ratelimit(int ratelimit_jiffies, int ratelimit_burst)
+int __ratelimit(struct ratelimit_state *rs)
  {
-       static DEFINE_SPINLOCK(ratelimit_lock);
-       static unsigned toks = 10 * 5 * HZ;
-       static unsigned long last_msg;
-       static int missed;
-       unsigned long flags;
-       unsigned long now = jiffies;
+       if (!rs->interval)
+               return 1;
  
         spin_lock_irqsave(&ratelimit_lock, flags);
-       toks += now - last_msg;
-       last_msg = now;
-       if (toks > (ratelimit_burst * ratelimit_jiffies))
-               toks = ratelimit_burst * ratelimit_jiffies;
-       if (toks >= ratelimit_jiffies) {
-               int lost = missed;
+       if (!rs->begin)
+               rs->begin = jiffies;
  
-               missed = 0;
-               toks -= ratelimit_jiffies;
-               spin_unlock_irqrestore(&ratelimit_lock, flags);
-               if (lost)
-                       printk(KERN_WARNING "%s: %d messages suppressed\n",
-                               __func__, lost);
-               return 1;
+       if (time_is_before_jiffies(rs->begin + rs->interval)) {
+               if (rs->missed)
+                       printk(KERN_WARNING "%s: %d callbacks suppressed\n",
+                               __func__, rs->missed);
+               rs->begin = 0;
+               rs->printed = 0;
+               rs->missed = 0;
         }
-       missed++;
+       if (rs->burst && rs->burst > rs->printed)
+               goto print;
+
+       rs->missed++;
         spin_unlock_irqrestore(&ratelimit_lock, flags);
         return 0;
+
+print:
+       rs->printed++;
+       spin_unlock_irqrestore(&ratelimit_lock, flags);
+       return 1;
  }
  EXPORT_SYMBOL(__ratelimit);
diff --git a/mm/filemap.c b/mm/filemap.c

index 7675b91f4f63c96fac54ae5b0027790e3cf384e1..2d3ec1ffc66e1621b3db99a4105fb92115804e82 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -115,7 +115,7 @@ void __remove_from_page_cache(struct page *page)
  {
         struct address_space *mapping = page->mapping;
  
-       mem_cgroup_uncharge_page(page);
+       mem_cgroup_uncharge_cache_page(page);
         radix_tree_delete(&mapping->page_tree, page->index);
         page->mapping = NULL;
         mapping->nrpages--;
@@ -474,12 +474,12 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
                         mapping->nrpages++;
                         __inc_zone_page_state(page, NR_FILE_PAGES);
                 } else
-                       mem_cgroup_uncharge_page(page);
+                       mem_cgroup_uncharge_cache_page(page);
  
                 write_unlock_irq(&mapping->tree_lock);
                 radix_tree_preload_end();
         } else
-               mem_cgroup_uncharge_page(page);
+               mem_cgroup_uncharge_cache_page(page);
  out:
         return error;
  }
@@ -2563,9 +2563,8 @@ EXPORT_SYMBOL(generic_file_aio_write);
   * Otherwise return zero.
   *
   * The @gfp_mask argument specifies whether I/O may be performed to release
- * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
+ * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS).
   *
- * NOTE: @gfp_mask may go away, and this function may become non-blocking.
   */
  int try_to_release_page(struct page *page, gfp_t gfp_mask)
  {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index 41341c414194b6e6fbc25b92bd8ae4eb1ca384d6..a8bf4ab01f86b5d0b2bab44bccb4ec5351a23d79 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1026,6 +1026,17 @@ static void __init report_hugepages(void)
         }
  }
  
+static unsigned int cpuset_mems_nr(unsigned int *array)
+{
+       int node;
+       unsigned int nr = 0;
+
+       for_each_node_mask(node, cpuset_current_mems_allowed)
+               nr += array[node];
+
+       return nr;
+}
+
  #ifdef CONFIG_SYSCTL
  #ifdef CONFIG_HIGHMEM
  static void try_to_free_low(struct hstate *h, unsigned long count)
@@ -1375,17 +1386,6 @@ static int __init hugetlb_default_setup(char *s)
  }
  __setup("default_hugepagesz=", hugetlb_default_setup);
  
-static unsigned int cpuset_mems_nr(unsigned int *array)
-{
-       int node;
-       unsigned int nr = 0;
-
-       for_each_node_mask(node, cpuset_current_mems_allowed)
-               nr += array[node];
-
-       return nr;
-}
-
  int hugetlb_sysctl_handler(struct ctl_table *table, int write,
                            struct file *file, void __user *buffer,
                            size_t *length, loff_t *ppos)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index e46451e1d9b793563b08d8ef015927b484b0be1d..fba566c513220dcf8c30133a617e5d27328a23f2 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,9 +35,9 @@
  
  #include <asm/uaccess.h>
  
-struct cgroup_subsys mem_cgroup_subsys;
-static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
-static struct kmem_cache *page_cgroup_cache;
+struct cgroup_subsys mem_cgroup_subsys __read_mostly;
+static struct kmem_cache *page_cgroup_cache __read_mostly;
+#define MEM_CGROUP_RECLAIM_RETRIES     5
  
  /*
   * Statistics for memory cgroup.
@@ -166,7 +166,6 @@ struct page_cgroup {
         struct list_head lru;           /* per cgroup LRU list */
         struct page *page;
         struct mem_cgroup *mem_cgroup;
-       int ref_cnt;                    /* cached, mapped, migrating */
         int flags;
  };
  #define PAGE_CGROUP_FLAG_CACHE (0x1)   /* charged as cache */
@@ -185,6 +184,7 @@ static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
  enum charge_type {
         MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
         MEM_CGROUP_CHARGE_TYPE_MAPPED,
+       MEM_CGROUP_CHARGE_TYPE_FORCE,   /* used by force_empty */
  };
  
  /*
@@ -296,7 +296,7 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
                 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
  
         mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
-       list_del_init(&pc->lru);
+       list_del(&pc->lru);
  }
  
  static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
@@ -354,6 +354,9 @@ void mem_cgroup_move_lists(struct page *page, bool active)
         struct mem_cgroup_per_zone *mz;
         unsigned long flags;
  
+       if (mem_cgroup_subsys.disabled)
+               return;
+
         /*
          * We cannot lock_page_cgroup while holding zone's lru_lock,
          * because other holders of lock_page_cgroup can be interrupted
@@ -524,7 +527,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
   * < 0 if the cgroup is over its limit
   */
  static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
-                               gfp_t gfp_mask, enum charge_type ctype)
+                               gfp_t gfp_mask, enum charge_type ctype,
+                               struct mem_cgroup *memcg)
  {
         struct mem_cgroup *mem;
         struct page_cgroup *pc;
@@ -532,35 +536,8 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
         unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
         struct mem_cgroup_per_zone *mz;
  
-       if (mem_cgroup_subsys.disabled)
-               return 0;
-
-       /*
-        * Should page_cgroup's go to their own slab?
-        * One could optimize the performance of the charging routine
-        * by saving a bit in the page_flags and using it as a lock
-        * to see if the cgroup page already has a page_cgroup associated
-        * with it
-        */
-retry:
-       lock_page_cgroup(page);
-       pc = page_get_page_cgroup(page);
-       /*
-        * The page_cgroup exists and
-        * the page has already been accounted.
-        */
-       if (pc) {
-               VM_BUG_ON(pc->page != page);
-               VM_BUG_ON(pc->ref_cnt <= 0);
-
-               pc->ref_cnt++;
-               unlock_page_cgroup(page);
-               goto done;
-       }
-       unlock_page_cgroup(page);
-
-       pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask);
-       if (pc == NULL)
+       pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
+       if (unlikely(pc == NULL))
                 goto err;
  
         /*
@@ -569,16 +546,18 @@ retry:
          * thread group leader migrates. It's possible that mm is not
          * set, if so charge the init_mm (happens for pagecache usage).
          */
-       if (!mm)
-               mm = &init_mm;
-
-       rcu_read_lock();
-       mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
-       /*
-        * For every charge from the cgroup, increment reference count
-        */
-       css_get(&mem->css);
-       rcu_read_unlock();
+       if (likely(!memcg)) {
+               rcu_read_lock();
+               mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+               /*
+                * For every charge from the cgroup, increment reference count
+                */
+               css_get(&mem->css);
+               rcu_read_unlock();
+       } else {
+               mem = memcg;
+               css_get(&memcg->css);
+       }
  
         while (res_counter_charge(&mem->res, PAGE_SIZE)) {
                 if (!(gfp_mask & __GFP_WAIT))
@@ -603,25 +582,24 @@ retry:
                 }
         }
  
-       pc->ref_cnt = 1;
         pc->mem_cgroup = mem;
         pc->page = page;
-       pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
+       /*
+        * If a page is accounted as a page cache, insert to inactive list.
+        * If anon, insert to active list.
+        */
         if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
                 pc->flags = PAGE_CGROUP_FLAG_CACHE;
+       else
+               pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
  
         lock_page_cgroup(page);
-       if (page_get_page_cgroup(page)) {
+       if (unlikely(page_get_page_cgroup(page))) {
                 unlock_page_cgroup(page);
-               /*
-                * Another charge has been added to this page already.
-                * We take lock_page_cgroup(page) again and read
-                * page->cgroup, increment refcnt.... just retry is OK.
-                */
                 res_counter_uncharge(&mem->res, PAGE_SIZE);
                 css_put(&mem->css);
                 kmem_cache_free(page_cgroup_cache, pc);
-               goto retry;
+               goto done;
         }
         page_assign_page_cgroup(page, pc);
  
@@ -642,24 +620,65 @@ err:
  
  int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
  {
+       if (mem_cgroup_subsys.disabled)
+               return 0;
+
+       /*
+        * If already mapped, we don't have to account.
+        * If page cache, page->mapping has address_space.
+        * But page->mapping may have out-of-use anon_vma pointer,
+        * detect it by PageAnon() check. newly-mapped-anon's page->mapping
+        * is NULL.
+        */
+       if (page_mapped(page) || (page->mapping && !PageAnon(page)))
+               return 0;
+       if (unlikely(!mm))
+               mm = &init_mm;
         return mem_cgroup_charge_common(page, mm, gfp_mask,
-                               MEM_CGROUP_CHARGE_TYPE_MAPPED);
+                               MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
  }
  
  int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                                 gfp_t gfp_mask)
  {
-       if (!mm)
+       if (mem_cgroup_subsys.disabled)
+               return 0;
+
+       /*
+        * Corner case handling. This is usually called from
+        * add_to_page_cache(). But some filesystems (shmem) precharge the
+        * page first and then call add_to_page_cache() with GFP_NOWAIT.
+        *
+        * In the GFP_NOWAIT case, the page may have been pre-charged before
+        * add_to_page_cache() was called (see shmem.c). Check for that here and
+        * avoid charging twice. (It works but has to pay a bit larger cost.)
+        */
+       if (!(gfp_mask & __GFP_WAIT)) {
+               struct page_cgroup *pc;
+
+               lock_page_cgroup(page);
+               pc = page_get_page_cgroup(page);
+               if (pc) {
+                       VM_BUG_ON(pc->page != page);
+                       VM_BUG_ON(!pc->mem_cgroup);
+                       unlock_page_cgroup(page);
+                       return 0;
+               }
+               unlock_page_cgroup(page);
+       }
+
+       if (unlikely(!mm))
                 mm = &init_mm;
+
         return mem_cgroup_charge_common(page, mm, gfp_mask,
-                               MEM_CGROUP_CHARGE_TYPE_CACHE);
+                               MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
  }
  
  /*
- * Uncharging is always a welcome operation, we never complain, simply
- * uncharge.
+ * uncharge if !page_mapped(page)
   */
-void mem_cgroup_uncharge_page(struct page *page)
+static void
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
  {
         struct page_cgroup *pc;
         struct mem_cgroup *mem;
@@ -674,98 +693,151 @@ void mem_cgroup_uncharge_page(struct page *page)
          */
         lock_page_cgroup(page);
         pc = page_get_page_cgroup(page);
-       if (!pc)
+       if (unlikely(!pc))
                 goto unlock;
  
         VM_BUG_ON(pc->page != page);
-       VM_BUG_ON(pc->ref_cnt <= 0);
  
-       if (--(pc->ref_cnt) == 0) {
-               mz = page_cgroup_zoneinfo(pc);
-               spin_lock_irqsave(&mz->lru_lock, flags);
-               __mem_cgroup_remove_list(mz, pc);
-               spin_unlock_irqrestore(&mz->lru_lock, flags);
+       if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+           && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
+               || page_mapped(page)))
+               goto unlock;
  
-               page_assign_page_cgroup(page, NULL);
-               unlock_page_cgroup(page);
+       mz = page_cgroup_zoneinfo(pc);
+       spin_lock_irqsave(&mz->lru_lock, flags);
+       __mem_cgroup_remove_list(mz, pc);
+       spin_unlock_irqrestore(&mz->lru_lock, flags);
  
-               mem = pc->mem_cgroup;
-               res_counter_uncharge(&mem->res, PAGE_SIZE);
-               css_put(&mem->css);
+       page_assign_page_cgroup(page, NULL);
+       unlock_page_cgroup(page);
  
-               kmem_cache_free(page_cgroup_cache, pc);
-               return;
-       }
+       mem = pc->mem_cgroup;
+       res_counter_uncharge(&mem->res, PAGE_SIZE);
+       css_put(&mem->css);
  
+       kmem_cache_free(page_cgroup_cache, pc);
+       return;
  unlock:
         unlock_page_cgroup(page);
  }
  
+void mem_cgroup_uncharge_page(struct page *page)
+{
+       __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+}
+
+void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+       VM_BUG_ON(page_mapped(page));
+       __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+}
+
  /*
- * Returns non-zero if a page (under migration) has valid page_cgroup member.
- * Refcnt of page_cgroup is incremented.
+ * Before starting migration, account against new page.
   */
-int mem_cgroup_prepare_migration(struct page *page)
+int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
  {
         struct page_cgroup *pc;
+       struct mem_cgroup *mem = NULL;
+       enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+       int ret = 0;
  
         if (mem_cgroup_subsys.disabled)
                 return 0;
  
         lock_page_cgroup(page);
         pc = page_get_page_cgroup(page);
-       if (pc)
-               pc->ref_cnt++;
+       if (pc) {
+               mem = pc->mem_cgroup;
+               css_get(&mem->css);
+               if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
+                       ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+       }
         unlock_page_cgroup(page);
-       return pc != NULL;
+       if (mem) {
+               ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
+                       ctype, mem);
+               css_put(&mem->css);
+       }
+       return ret;
  }
  
-void mem_cgroup_end_migration(struct page *page)
+/* remove redundant charge if migration failed */
+void mem_cgroup_end_migration(struct page *newpage)
  {
-       mem_cgroup_uncharge_page(page);
+       /*
+        * On success, page->mapping is not NULL.
+        * Special rollback care is necessary when:
+        * 1. migration failed (newpage->mapping is cleared in this case);
+        * 2. the newpage was moved but not remapped again because the task
+        *    exited and the newpage is obsolete. In this case, the new page
+        *    may be a swapcache page, so we always just call
+        *    mem_cgroup_uncharge_page() to avoid a mess. The page_cgroup will
+        *    be removed if unnecessary. File cache pages are still on the
+        *    radix-tree; there is no need to care about them.
+        */
+       if (!newpage->mapping)
+               __mem_cgroup_uncharge_common(newpage,
+                                        MEM_CGROUP_CHARGE_TYPE_FORCE);
+       else if (PageAnon(newpage))
+               mem_cgroup_uncharge_page(newpage);
  }
  
  /*
- * We know both *page* and *newpage* are now not-on-LRU and PG_locked.
- * And no race with uncharge() routines because page_cgroup for *page*
- * has extra one reference by mem_cgroup_prepare_migration.
+ * Try to shrink memory usage under the specified resource controller.
+ * This is typically used to reclaim pages for shmem, to reduce the side
+ * effects of page allocation from shmem, which is used by some mem_cgroup.
   */
-void mem_cgroup_page_migration(struct page *page, struct page *newpage)
+int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
  {
-       struct page_cgroup *pc;
-       struct mem_cgroup_per_zone *mz;
-       unsigned long flags;
+       struct mem_cgroup *mem;
+       int progress = 0;
+       int retry = MEM_CGROUP_RECLAIM_RETRIES;
  
-       lock_page_cgroup(page);
-       pc = page_get_page_cgroup(page);
-       if (!pc) {
-               unlock_page_cgroup(page);
-               return;
-       }
+       if (mem_cgroup_subsys.disabled)
+               return 0;
  
-       mz = page_cgroup_zoneinfo(pc);
-       spin_lock_irqsave(&mz->lru_lock, flags);
-       __mem_cgroup_remove_list(mz, pc);
-       spin_unlock_irqrestore(&mz->lru_lock, flags);
+       rcu_read_lock();
+       mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+       css_get(&mem->css);
+       rcu_read_unlock();
  
-       page_assign_page_cgroup(page, NULL);
-       unlock_page_cgroup(page);
+       do {
+               progress = try_to_free_mem_cgroup_pages(mem, gfp_mask);
+       } while (!progress && --retry);
  
-       pc->page = newpage;
-       lock_page_cgroup(newpage);
-       page_assign_page_cgroup(newpage, pc);
+       css_put(&mem->css);
+       if (!retry)
+               return -ENOMEM;
+       return 0;
+}
  
-       mz = page_cgroup_zoneinfo(pc);
-       spin_lock_irqsave(&mz->lru_lock, flags);
-       __mem_cgroup_add_list(mz, pc);
-       spin_unlock_irqrestore(&mz->lru_lock, flags);
+int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
+{
+
+       int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
+       int progress;
+       int ret = 0;
  
-       unlock_page_cgroup(newpage);
+       while (res_counter_set_limit(&memcg->res, val)) {
+               if (signal_pending(current)) {
+                       ret = -EINTR;
+                       break;
+               }
+               if (!retry_count) {
+                       ret = -EBUSY;
+                       break;
+               }
+               progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL);
+               if (!progress)
+                       retry_count--;
+       }
+       return ret;
  }
  
+
  /*
   * This routine traverse page_cgroup in given list and drop them all.
- * This routine ignores page_cgroup->ref_cnt.
   * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
   */
  #define FORCE_UNCHARGE_BATCH   (128)
@@ -790,12 +862,20 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
                 page = pc->page;
                 get_page(page);
                 spin_unlock_irqrestore(&mz->lru_lock, flags);
-               mem_cgroup_uncharge_page(page);
-               put_page(page);
-               if (--count <= 0) {
-                       count = FORCE_UNCHARGE_BATCH;
+               /*
+                * Check if this page is on LRU. !LRU page can be found
+                * if it's under page migration.
+                */
+               if (PageLRU(page)) {
+                       __mem_cgroup_uncharge_common(page,
+                                       MEM_CGROUP_CHARGE_TYPE_FORCE);
+                       put_page(page);
+                       if (--count <= 0) {
+                               count = FORCE_UNCHARGE_BATCH;
+                               cond_resched();
+                       }
+               } else
                         cond_resched();
-               }
                 spin_lock_irqsave(&mz->lru_lock, flags);
         }
         spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -810,9 +890,6 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
         int ret = -EBUSY;
         int node, zid;
  
-       if (mem_cgroup_subsys.disabled)
-               return 0;
-
         css_get(&mem->css);
         /*
          * page reclaim code (kswapd etc..) will move pages between
@@ -838,32 +915,34 @@ out:
         return ret;
  }
  
-static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
-{
-       *tmp = memparse(buf, &buf);
-       if (*buf != '\0')
-               return -EINVAL;
-
-       /*
-        * Round up the value to the closest page size
-        */
-       *tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT;
-       return 0;
-}
-
  static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
  {
         return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
                                     cft->private);
  }
-
-static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
-                               struct file *file, const char __user *userbuf,
-                               size_t nbytes, loff_t *ppos)
+/*
+ * The user of this function is...
+ * RES_LIMIT.
+ */
+static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
+                           const char *buffer)
  {
-       return res_counter_write(&mem_cgroup_from_cont(cont)->res,
-                               cft->private, userbuf, nbytes, ppos,
-                               mem_cgroup_write_strategy);
+       struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+       unsigned long long val;
+       int ret;
+
+       switch (cft->private) {
+       case RES_LIMIT:
+               /* This function does all the necessary parsing; reuse it */
+               ret = res_counter_memparse_write_strategy(buffer, &val);
+               if (!ret)
+                       ret = mem_cgroup_resize_limit(memcg, val);
+               break;
+       default:
+               ret = -EINVAL; /* should be BUG() ? */
+               break;
+       }
+       return ret;
  }
  
  static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
@@ -940,7 +1019,7 @@ static struct cftype mem_cgroup_files[] = {
         {
                 .name = "limit_in_bytes",
                 .private = RES_LIMIT,
-               .write = mem_cgroup_write,
+               .write_string = mem_cgroup_write,
                 .read_u64 = mem_cgroup_read,
         },
         {
@@ -1070,8 +1149,6 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
  static int mem_cgroup_populate(struct cgroup_subsys *ss,
                                 struct cgroup *cont)
  {
-       if (mem_cgroup_subsys.disabled)
-               return 0;
         return cgroup_add_files(cont, ss, mem_cgroup_files,
                                         ARRAY_SIZE(mem_cgroup_files));
  }
@@ -1084,9 +1161,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
         struct mm_struct *mm;
         struct mem_cgroup *mem, *old_mem;
  
-       if (mem_cgroup_subsys.disabled)
-               return;
-
         mm = get_task_mm(p);
         if (mm == NULL)
                 return;
diff --git a/mm/migrate.c b/mm/migrate.c

index 376cceba82f98e7110520acc885f6bce0687482b..d8c65a65c61d5701b8ecbc9185ac864ef564707c 100644 (file)
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -358,6 +358,9 @@ static int migrate_page_move_mapping(struct address_space *mapping,
         __inc_zone_page_state(newpage, NR_FILE_PAGES);
  
         write_unlock_irq(&mapping->tree_lock);
+       if (!PageSwapCache(newpage)) {
+               mem_cgroup_uncharge_cache_page(page);
+       }
  
         return 0;
  }
@@ -611,7 +614,6 @@ static int move_to_new_page(struct page *newpage, struct page *page)
                 rc = fallback_migrate_page(mapping, newpage, page);
  
         if (!rc) {
-               mem_cgroup_page_migration(page, newpage);
                 remove_migration_ptes(page, newpage);
         } else
                 newpage->mapping = NULL;
@@ -641,6 +643,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
                 /* page was freed from under us. So we are done. */
                 goto move_newpage;
  
+       charge = mem_cgroup_prepare_migration(page, newpage);
+       if (charge == -ENOMEM) {
+               rc = -ENOMEM;
+               goto move_newpage;
+       }
+       /* prepare cgroup just returns 0 or -ENOMEM */
+       BUG_ON(charge);
+
         rc = -EAGAIN;
         if (TestSetPageLocked(page)) {
                 if (!force)
@@ -692,19 +702,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
                 goto rcu_unlock;
         }
  
-       charge = mem_cgroup_prepare_migration(page);
         /* Establish migration ptes or remove ptes */
         try_to_unmap(page, 1);
  
         if (!page_mapped(page))
                 rc = move_to_new_page(newpage, page);
  
-       if (rc) {
+       if (rc)
                 remove_migration_ptes(page, page);
-               if (charge)
-                       mem_cgroup_end_migration(page);
-       } else if (charge)
-               mem_cgroup_end_migration(newpage);
  rcu_unlock:
         if (rcu_locked)
                 rcu_read_unlock();
@@ -725,6 +730,8 @@ unlock:
         }
  
  move_newpage:
+       if (!charge)
+               mem_cgroup_end_migration(newpage);
         /*
          * Move the new page to the LRU. If migration was not successful
          * then this will free the page.
diff --git a/mm/pdflush.c b/mm/pdflush.c

index 9d834aa4b9795264d87eef283c2492a99d67597b..0cbe0c60c6bff39d60f0f3994258de7931d6dd4d 100644 (file)
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -130,7 +130,7 @@ static int __pdflush(struct pdflush_work *my_work)
                  * Thread creation: For how long have there been zero
                  * available threads?
                  */
-               if (jiffies - last_empty_jifs > 1 * HZ) {
+               if (time_after(jiffies, last_empty_jifs + 1 * HZ)) {
                         /* unlocked list_empty() test is OK here */
                         if (list_empty(&pdflush_list)) {
                                 /* unlocked test is OK here */
@@ -151,7 +151,7 @@ static int __pdflush(struct pdflush_work *my_work)
                 if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS)
                         continue;
                 pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
-               if (jiffies - pdf->when_i_went_to_sleep > 1 * HZ) {
+               if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) {
                         /* Limit exit rate */
                         pdf->when_i_went_to_sleep = jiffies;
                         break;                                  /* exeunt */
diff --git a/mm/rmap.c b/mm/rmap.c

index bf0a5b7cfb8e7a97369271ed235a714bd11dfd12..abbd29f7c43f4311621895d9ee9fbdaca51adb5f 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -576,14 +576,8 @@ void page_add_anon_rmap(struct page *page,
         VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
         if (atomic_inc_and_test(&page->_mapcount))
                 __page_set_anon_rmap(page, vma, address);
-       else {
+       else
                 __page_check_anon_rmap(page, vma, address);
-               /*
-                * We unconditionally charged during prepare, we uncharge here
-                * This takes care of balancing the reference counts
-                */
-               mem_cgroup_uncharge_page(page);
-       }
  }
  
  /**
@@ -614,12 +608,6 @@ void page_add_file_rmap(struct page *page)
  {
         if (atomic_inc_and_test(&page->_mapcount))
                 __inc_zone_page_state(page, NR_FILE_MAPPED);
-       else
-               /*
-                * We unconditionally charged during prepare, we uncharge here
-                * This takes care of balancing the reference counts
-                */
-               mem_cgroup_uncharge_page(page);
  }
  
  #ifdef CONFIG_DEBUG_VM
diff --git a/mm/shmem.c b/mm/shmem.c

index 9ffbea9b79e15370b540ffcce2207e5714091e0e..f92fea94d037b4b469cc8e55096f0e7d5f57cbf0 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -922,20 +922,26 @@ found:
         error = 1;
         if (!inode)
                 goto out;
-       /* Precharge page while we can wait, compensate afterwards */
+       /* Precharge page using GFP_KERNEL while we can wait */
         error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
         if (error)
                 goto out;
         error = radix_tree_preload(GFP_KERNEL);
-       if (error)
-               goto uncharge;
+       if (error) {
+               mem_cgroup_uncharge_cache_page(page);
+               goto out;
+       }
         error = 1;
  
         spin_lock(&info->lock);
         ptr = shmem_swp_entry(info, idx, NULL);
-       if (ptr && ptr->val == entry.val)
+       if (ptr && ptr->val == entry.val) {
                 error = add_to_page_cache(page, inode->i_mapping,
                                                 idx, GFP_NOWAIT);
+               /* does mem_cgroup_uncharge_cache_page on error */
+       } else  /* we must compensate for our precharge above */
+               mem_cgroup_uncharge_cache_page(page);
+
         if (error == -EEXIST) {
                 struct page *filepage = find_get_page(inode->i_mapping, idx);
                 error = 1;
@@ -961,8 +967,6 @@ found:
                 shmem_swp_unmap(ptr);
         spin_unlock(&info->lock);
         radix_tree_preload_end();
-uncharge:
-       mem_cgroup_uncharge_page(page);
  out:
         unlock_page(page);
         page_cache_release(page);
@@ -1311,17 +1315,14 @@ repeat:
                         shmem_swp_unmap(entry);
                         spin_unlock(&info->lock);
                         unlock_page(swappage);
+                       page_cache_release(swappage);
                         if (error == -ENOMEM) {
                                 /* allow reclaim from this memory cgroup */
-                               error = mem_cgroup_cache_charge(swappage,
-                                       current->mm, gfp & ~__GFP_HIGHMEM);
-                               if (error) {
-                                       page_cache_release(swappage);
+                               error = mem_cgroup_shrink_usage(current->mm,
+                                                               gfp);
+                               if (error)
                                         goto failed;
-                               }
-                               mem_cgroup_uncharge_page(swappage);
                         }
-                       page_cache_release(swappage);
                         goto repeat;
                 }
         } else if (sgp == SGP_READ && !filepage) {
@@ -1358,6 +1359,8 @@ repeat:
                 }
  
                 if (!filepage) {
+                       int ret;
+
                         spin_unlock(&info->lock);
                         filepage = shmem_alloc_page(gfp, info, idx);
                         if (!filepage) {
@@ -1386,10 +1389,18 @@ repeat:
                                 swap = *entry;
                                 shmem_swp_unmap(entry);
                         }
-                       if (error || swap.val || 0 != add_to_page_cache_lru(
-                                       filepage, mapping, idx, GFP_NOWAIT)) {
+                       ret = error || swap.val;
+                       if (ret)
+                               mem_cgroup_uncharge_cache_page(filepage);
+                       else
+                               ret = add_to_page_cache_lru(filepage, mapping,
+                                               idx, GFP_NOWAIT);
+                       /*
+                        * At add_to_page_cache_lru() failure, uncharge will
+                        * be done automatically.
+                        */
+                       if (ret) {
                                 spin_unlock(&info->lock);
-                               mem_cgroup_uncharge_page(filepage);
                                 page_cache_release(filepage);
                                 shmem_unacct_blocks(info->flags, 1);
                                 shmem_free_blocks(inode, 1);
@@ -1398,7 +1409,6 @@ repeat:
                                         goto failed;
                                 goto repeat;
                         }
-                       mem_cgroup_uncharge_page(filepage);
                         info->flags |= SHMEM_PAGEIN;
                 }
  
diff --git a/mm/vmscan.c b/mm/vmscan.c

index 967d30ccd92b8f3a453951104a761986b6d88a6a..26672c6cd3cef37bd4b8d8df8512b814957b71f3 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -38,6 +38,7 @@
  #include <linux/kthread.h>
  #include <linux/freezer.h>
  #include <linux/memcontrol.h>
+#include <linux/delayacct.h>
  
  #include <asm/tlbflush.h>
  #include <asm/div64.h>
@@ -1316,6 +1317,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
         struct zone *zone;
         enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
  
+       delayacct_freepages_start();
+
         if (scan_global_lru(sc))
                 count_vm_event(ALLOCSTALL);
         /*
@@ -1396,6 +1399,8 @@ out:
         } else
                 mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority);
  
+       delayacct_freepages_end();
+
         return ret;
  }
  
diff --git a/net/802/psnap.c b/net/802/psnap.c

index ea46439314468e78f5a20539de38e123dc4cd5a9..b3cfe5a14fcaee0cfb56401286574d1a2e94a74b 100644 (file)
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -31,11 +31,9 @@ static struct llc_sap *snap_sap;
   */
  static struct datalink_proto *find_snap_client(unsigned char *desc)
  {
-       struct list_head *entry;
         struct datalink_proto *proto = NULL, *p;
  
-       list_for_each_rcu(entry, &snap_list) {
-               p = list_entry(entry, struct datalink_proto, node);
+       list_for_each_entry_rcu(p, &snap_list, node) {
                 if (!memcmp(p->type, desc, 5)) {
                         proto = p;
                         break;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c

index a570e2af22cb78d0994d41d19e83a29a5e7c5527..f686467ff12ba085f12b140f340a639699a7e049 100644 (file)
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -67,7 +67,7 @@ static struct ctl_table net_core_table[] = {
         {
                 .ctl_name       = NET_CORE_MSG_COST,
                 .procname       = "message_cost",
-               .data           = &net_msg_cost,
+               .data           = &net_ratelimit_state.interval,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
                 .proc_handler   = &proc_dointvec_jiffies,
@@ -76,7 +76,7 @@ static struct ctl_table net_core_table[] = {
         {
                 .ctl_name       = NET_CORE_MSG_BURST,
                 .procname       = "message_burst",
-               .data           = &net_msg_burst,
+               .data           = &net_ratelimit_state.burst,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
                 .proc_handler   = &proc_dointvec,
diff --git a/net/core/utils.c b/net/core/utils.c

index 8031eb59054e057f4556f0cf4f4d251020f57b0d..72e0ebe964a08d03be218b8c17aa061818579f2f 100644 (file)
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -31,17 +31,16 @@
  #include <asm/system.h>
  #include <asm/uaccess.h>
  
-int net_msg_cost __read_mostly = 5*HZ;
-int net_msg_burst __read_mostly = 10;
  int net_msg_warn __read_mostly = 1;
  EXPORT_SYMBOL(net_msg_warn);
  
+DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10);
  /*
   * All net warning printk()s should be guarded by this function.
   */
  int net_ratelimit(void)
  {
-       return __printk_ratelimit(net_msg_cost, net_msg_burst);
+       return __ratelimit(&net_ratelimit_state);
  }
  EXPORT_SYMBOL(net_ratelimit);
  
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c

index dd919d84285ffc8874fe659e96c24c76e56501f5..f440a9f54924e8d802b55d4bd59170c10b28d666 100644 (file)
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -264,7 +264,6 @@ static inline int inet_netns_ok(struct net *net, int protocol)
  static int inet_create(struct net *net, struct socket *sock, int protocol)
  {
         struct sock *sk;
-       struct list_head *p;
         struct inet_protosw *answer;
         struct inet_sock *inet;
         struct proto *answer_prot;
@@ -281,13 +280,12 @@ static int inet_create(struct net *net, struct socket *sock, int protocol)
         sock->state = SS_UNCONNECTED;
  
         /* Look for the requested type/protocol pair. */
-       answer = NULL;
  lookup_protocol:
         err = -ESOCKTNOSUPPORT;
         rcu_read_lock();
-       list_for_each_rcu(p, &inetsw[sock->type]) {
-               answer = list_entry(p, struct inet_protosw, list);
+       list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
  
+               err = 0;
                 /* Check the non-wild match. */
                 if (protocol == answer->protocol) {
                         if (protocol != IPPROTO_IP)
@@ -302,10 +300,9 @@ lookup_protocol:
                                 break;
                 }
                 err = -EPROTONOSUPPORT;
-               answer = NULL;
         }
  
-       if (unlikely(answer == NULL)) {
+       if (unlikely(err)) {
                 if (try_loading_module < 2) {
                         rcu_read_unlock();
                         /*
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c

index 3d828bc4b1cf97cbfcbd1df9e76c5b357b37a9c4..60461ad7fa6ff0deaad6d6d27f463594ae0d7825 100644 (file)
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -83,7 +83,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol)
         struct inet_sock *inet;
         struct ipv6_pinfo *np;
         struct sock *sk;
-       struct list_head *p;
         struct inet_protosw *answer;
         struct proto *answer_prot;
         unsigned char answer_flags;
@@ -97,13 +96,12 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol)
                 build_ehash_secret();
  
         /* Look for the requested type/protocol pair. */
-       answer = NULL;
  lookup_protocol:
         err = -ESOCKTNOSUPPORT;
         rcu_read_lock();
-       list_for_each_rcu(p, &inetsw6[sock->type]) {
-               answer = list_entry(p, struct inet_protosw, list);
+       list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) {
  
+               err = 0;
                 /* Check the non-wild match. */
                 if (protocol == answer->protocol) {
                         if (protocol != IPPROTO_IP)
@@ -118,10 +116,9 @@ lookup_protocol:
                                 break;
                 }
                 err = -EPROTONOSUPPORT;
-               answer = NULL;
         }
  
-       if (!answer) {
+       if (err) {
                 if (try_loading_module < 2) {
                         rcu_read_unlock();
                         /*
diff --git a/net/sysctl_net.c b/net/sysctl_net.c

index 007c1a6708ee4652dd97a9f21c45f407b75327f0..63ada437fc2f0196e42fb349ee552a54a25ecc7f 100644 (file)
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -35,8 +35,22 @@ net_ctl_header_lookup(struct ctl_table_root *root, struct nsproxy *namespaces)
         return &namespaces->net_ns->sysctl_table_headers;
  }
  
+/* Return standard mode bits for table entry. */
+static int net_ctl_permissions(struct ctl_table_root *root,
+                              struct nsproxy *nsproxy,
+                              struct ctl_table *table)
+{
+       /* Allow network administrator to have same access as root. */
+       if (capable(CAP_NET_ADMIN)) {
+               int mode = (table->mode >> 6) & 7;
+               return (mode << 6) | (mode << 3) | mode;
+       }
+       return table->mode;
+}
+
  static struct ctl_table_root net_sysctl_root = {
         .lookup = net_ctl_header_lookup,
+       .permissions = net_ctl_permissions,
  };
  
  static LIST_HEAD(net_sysctl_ro_tables);
diff --git a/scripts/Makefile.fwinst b/scripts/Makefile.fwinst

index c972c0f54ce0aba88771ef2a727b425ad50abd83..f63a663de1589106a05897270b56709e5400034a 100644 (file)
--- a/scripts/Makefile.fwinst
+++ b/scripts/Makefile.fwinst
@@ -17,14 +17,15 @@ include $(srctree)/$(obj)/Makefile
  
  include scripts/Makefile.host
  
-mod-fw := $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-m))
-
+mod-fw := $(fw-shipped-m)
  # If CONFIG_FIRMWARE_IN_KERNEL isn't set, then install the 
  # firmware for in-kernel drivers too.
  ifndef CONFIG_FIRMWARE_IN_KERNEL
-mod-fw += $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-y))
+mod-fw += $(fw-shipped-y)
  endif
  
+installed-mod-fw := $(addprefix $(INSTALL_FW_PATH)/,$(mod-fw))
+
  installed-fw := $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-all))
  installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/.
  
@@ -49,7 +50,8 @@ PHONY +=  __fw_install __fw_modinst FORCE
  .PHONY: $(PHONY)
  
  __fw_install: $(installed-fw)
-__fw_modinst: $(mod-fw)
+__fw_modinst: $(installed-mod-fw)
+__fw_modbuild: $(addprefix $(obj)/,$(mod-fw))
  
  FORCE:
  
diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl

index 340ad6920511aa08a52f5360c54836c682e2deed..3eca62566d6b4468412c9ff2afdbb3b5675dc7b6 100755 (executable)
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -26,12 +26,17 @@
  # $& (whole re) matches the complete objdump line with the stack growth
  # $1 (first bracket) matches the size of the stack growth
  #
+# $dre is similar, but for dynamic stack reductions:
+# $& (whole re) matches the complete objdump line with the stack growth
+# $1 (first bracket) matches the dynamic amount of the stack growth
+#
  # use anything else and feel the pain ;)
-my (@stack, $re, $x, $xs);
+my (@stack, $re, $dre, $x, $xs);
  {
         my $arch = shift;
         if ($arch eq "") {
                 $arch = `uname -m`;
+               chomp($arch);
         }
  
         $x      = "[0-9a-f]";   # hex character
@@ -46,9 +51,11 @@ my (@stack, $re, $x, $xs);
         } elsif ($arch =~ /^i[3456]86$/) {
                 #c0105234:       81 ec ac 05 00 00       sub    $0x5ac,%esp
                 $re = qr/^.*[as][du][db]    \$(0x$x{1,8}),\%esp$/o;
+               $dre = qr/^.*[as][du][db]    (%.*),\%esp$/o;
         } elsif ($arch eq 'x86_64') {
                 #    2f60:      48 81 ec e8 05 00 00    sub    $0x5e8,%rsp
                 $re = qr/^.*[as][du][db]    \$(0x$x{1,8}),\%rsp$/o;
+               $dre = qr/^.*[as][du][db]    (\%.*),\%rsp$/o;
         } elsif ($arch eq 'ia64') {
                 #e0000000044011fc:       01 0f fc 8c     adds r12=-384,r12
                 $re = qr/.*adds.*r12=-(([0-9]{2}|[3-9])[0-9]{2}),r12/o;
@@ -85,7 +92,7 @@ my (@stack, $re, $x, $xs);
                 #   0:   00 e8 38 01     LINK 0x4e0;
                 $re = qr/.*[[:space:]]LINK[[:space:]]*(0x$x{1,8})/o;
         } else {
-               print("wrong or unknown architecture\n");
+               print("wrong or unknown architecture \"$arch\"\n");
                 exit
         }
  }
@@ -141,6 +148,22 @@ while (my $line = <STDIN>) {
                 next if ($size < 100);
                 push @stack, "$intro$size\n";
         }
+       elsif (defined $dre && $line =~ m/$dre/) {
+               my $size = "Dynamic ($1)";
+
+               next if $line !~ m/^($xs*)/;
+               my $addr = $1;
+               $addr =~ s/ /0/g;
+               $addr = "0x$addr";
+
+               my $intro = "$addr $func [$file]:";
+               my $padlen = 56 - length($intro);
+               while ($padlen > 0) {
+                       $intro .= '     ';
+                       $padlen -= 8;
+               }
+               push @stack, "$intro$size\n";
+       }
  }
  
  print sort bysize @stack;
diff --git a/security/device_cgroup.c b/security/device_cgroup.c

index ddd92cec78ed81c44bcfc4d9c530662a179381f2..7bd296cca041aaa7e62a3f923182ff7ac2adf5ef 100644 (file)
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -41,6 +41,7 @@ struct dev_whitelist_item {
         short type;
         short access;
         struct list_head list;
+       struct rcu_head rcu;
  };
  
  struct dev_cgroup {
@@ -59,6 +60,11 @@ static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
         return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
  }
  
+static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
+{
+       return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
+}
+
  struct cgroup_subsys devices_subsys;
  
  static int devcgroup_can_attach(struct cgroup_subsys *ss,
@@ -128,11 +134,19 @@ static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
         }
  
         if (whcopy != NULL)
-               list_add_tail(&whcopy->list, &dev_cgroup->whitelist);
+               list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist);
         spin_unlock(&dev_cgroup->lock);
         return 0;
  }
  
+static void whitelist_item_free(struct rcu_head *rcu)
+{
+       struct dev_whitelist_item *item;
+
+       item = container_of(rcu, struct dev_whitelist_item, rcu);
+       kfree(item);
+}
+
  /*
   * called under cgroup_lock()
   * since the list is visible to other tasks, we need the spinlock also
@@ -156,8 +170,8 @@ static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup,
  remove:
                 walk->access &= ~wh->access;
                 if (!walk->access) {
-                       list_del(&walk->list);
-                       kfree(walk);
+                       list_del_rcu(&walk->list);
+                       call_rcu(&walk->rcu, whitelist_item_free);
                 }
         }
         spin_unlock(&dev_cgroup->lock);
@@ -188,7 +202,7 @@ static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss,
                 }
                 wh->minor = wh->major = ~0;
                 wh->type = DEV_ALL;
-               wh->access = ACC_MKNOD | ACC_READ | ACC_WRITE;
+               wh->access = ACC_MASK;
                 list_add(&wh->list, &dev_cgroup->whitelist);
         } else {
                 parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
@@ -250,11 +264,10 @@ static char type_to_char(short type)
  
  static void set_majmin(char *str, unsigned m)
  {
-       memset(str, 0, MAJMINLEN);
         if (m == ~0)
-               sprintf(str, "*");
+               strcpy(str, "*");
         else
-               snprintf(str, MAJMINLEN, "%u", m);
+               sprintf(str, "%u", m);
  }
  
  static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
@@ -264,15 +277,15 @@ static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
         struct dev_whitelist_item *wh;
         char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
  
-       spin_lock(&devcgroup->lock);
-       list_for_each_entry(wh, &devcgroup->whitelist, list) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) {
                 set_access(acc, wh->access);
                 set_majmin(maj, wh->major);
                 set_majmin(min, wh->minor);
                 seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type),
                            maj, min, acc);
         }
-       spin_unlock(&devcgroup->lock);
+       rcu_read_unlock();
  
         return 0;
  }
@@ -312,10 +325,10 @@ static int may_access_whitelist(struct dev_cgroup *c,
   * when adding a new allow rule to a device whitelist, the rule
   * must be allowed in the parent device
   */
-static int parent_has_perm(struct cgroup *childcg,
+static int parent_has_perm(struct dev_cgroup *childcg,
                                   struct dev_whitelist_item *wh)
  {
-       struct cgroup *pcg = childcg->parent;
+       struct cgroup *pcg = childcg->css.cgroup->parent;
         struct dev_cgroup *parent;
         int ret;
  
@@ -341,39 +354,19 @@ static int parent_has_perm(struct cgroup *childcg,
   * new access is only allowed if you're in the top-level cgroup, or your
   * parent cgroup has the access you're asking for.
   */
-static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
-                               struct file *file, const char __user *userbuf,
-                               size_t nbytes, loff_t *ppos)
+static int devcgroup_update_access(struct dev_cgroup *devcgroup,
+                                  int filetype, const char *buffer)
  {
-       struct cgroup *cur_cgroup;
-       struct dev_cgroup *devcgroup, *cur_devcgroup;
-       int filetype = cft->private;
-       char *buffer, *b;
+       struct dev_cgroup *cur_devcgroup;
+       const char *b;
+       char *endp;
         int retval = 0, count;
         struct dev_whitelist_item wh;
  
         if (!capable(CAP_SYS_ADMIN))
                 return -EPERM;
  
-       devcgroup = cgroup_to_devcgroup(cgroup);
-       cur_cgroup = task_cgroup(current, devices_subsys.subsys_id);
-       cur_devcgroup = cgroup_to_devcgroup(cur_cgroup);
-
-       buffer = kmalloc(nbytes+1, GFP_KERNEL);
-       if (!buffer)
-               return -ENOMEM;
-
-       if (copy_from_user(buffer, userbuf, nbytes)) {
-               retval = -EFAULT;
-               goto out1;
-       }
-       buffer[nbytes] = 0;     /* nul-terminate */
-
-       cgroup_lock();
-       if (cgroup_is_removed(cgroup)) {
-               retval = -ENODEV;
-               goto out2;
-       }
+       cur_devcgroup = task_devcgroup(current);
  
         memset(&wh, 0, sizeof(wh));
         b = buffer;
@@ -392,32 +385,23 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
                 wh.type = DEV_CHAR;
                 break;
         default:
-               retval = -EINVAL;
-               goto out2;
+               return -EINVAL;
         }
         b++;
-       if (!isspace(*b)) {
-               retval = -EINVAL;
-               goto out2;
-       }
+       if (!isspace(*b))
+               return -EINVAL;
         b++;
         if (*b == '*') {
                 wh.major = ~0;
                 b++;
         } else if (isdigit(*b)) {
-               wh.major = 0;
-               while (isdigit(*b)) {
-                       wh.major = wh.major*10+(*b-'0');
-                       b++;
-               }
+               wh.major = simple_strtoul(b, &endp, 10);
+               b = endp;
         } else {
-               retval = -EINVAL;
-               goto out2;
-       }
-       if (*b != ':') {
-               retval = -EINVAL;
-               goto out2;
+               return -EINVAL;
         }
+       if (*b != ':')
+               return -EINVAL;
         b++;
  
         /* read minor */
@@ -425,19 +409,13 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
                 wh.minor = ~0;
                 b++;
         } else if (isdigit(*b)) {
-               wh.minor = 0;
-               while (isdigit(*b)) {
-                       wh.minor = wh.minor*10+(*b-'0');
-                       b++;
-               }
+               wh.minor = simple_strtoul(b, &endp, 10);
+               b = endp;
         } else {
-               retval = -EINVAL;
-               goto out2;
-       }
-       if (!isspace(*b)) {
-               retval = -EINVAL;
-               goto out2;
+               return -EINVAL;
         }
+       if (!isspace(*b))
+               return -EINVAL;
         for (b++, count = 0; count < 3; count++, b++) {
                 switch (*b) {
                 case 'r':
@@ -454,8 +432,7 @@ static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
                         count = 3;
                         break;
                 default:
-                       retval = -EINVAL;
-                       goto out2;
+                       return -EINVAL;
                 }
         }
  
@@ -463,38 +440,39 @@ handle:
         retval = 0;
         switch (filetype) {
         case DEVCG_ALLOW:
-               if (!parent_has_perm(cgroup, &wh))
-                       retval = -EPERM;
-               else
-                       retval = dev_whitelist_add(devcgroup, &wh);
-               break;
+               if (!parent_has_perm(devcgroup, &wh))
+                       return -EPERM;
+               return dev_whitelist_add(devcgroup, &wh);
         case DEVCG_DENY:
                 dev_whitelist_rm(devcgroup, &wh);
                 break;
         default:
-               retval = -EINVAL;
-               goto out2;
+               return -EINVAL;
         }
+       return 0;
+}
  
-       if (retval == 0)
-               retval = nbytes;
-
-out2:
+static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
+                                 const char *buffer)
+{
+       int retval;
+       if (!cgroup_lock_live_group(cgrp))
+               return -ENODEV;
+       retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
+                                        cft->private, buffer);
         cgroup_unlock();
-out1:
-       kfree(buffer);
         return retval;
  }
  
  static struct cftype dev_cgroup_files[] = {
         {
                 .name = "allow",
-               .write  = devcgroup_access_write,
+               .write_string  = devcgroup_access_write,
                 .private = DEVCG_ALLOW,
         },
         {
                 .name = "deny",
-               .write = devcgroup_access_write,
+               .write_string = devcgroup_access_write,
                 .private = DEVCG_DENY,
         },
         {
@@ -535,8 +513,8 @@ int devcgroup_inode_permission(struct inode *inode, int mask)
         if (!dev_cgroup)
                 return 0;
  
-       spin_lock(&dev_cgroup->lock);
-       list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) {
                 if (wh->type & DEV_ALL)
                         goto acc_check;
                 if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode))
@@ -552,10 +530,10 @@ acc_check:
                         continue;
                 if ((mask & MAY_READ) && !(wh->access & ACC_READ))
                         continue;
-               spin_unlock(&dev_cgroup->lock);
+               rcu_read_unlock();
                 return 0;
         }
-       spin_unlock(&dev_cgroup->lock);
+       rcu_read_unlock();
  
         return -EPERM;
  }
@@ -570,7 +548,7 @@ int devcgroup_inode_mknod(int mode, dev_t dev)
         if (!dev_cgroup)
                 return 0;
  
-       spin_lock(&dev_cgroup->lock);
+       rcu_read_lock();
         list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
                 if (wh->type & DEV_ALL)
                         goto acc_check;
@@ -585,9 +563,9 @@ int devcgroup_inode_mknod(int mode, dev_t dev)
  acc_check:
                 if (!(wh->access & ACC_MKNOD))
                         continue;
-               spin_unlock(&dev_cgroup->lock);
+               rcu_read_unlock();
                 return 0;
         }
-       spin_unlock(&dev_cgroup->lock);
+       rcu_read_unlock();
         return -EPERM;
  }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 26 Jul 2008 00:29:03 +0000 (17:29 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 26 Jul 2008 00:29:03 +0000 (17:29 -0700)