- directory with info on telephony (e.g. voice over IP) support.
time_interpolators.txt
- info on time interpolators.
-tipar.txt
- - information about Parallel link cable for Texas Instruments handhelds.
tty.txt
- guide to the locking policies of the tty layer.
uml/
So, you can either get rid of GNU emacs, or change it to use saner
values. To do the latter, you can stick the following in your .emacs file:
-(defun linux-c-mode ()
- "C mode with adjusted defaults for use with the Linux kernel."
- (interactive)
- (c-mode)
- (c-set-style "K&R")
- (setq tab-width 8)
- (setq indent-tabs-mode t)
- (setq c-basic-offset 8))
-
-This will define the M-x linux-c-mode command. When hacking on a
-module, if you put the string -*- linux-c -*- somewhere on the first
-two lines, this mode will be automatically invoked. Also, you may want
-to add
-
-(setq auto-mode-alist (cons '("/usr/src/linux.*/.*\\.[ch]$" . linux-c-mode)
- auto-mode-alist))
-
-to your .emacs file if you want to have linux-c-mode switched on
-automagically when you edit source files under /usr/src/linux.
+(defun c-lineup-arglist-tabs-only (ignored)
+ "Line up argument lists by tabs, not spaces"
+ (let* ((anchor (c-langelem-pos c-syntactic-element))
+ (column (c-langelem-2nd-pos c-syntactic-element))
+ (offset (- (1+ column) anchor))
+ (steps (floor offset c-basic-offset)))
+ (* (max steps 1)
+ c-basic-offset)))
+
+(add-hook 'c-mode-hook
+ (lambda ()
+ (let ((filename (buffer-file-name)))
+ ;; Enable kernel mode for the appropriate files
+ (when (and filename
+ (string-match "~/src/linux-trees" filename))
+ (setq indent-tabs-mode t)
+ (c-set-style "linux")
+ (c-set-offset 'arglist-cont-nonempty
+ '(c-lineup-gcc-asm-reg
+ c-lineup-arglist-tabs-only))))))
+
+This will make emacs go better with the kernel coding style for C
+files below ~/src/linux-trees.
But even if you fail in getting emacs to do sane formatting, not
everything is lost: use "indent".
<revhistory>
<revision>
- <revnumber>1.0 </revnumber>
+ <revnumber>1.0</revnumber>
<date>May 30, 2001</date>
<revremark>Initial revision posted to linux-kernel</revremark>
</revision>
<revision>
- <revnumber>1.1 </revnumber>
+ <revnumber>1.1</revnumber>
<date>June 3, 2001</date>
<revremark>Revised after comments from linux-kernel</revremark>
</revision>
a) waiting for a CPU (while being runnable)
b) completion of synchronous block I/O initiated by the task
c) swapping in pages
+d) memory reclaim
and makes these statistics available to userspace through
the taskstats interface.
include/linux/taskstats.h
for a description of the fields pertaining to delay accounting.
It will generally be in the form of counters returning the cumulative
-delay seen for cpu, sync block I/O, swapin etc.
+delay seen for cpu, sync block I/O, swapin, memory reclaim etc.
Taking the difference of two successive readings of a given
counter (say cpu_delay_total) for a task will give the delay
7876 92005750 100000000 24001500
IO count delay total
0 0
-MEM count delay total
+SWAP count delay total
+ 0 0
+RECLAIM count delay total
0 0
Get delays seen in executing a given simple command
6 4000250 4000000 0
IO count delay total
0 0
-MEM count delay total
+SWAP count delay total
+ 0 0
+RECLAIM count delay total
0 0
" %15llu%15llu%15llu%15llu\n"
"IO %15s%15s\n"
" %15llu%15llu\n"
- "MEM %15s%15s\n"
+ "SWAP %15s%15s\n"
+ " %15llu%15llu\n"
+ "RECLAIM %12s%15s\n"
" %15llu%15llu\n",
"count", "real total", "virtual total", "delay total",
t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
t->cpu_delay_total,
"count", "delay total",
t->blkio_count, t->blkio_delay_total,
- "count", "delay total", t->swapin_count, t->swapin_delay_total);
+ "count", "delay total", t->swapin_count, t->swapin_delay_total,
+ "count", "delay total",
+ t->freepages_count, t->freepages_delay_total);
}
void task_context_switch_counts(struct taskstats *t)
5) Time accounting for SMT machines
+6) Extended delay accounting fields for memory reclaim
+
Future extension should add fields to the end of the taskstats struct, and
should not change the relative position of each field within the struct.
__u64 ac_utimescaled; /* utime scaled on frequency etc */
__u64 ac_stimescaled; /* stime scaled on frequency etc */
__u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
+
+6) Extended delay accounting fields for memory reclaim
+ /* Delay waiting for memory reclaim */
+ __u64 freepages_count;
+ __u64 freepages_delay_total;
}
--- /dev/null
+===============================================================
+== BT8XXGPIO driver ==
+== ==
+== A driver for a selfmade cheap BT8xx based PCI GPIO-card ==
+== ==
+== For advanced documentation, see ==
+== http://www.bu3sch.de/btgpio.php ==
+===============================================================
+
+
+A generic digital 24-port PCI GPIO card can be built out of an ordinary
+Brooktree bt848, bt849, bt878 or bt879 based analog TV tuner card. The
+Brooktree chip is used in old analog Hauppauge WinTV PCI cards. You can easily
+find them used for low prices on the net.
+
+The bt8xx chip does have 24 digital GPIO ports.
+These ports are accessible via 24 pins on the SMD chip package.
+
+
+==============================================
+== How to physically access the GPIO pins ==
+==============================================
+
+The are several ways to access these pins. One might unsolder the whole chip
+and put it on a custom PCI board, or one might only unsolder each individual
+GPIO pin and solder that to some tiny wire. As the chip package really is tiny
+there are some advanced soldering skills needed in any case.
+
+The physical pinouts are drawn in the following ASCII art.
+The GPIO pins are marked with G00-G23
+
+ G G G G G G G G G G G G G G G G G G
+ 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7
+ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+ ---------------------------------------------------------------------------
+ --| ^ ^ |--
+ --| pin 86 pin 67 |--
+ --| |--
+ --| pin 61 > |-- G18
+ --| |-- G19
+ --| |-- G20
+ --| |-- G21
+ --| |-- G22
+ --| pin 56 > |-- G23
+ --| |--
+ --| Brooktree 878/879 |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| O |--
+ --| |--
+ ---------------------------------------------------------------------------
+ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+ ^
+ This is pin 1
+
1. Add support for accounting huge pages (as a separate controller)
2. Make per-cgroup scanner reclaim not-shared pages first
3. Teach controller to account for shared-pages
-4. Start reclamation when the limit is lowered
-5. Start reclamation in the background when the limit is
+4. Start reclamation in the background when the limit is
not yet hit but the usage is getting closer
Summary
set of DIMMs for channels 0 and 1.
-Within each of the 'mc','mcX' and 'csrowX' directories are several
+Within each of the 'mcX' and 'csrowX' directories are several
EDAC control and attribute files.
-
-============================================================================
-DIRECTORY 'mc'
-
-In directory 'mc' are EDAC system overall control and attribute files:
-
-
-Panic on UE control file:
-
- 'edac_mc_panic_on_ue'
-
- An uncorrectable error will cause a machine panic. This is usually
- desirable. It is a bad idea to continue when an uncorrectable error
- occurs - it is indeterminate what was uncorrected and the operating
- system context might be so mangled that continuing will lead to further
- corruption. If the kernel has MCE configured, then EDAC will never
- notice the UE.
-
- LOAD TIME: module/kernel parameter: panic_on_ue=[0|1]
-
- RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_panic_on_ue
-
-
-Log UE control file:
-
- 'edac_mc_log_ue'
-
- Generate kernel messages describing uncorrectable errors. These errors
- are reported through the system message log system. UE statistics
- will be accumulated even when UE logging is disabled.
-
- LOAD TIME: module/kernel parameter: log_ue=[0|1]
-
- RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ue
-
-
-Log CE control file:
-
- 'edac_mc_log_ce'
-
- Generate kernel messages describing correctable errors. These
- errors are reported through the system message log system.
- CE statistics will be accumulated even when CE logging is disabled.
-
- LOAD TIME: module/kernel parameter: log_ce=[0|1]
-
- RUN TIME: echo "1" >/sys/devices/system/edac/mc/edac_mc_log_ce
-
-
-Polling period control file:
-
- 'edac_mc_poll_msec'
-
- The time period, in milliseconds, for polling for error information.
- Too small a value wastes resources. Too large a value might delay
- necessary handling of errors and might loose valuable information for
- locating the error. 1000 milliseconds (once each second) is the current
- default. Systems which require all the bandwidth they can get, may
- increase this.
-
- LOAD TIME: module/kernel parameter: poll_msec=[0|1]
-
- RUN TIME: echo "1000" >/sys/devices/system/edac/mc/edac_mc_poll_msec
-
-
============================================================================
'mcX' DIRECTORIES
motherboard specific and determination of this information
must occur in userland at this time.
-
============================================================================
SYSTEM LOGGING
driver-specific error message.
-
============================================================================
PCI Bus Parity Detection
echo "0" >/sys/devices/system/edac/pci/check_pci_parity
+Parity Count:
+
+ 'pci_parity_count'
+
+ This attribute file will display the number of parity errors that
+ have been detected.
+
+
+============================================================================
+MODULE PARAMETERS
+
+Panic on UE control file:
+
+ 'edac_mc_panic_on_ue'
+
+ An uncorrectable error will cause a machine panic. This is usually
+ desirable. It is a bad idea to continue when an uncorrectable error
+ occurs - it is indeterminate what was uncorrected and the operating
+ system context might be so mangled that continuing will lead to further
+ corruption. If the kernel has MCE configured, then EDAC will never
+ notice the UE.
+
+ LOAD TIME: module/kernel parameter: edac_mc_panic_on_ue=[0|1]
+
+ RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_panic_on_ue
+
+
+Log UE control file:
+
+ 'edac_mc_log_ue'
+
+ Generate kernel messages describing uncorrectable errors. These errors
+ are reported through the system message log system. UE statistics
+ will be accumulated even when UE logging is disabled.
+
+ LOAD TIME: module/kernel parameter: edac_mc_log_ue=[0|1]
+
+ RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ue
+
+
+Log CE control file:
+
+ 'edac_mc_log_ce'
+
+ Generate kernel messages describing correctable errors. These
+ errors are reported through the system message log system.
+ CE statistics will be accumulated even when CE logging is disabled.
+
+ LOAD TIME: module/kernel parameter: edac_mc_log_ce=[0|1]
+
+ RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ce
+
+
+Polling period control file:
+
+ 'edac_mc_poll_msec'
+
+ The time period, in milliseconds, for polling for error information.
+ Too small a value wastes resources. Too large a value might delay
+ necessary handling of errors and might loose valuable information for
+ locating the error. 1000 milliseconds (once each second) is the current
+ default. Systems which require all the bandwidth they can get, may
+ increase this.
+
+ LOAD TIME: module/kernel parameter: edac_mc_poll_msec=[0|1]
+
+ RUN TIME: echo "1000" > /sys/module/edac_core/parameters/edac_mc_poll_msec
+
Panic on PCI PARITY Error:
error has been detected.
- module/kernel parameter: panic_on_pci_parity=[0|1]
+ module/kernel parameter: edac_panic_on_pci_pe=[0|1]
Enable:
- echo "1" >/sys/devices/system/edac/pci/panic_on_pci_parity
+ echo "1" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
Disable:
- echo "0" >/sys/devices/system/edac/pci/panic_on_pci_parity
-
-
-Parity Count:
-
- 'pci_parity_count'
-
- This attribute file will display the number of parity errors that
- have been detected.
+ echo "0" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
---------------------------
-What: find_task_by_pid
-When: 2.6.26
-Why: With pid namespaces, calling this funciton will return the
- wrong task when called from inside a namespace.
-
- The best way to save a task pid and find a task by this
- pid later, is to find this task's struct pid pointer (or get
- it directly from the task) and call pid_task() later.
-
- If someone really needs to get a task by its pid_t, then
- he most likely needs the find_task_by_vpid() to get the
- task from the same namespace as the current task is in, but
- this may be not so in general.
-
-Who: Pavel Emelyanov <xemul@openvz.org>
-
----------------------------
-
What: ACPI procfs interface
When: July 2008
Why: ACPI sysfs conversion should be finished by January 2008.
emulate the Windows 95 rule for create.
Default setting is `lower'.
+tz=UTC -- Interpret timestamps as UTC rather than local time.
+ This option disables the conversion of timestamps
+ between local time (as used by Windows on FAT) and UTC
+ (which Linux uses internally). This is particuluarly
+ useful when mounting devices (like digital cameras)
+ that are set to UTC in order to avoid the pitfalls of
+ local time.
+
<bool>: 0,1,yes,no,true,false
TODO
Dynamic definition of GPIOs is not currently standard; for example, as
a side effect of configuring an add-on board with some GPIO expanders.
-These calls are purely for kernel space, but a userspace API could be built
-on top of them.
-
GPIO implementor's framework (OPTIONAL)
=======================================
As noted earlier, there is an optional implementation framework making it
easier for platforms to support different kinds of GPIO controller using
-the same programming interface.
+the same programming interface. This framework is called "gpiolib".
As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file
will be found there. That will list all the controllers registered through
Platform Support
----------------
-To support this framework, a platform's Kconfig will "select HAVE_GPIO_LIB"
+To support this framework, a platform's Kconfig will "select" either
+ARCH_REQUIRE_GPIOLIB or ARCH_WANT_OPTIONAL_GPIOLIB
and arrange that its <asm/gpio.h> includes <asm-generic/gpio.h> and defines
three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep().
They may also want to provide a custom value for ARCH_NR_GPIOS.
+ARCH_REQUIRE_GPIOLIB means that the gpio-lib code will always get compiled
+into the kernel on that architecture.
+
+ARCH_WANT_OPTIONAL_GPIOLIB means the gpio-lib code defaults to off and the user
+can enable it and build it into the kernel optionally.
+
+If neither of these options are selected, the platform does not support
+GPIOs through GPIO-lib and the code cannot be enabled by the user.
+
Trivial implementations of those functions can directly use framework
code, which always dispatches through the gpio_chip:
calls for that GPIO can work. One way to address such dependencies is for
such gpio_chip controllers to provide setup() and teardown() callbacks to
board specific code; those board specific callbacks would register devices
-once all the necessary resources are available.
+once all the necessary resources are available, and remove them later when
+the GPIO controller device becomes unavailable.
+
+
+Sysfs Interface for Userspace (OPTIONAL)
+========================================
+Platforms which use the "gpiolib" implementors framework may choose to
+configure a sysfs user interface to GPIOs. This is different from the
+debugfs interface, since it provides control over GPIO direction and
+value instead of just showing a gpio state summary. Plus, it could be
+present on production systems without debugging support.
+
+Given approprate hardware documentation for the system, userspace could
+know for example that GPIO #23 controls the write protect line used to
+protect boot loader segments in flash memory. System upgrade procedures
+may need to temporarily remove that protection, first importing a GPIO,
+then changing its output state, then updating the code before re-enabling
+the write protection. In normal use, GPIO #23 would never be touched,
+and the kernel would have no need to know about it.
+
+Again depending on appropriate hardware documentation, on some systems
+userspace GPIO can be used to determine system configuration data that
+standard kernels won't know about. And for some tasks, simple userspace
+GPIO drivers could be all that the system really needs.
+
+Note that standard kernel drivers exist for common "LEDs and Buttons"
+GPIO tasks: "leds-gpio" and "gpio_keys", respectively. Use those
+instead of talking directly to the GPIOs; they integrate with kernel
+frameworks better than your userspace code could.
+
+
+Paths in Sysfs
+--------------
+There are three kinds of entry in /sys/class/gpio:
+
+ - Control interfaces used to get userspace control over GPIOs;
+
+ - GPIOs themselves; and
+
+ - GPIO controllers ("gpio_chip" instances).
+
+That's in addition to standard files including the "device" symlink.
+
+The control interfaces are write-only:
+
+ /sys/class/gpio/
+
+ "export" ... Userspace may ask the kernel to export control of
+ a GPIO to userspace by writing its number to this file.
+
+ Example: "echo 19 > export" will create a "gpio19" node
+ for GPIO #19, if that's not requested by kernel code.
+
+ "unexport" ... Reverses the effect of exporting to userspace.
+
+ Example: "echo 19 > unexport" will remove a "gpio19"
+ node exported using the "export" file.
+
+GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42)
+and have the following read/write attributes:
+
+ /sys/class/gpio/gpioN/
+
+ "direction" ... reads as either "in" or "out". This value may
+ normally be written. Writing as "out" defaults to
+ initializing the value as low. To ensure glitch free
+ operation, values "low" and "high" may be written to
+ configure the GPIO as an output with that initial value.
+
+ Note that this attribute *will not exist* if the kernel
+ doesn't support changing the direction of a GPIO, or
+ it was exported by kernel code that didn't explicitly
+ allow userspace to reconfigure this GPIO's direction.
+
+ "value" ... reads as either 0 (low) or 1 (high). If the GPIO
+ is configured as an output, this value may be written;
+ any nonzero value is treated as high.
+
+GPIO controllers have paths like /sys/class/gpio/chipchip42/ (for the
+controller implementing GPIOs starting at #42) and have the following
+read-only attributes:
+
+ /sys/class/gpio/gpiochipN/
+
+ "base" ... same as N, the first GPIO managed by this chip
+
+ "label" ... provided for diagnostics (not always unique)
+
+ "ngpio" ... how many GPIOs this manges (N to N + ngpio - 1)
+
+Board documentation should in most cases cover what GPIOs are used for
+what purposes. However, those numbers are not always stable; GPIOs on
+a daughtercard might be different depending on the base board being used,
+or other cards in the stack. In such cases, you may need to use the
+gpiochip nodes (possibly in conjunction with schematics) to determine
+the correct GPIO number to use for a given signal.
+
+
+Exporting from Kernel code
+--------------------------
+Kernel code can explicitly manage exports of GPIOs which have already been
+requested using gpio_request():
+
+ /* export the GPIO to userspace */
+ int gpio_export(unsigned gpio, bool direction_may_change);
+
+ /* reverse gpio_export() */
+ void gpio_unexport();
+
+After a kernel driver requests a GPIO, it may only be made available in
+the sysfs interface by gpio_export(). The driver can control whether the
+signal direction may change. This helps drivers prevent userspace code
+from accidentally clobbering important system state.
+
+This explicit exporting can help with debugging (by making some kinds
+of experiments easier), or can provide an always-there interface that's
+suitable for documenting as part of a board support package.
<deci-seconds>: poll all this frequency
0: no polling (default)
- tipar.timeout= [HW,PPT]
- Set communications timeout in tenths of a second
- (default 15).
-
- tipar.delay= [HW,PPT]
- Set inter-bit delay in microseconds (default 10).
-
tmscsim= [HW,SCSI]
See comment before function dc390_setup() in
drivers/scsi/tmscsim.c.
=============================================================================
-
- MOXA Smartio Family Device Driver Ver 1.1 Installation Guide
- for Linux Kernel 2.2.x and 2.0.3x
- Copyright (C) 1999, Moxa Technologies Co, Ltd.
+ MOXA Smartio/Industio Family Device Driver Installation Guide
+ for Linux Kernel 2.4.x, 2.6.x
+ Copyright (C) 2008, Moxa Inc.
=============================================================================
+Date: 01/21/2008
+
Content
1. Introduction
2. System Requirement
3. Installation
+ 3.1 Hardware installation
+ 3.2 Driver files
+ 3.3 Device naming convention
+ 3.4 Module driver configuration
+ 3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x.
+ 3.6 Custom configuration
+ 3.7 Verify driver installation
4. Utilities
5. Setserial
6. Troubleshooting
-----------------------------------------------------------------------------
1. Introduction
- The Smartio family Linux driver, Ver. 1.1, supports following multiport
+ The Smartio/Industio/UPCI family Linux driver supports following multiport
boards.
- -C104P/H/HS, C104H/PCI, C104HS/PCI, CI-104J 4 port multiport board.
- -C168P/H/HS, C168H/PCI 8 port multiport board.
-
- This driver has been modified a little and cleaned up from the Moxa
- contributed driver code and merged into Linux 2.2.14pre. In particular
- official major/minor numbers have been assigned which are different to
- those the original Moxa supplied driver used.
+ - 2 ports multiport board
+ CP-102U, CP-102UL, CP-102UF
+ CP-132U-I, CP-132UL,
+ CP-132, CP-132I, CP132S, CP-132IS,
+ CI-132, CI-132I, CI-132IS,
+ (C102H, C102HI, C102HIS, C102P, CP-102, CP-102S)
+
+ - 4 ports multiport board
+ CP-104EL,
+ CP-104UL, CP-104JU,
+ CP-134U, CP-134U-I,
+ C104H/PCI, C104HS/PCI,
+ CP-114, CP-114I, CP-114S, CP-114IS, CP-114UL,
+ C104H, C104HS,
+ CI-104J, CI-104JS,
+ CI-134, CI-134I, CI-134IS,
+ (C114HI, CT-114I, C104P)
+ POS-104UL,
+ CB-114,
+ CB-134I
+
+ - 8 ports multiport board
+ CP-118EL, CP-168EL,
+ CP-118U, CP-168U,
+ C168H/PCI,
+ C168H, C168HS,
+ (C168P),
+ CB-108
This driver and installation procedure have been developed upon Linux Kernel
- 2.2.5 and backward compatible to 2.0.3x. This driver supports Intel x86 and
- Alpha hardware platform. In order to maintain compatibility, this version
- has also been properly tested with RedHat, OpenLinux, TurboLinux and
- S.u.S.E Linux. However, if compatibility problem occurs, please contact
- Moxa at support@moxa.com.tw.
+ 2.4.x and 2.6.x. This driver supports Intel x86 hardware platform. In order
+ to maintain compatibility, this version has also been properly tested with
+ RedHat, Mandrake, Fedora and S.u.S.E Linux. However, if compatibility problem
+ occurs, please contact Moxa at support@moxa.com.tw.
In addition to device driver, useful utilities are also provided in this
version. They are
- - msdiag Diagnostic program for detecting installed Moxa Smartio boards.
+ - msdiag Diagnostic program for displaying installed Moxa
+ Smartio/Industio boards.
- msmon Monitor program to observe data count and line status signals.
- msterm A simple terminal program which is useful in testing serial
ports.
GNU General Public License in this version. Please refer to GNU General
Public License announcement in each source code file for more detail.
- In Moxa's ftp sites, you may always find latest driver at
- ftp://ftp.moxa.com or ftp://ftp.moxa.com.tw.
+ In Moxa's Web sites, you may always find latest driver at http://web.moxa.com.
This version of driver can be installed as Loadable Module (Module driver)
or built-in into kernel (Static driver). You may refer to following
-----------------------------------------------------------------------------
2. System Requirement
- - Hardware platform: Intel x86 or Alpha machine
- - Kernel version: 2.0.3x or 2.2.x
+ - Hardware platform: Intel x86 machine
+ - Kernel version: 2.4.x or 2.6.x
- gcc version 2.72 or later
- Maximum 4 boards can be installed in combination
-----------------------------------------------------------------------------
3. Installation
+ 3.1 Hardware installation
+ 3.2 Driver files
+ 3.3 Device naming convention
+ 3.4 Module driver configuration
+ 3.5 Static driver configuration for Linux kernel 2.4.x, 2.6.x.
+ 3.6 Custom configuration
+ 3.7 Verify driver installation
+
+
3.1 Hardware installation
- There are two types of buses, ISA and PCI, for Smartio family multiport
- board.
+ There are two types of buses, ISA and PCI, for Smartio/Industio
+ family multiport board.
ISA board
---------
installation procedure in User's Manual before proceed any further.
Please make sure the JP1 is open after the ISA board is set properly.
- PCI board
- ---------
+ PCI/UPCI board
+ --------------
You may need to adjust IRQ usage in BIOS to avoid from IRQ conflict
with other ISA devices. Please refer to hardware installation
procedure in User's Manual in advance.
- IRQ Sharing
+ PCI IRQ Sharing
-----------
Each port within the same multiport board shares the same IRQ. Up to
- 4 Moxa Smartio Family multiport boards can be installed together on
- one system and they can share the same IRQ.
+ 4 Moxa Smartio/Industio PCI Family multiport boards can be installed
+ together on one system and they can share the same IRQ.
+
- 3.2 Driver files and device naming convention
+ 3.2 Driver files
The driver file may be obtained from ftp, CD-ROM or floppy disk. The
first step, anyway, is to copy driver file "mxser.tgz" into specified
directory. e.g. /moxa. The execute commands as below.
+ # cd /
+ # mkdir moxa
# cd /moxa
- # tar xvf /dev/fd0
+ # tar xvf /dev/fd0
+
or
+
+ # cd /
+ # mkdir moxa
# cd /moxa
# cp /mnt/cdrom/<driver directory>/mxser.tgz .
# tar xvfz mxser.tgz
+
+ 3.3 Device naming convention
+
You may find all the driver and utilities files in /moxa/mxser.
Following installation procedure depends on the model you'd like to
- run the driver. If you prefer module driver, please refer to 3.3.
- If static driver is required, please refer to 3.4.
+ run the driver. If you prefer module driver, please refer to 3.4.
+ If static driver is required, please refer to 3.5.
Dialin and callout port
-----------------------
- This driver remains traditional serial device properties. There're
+ This driver remains traditional serial device properties. There are
two special file name for each serial port. One is dial-in port
which is named "ttyMxx". For callout port, the naming convention
is "cumxx".
Device naming when more than 2 boards installed
-----------------------------------------------
- Naming convention for each Smartio multiport board is pre-defined
- as below.
+ Naming convention for each Smartio/Industio multiport board is
+ pre-defined as below.
Board Num. Dial-in Port Callout port
1st board ttyM0 - ttyM7 cum0 - cum7
3rd board ttyM16 - ttyM23 cum16 - cum23
4th board ttyM24 - ttym31 cum24 - cum31
+
+ !!!!!!!!!!!!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ Under Kernel 2.6 the cum Device is Obsolete. So use ttyM*
+ device instead.
+ !!!!!!!!!!!!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
Board sequence
--------------
This driver will activate ISA boards according to the parameter set
For PCI boards, their sequence will be after ISA boards and C168H/PCI
has higher priority than C104H/PCI boards.
- 3.3 Module driver configuration
+ 3.4 Module driver configuration
Module driver is easiest way to install. If you prefer static driver
installation, please skip this paragraph.
- 1. Find "Makefile" in /moxa/mxser, then run
- # make install
+
+ ------------- Prepare to use the MOXA driver--------------------
+ 3.4.1 Create tty device with correct major number
+ Before using MOXA driver, your system must have the tty devices
+ which are created with driver's major number. We offer one shell
+ script "msmknod" to simplify the procedure.
+ This step is only needed to be executed once. But you still
+ need to do this procedure when:
+ a. You change the driver's major number. Please refer the "3.7"
+ section.
+ b. Your total installed MOXA boards number is changed. Maybe you
+ add/delete one MOXA board.
+ c. You want to change the tty name. This needs to modify the
+ shell script "msmknod"
+
+ The procedure is:
+ # cd /moxa/mxser/driver
+ # ./msmknod
+
+ This shell script will require the major number for dial-in
+ device and callout device to create tty device. You also need
+ to specify the total installed MOXA board number. Default major
+ numbers for dial-in device and callout device are 30, 35. If
+ you need to change to other number, please refer section "3.7"
+ for more detailed procedure.
+ Msmknod will delete any special files occupying the same device
+ naming.
+
+ 3.4.2 Build the MOXA driver and utilities
+ Before using the MOXA driver and utilities, you need compile the
+ all the source code. This step is only need to be executed once.
+ But you still re-compile the source code if you modify the source
+ code. For example, if you change the driver's major number (see
+ "3.7" section), then you need to do this step again.
+
+ Find "Makefile" in /moxa/mxser, then run
+
+ # make clean; make install
+
+ !!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!
+ For Red Hat 9, Red Hat Enterprise Linux AS3/ES3/WS3 & Fedora Core1:
+ # make clean; make installsp1
+
+ For Red Hat Enterprise Linux AS4/ES4/WS4:
+ # make clean; make installsp2
+ !!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!
The driver files "mxser.o" and utilities will be properly compiled
- and copied to system directories respectively.Then run
+ and copied to system directories respectively.
- # insmod mxser
+ ------------- Load MOXA driver--------------------
+ 3.4.3 Load the MOXA driver
- to activate the modular driver. You may run "lsmod" to check
- if "mxser.o" is activated.
+ # modprobe mxser <argument>
- 2. Create special files by executing "msmknod".
- # cd /moxa/mxser/driver
- # ./msmknod
+ will activate the module driver. You may run "lsmod" to check
+ if "mxser" is activated. If the MOXA board is ISA board, the
+ <argument> is needed. Please refer to section "3.4.5" for more
+ information.
+
+
+ ------------- Load MOXA driver on boot --------------------
+ 3.4.4 For the above description, you may manually execute
+ "modprobe mxser" to activate this driver and run
+ "rmmod mxser" to remove it.
+ However, it's better to have a boot time configuration to
+ eliminate manual operation. Boot time configuration can be
+ achieved by rc file. We offer one "rc.mxser" file to simplify
+ the procedure under "moxa/mxser/driver".
- Default major numbers for dial-in device and callout device are
- 174, 175. Msmknod will delete any special files occupying the same
- device naming.
+ But if you use ISA board, please modify the "modprobe ..." command
+ to add the argument (see "3.4.5" section). After modifying the
+ rc.mxser, please try to execute "/moxa/mxser/driver/rc.mxser"
+ manually to make sure the modification is ok. If any error
+ encountered, please try to modify again. If the modification is
+ completed, follow the below step.
- 3. Up to now, you may manually execute "insmod mxser" to activate
- this driver and run "rmmod mxser" to remove it. However, it's
- better to have a boot time configuration to eliminate manual
- operation.
- Boot time configuration can be achieved by rc file. Run following
- command for setting rc files.
+ Run following command for setting rc files.
# cd /moxa/mxser/driver
# cp ./rc.mxser /etc/rc.d
# cd /etc/rc.d
- You may have to modify part of the content in rc.mxser to specify
- parameters for ISA board. Please refer to rc.mxser for more detail.
- Find "rc.serial". If "rc.serial" doesn't exist, create it by vi.
- Add "rc.mxser" in last line. Next, open rc.local by vi
- and append following content.
+ Check "rc.serial" is existed or not. If "rc.serial" doesn't exist,
+ create it by vi, run "chmod 755 rc.serial" to change the permission.
+ Add "/etc/rc.d/rc.mxser" in last line,
- if [ -f /etc/rc.d/rc.serial ]; then
- sh /etc/rc.d/rc.serial
- fi
+ Reboot and check if moxa.o activated by "lsmod" command.
- 4. Reboot and check if mxser.o activated by "lsmod" command.
- 5. If you'd like to drive Smartio ISA boards in the system, you'll
- have to add parameter to specify CAP address of given board while
- activating "mxser.o". The format for parameters are as follows.
+ 3.4.5. If you'd like to drive Smartio/Industio ISA boards in the system,
+ you'll have to add parameter to specify CAP address of given
+ board while activating "mxser.o". The format for parameters are
+ as follows.
- insmod mxser ioaddr=0x???,0x???,0x???,0x???
+ modprobe mxser ioaddr=0x???,0x???,0x???,0x???
| | | |
| | | +- 4th ISA board
| | +------ 3rd ISA board
| +------------ 2nd ISA board
+------------------- 1st ISA board
- 3.4 Static driver configuration
+ 3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x
+
+ Note: To use static driver, you must install the linux kernel
+ source package.
+
+ 3.5.1 Backup the built-in driver in the kernel.
+ # cd /usr/src/linux/drivers/char
+ # mv mxser.c mxser.c.old
+
+ For Red Hat 7.x user, you need to create link:
+ # cd /usr/src
+ # ln -s linux-2.4 linux
- 1. Create link
+ 3.5.2 Create link
# cd /usr/src/linux/drivers/char
# ln -s /moxa/mxser/driver/mxser.c mxser.c
- 2. Add CAP address list for ISA boards
+ 3.5.3 Add CAP address list for ISA boards. For PCI boards user,
+ please skip this step.
+
In module mode, the CAP address for ISA board is given by
parameter. In static driver configuration, you'll have to
assign it within driver's source code. If you will not
static int mxserBoardCAP[]
= {0x280, 0x180, 0x00, 0x00};
- 3. Modify tty_io.c
- # cd /usr/src/linux/drivers/char/
- # vi tty_io.c
- Find pty_init(), insert "mxser_init()" as
+ 3.5.4 Setup kernel configuration
- pty_init();
- mxser_init();
+ Configure the kernel:
- 4. Modify tty.h
- # cd /usr/src/linux/include/linux
- # vi tty.h
- Find extern int tty_init(void), insert "mxser_init()" as
+ # cd /usr/src/linux
+ # make menuconfig
- extern int tty_init(void);
- extern int mxser_init(void);
-
- 5. Modify Makefile
- # cd /usr/src/linux/drivers/char
- # vi Makefile
- Find L_OBJS := tty_io.o ...... random.o, add
- "mxser.o" at last of this line as
- L_OBJS := tty_io.o ....... mxser.o
+ You will go into a menu-driven system. Please select [Character
+ devices][Non-standard serial port support], enable the [Moxa
+ SmartIO support] driver with "[*]" for built-in (not "[M]"), then
+ select [Exit] to exit this program.
- 6. Rebuild kernel
- The following are for Linux kernel rebuilding,for your reference only.
+ 3.5.5 Rebuild kernel
+ The following are for Linux kernel rebuilding, for your
+ reference only.
For appropriate details, please refer to the Linux document.
- If 'lilo' utility is installed, please use 'make zlilo' to rebuild
- kernel. If 'lilo' is not installed, please follow the following steps.
-
a. cd /usr/src/linux
- b. make clean /* take a few minutes */
- c. make bzImage /* take probably 10-20 minutes */
- d. Backup original boot kernel. /* optional step */
- e. cp /usr/src/linux/arch/i386/boot/bzImage /boot/vmlinuz
+ b. make clean /* take a few minutes */
+ c. make dep /* take a few minutes */
+ d. make bzImage /* take probably 10-20 minutes */
+ e. make install /* copy boot image to correct position */
f. Please make sure the boot kernel (vmlinuz) is in the
- correct position. If you use 'lilo' utility, you should
- check /etc/lilo.conf 'image' item specified the path
- which is the 'vmlinuz' path, or you will load wrong
- (or old) boot kernel image (vmlinuz).
- g. chmod 400 /vmlinuz
- h. lilo
- i. rdev -R /vmlinuz 1
- j. sync
-
- Note that if the result of "make zImage" is ERROR, then you have to
- go back to Linux configuration Setup. Type "make config" in directory
- /usr/src/linux or "setup".
-
- Since system include file, /usr/src/linux/include/linux/interrupt.h,
- is modified each time the MOXA driver is installed, kernel rebuilding
- is inevitable. And it takes about 10 to 20 minutes depends on the
- machine.
-
- 7. Make utility
- # cd /moxa/mxser/utility
- # make install
-
- 8. Make special file
+ correct position.
+ g. If you use 'lilo' utility, you should check /etc/lilo.conf
+ 'image' item specified the path which is the 'vmlinuz' path,
+ or you will load wrong (or old) boot kernel image (vmlinuz).
+ After checking /etc/lilo.conf, please run "lilo".
+
+ Note that if the result of "make bzImage" is ERROR, then you have to
+ go back to Linux configuration Setup. Type "make menuconfig" in
+ directory /usr/src/linux.
+
+
+ 3.5.6 Make tty device and special file
# cd /moxa/mxser/driver
# ./msmknod
- 9. Reboot
+ 3.5.7 Make utility
+ # cd /moxa/mxser/utility
+ # make clean; make install
+
+ 3.5.8 Reboot
- 3.5 Custom configuration
+
+
+ 3.6 Custom configuration
Although this driver already provides you default configuration, you
- still can change the device name and major number.The instruction to
+ still can change the device name and major number. The instruction to
change these parameters are shown as below.
Change Device name
2 free major numbers for this driver. There are 3 steps to change
major numbers.
- 1. Find free major numbers
+ 3.6.1 Find free major numbers
In /proc/devices, you may find all the major numbers occupied
in the system. Please select 2 major numbers that are available.
e.g. 40, 45.
- 2. Create special files
+ 3.6.2 Create special files
Run /moxa/mxser/driver/msmknod to create special files with
specified major numbers.
- 3. Modify driver with new major number
+ 3.6.3 Modify driver with new major number
Run vi to open /moxa/mxser/driver/mxser.c. Locate the line
contains "MXSERMAJOR". Change the content as below.
#define MXSERMAJOR 40
#define MXSERCUMAJOR 45
- 4. Run # make install in /moxa/mxser/driver.
+ 3.6.4 Run "make clean; make install" in /moxa/mxser/driver.
- 3.6 Verify driver installation
+ 3.7 Verify driver installation
You may refer to /var/log/messages to check the latest status
log reported by this driver whenever it's activated.
+
-----------------------------------------------------------------------------
4. Utilities
There are 3 utilities contained in this driver. They are msdiag, msmon and
msterm. These 3 utilities are released in form of source code. They should
be compiled into executable file and copied into /usr/bin.
+ Before using these utilities, please load driver (refer 3.4 & 3.5) and
+ make sure you had run the "msmknod" utility.
+
msdiag - Diagnostic
--------------------
- This utility provides the function to detect what Moxa Smartio multiport
- board exists in the system.
+ This utility provides the function to display what Moxa Smartio/Industio
+ board found by driver in the system.
msmon - Port Monitoring
-----------------------
application, for example, sending AT command to a modem connected to the
port or used as a terminal for login purpose. Note that this is only a
dumb terminal emulation without handling full screen operation.
+
-----------------------------------------------------------------------------
5. Setserial
Supported Setserial parameters are listed as below.
- uart set UART type(16450-->disable FIFO, 16550A-->enable FIFO)
+ uart set UART type(16450-->disable FIFO, 16550A-->enable FIFO)
close_delay set the amount of time(in 1/100 of a second) that DTR
should be kept low while being closed.
closing_wait set the amount of time(in 1/100 of a second) that the
being closed, before the receiver is disable.
spd_hi Use 57.6kb when the application requests 38.4kb.
spd_vhi Use 115.2kb when the application requests 38.4kb.
+ spd_shi Use 230.4kb when the application requests 38.4kb.
+ spd_warp Use 460.8kb when the application requests 38.4kb.
spd_normal Use 38.4kb when the application requests 38.4kb.
+ spd_cust Use the custom divisor to set the speed when the
+ application requests 38.4kb.
+ divisor This option set the custom divison.
+ baud_base This option set the base baud rate.
-----------------------------------------------------------------------------
6. Troubleshooting
possible. If all the possible solutions fail, please contact our technical
support team to get more help.
- Error msg: More than 4 Moxa Smartio family boards found. Fifth board and
- after are ignored.
+
+ Error msg: More than 4 Moxa Smartio/Industio family boards found. Fifth board
+ and after are ignored.
Solution:
To avoid this problem, please unplug fifth and after board, because Moxa
driver supports up to 4 boards.
Error msg: Request_irq fail, IRQ(?) may be conflict with another device.
Solution:
Other PCI or ISA devices occupy the assigned IRQ. If you are not sure
- which device causes the situation,please check /proc/interrupts to find
+ which device causes the situation, please check /proc/interrupts to find
free IRQ and simply change another free IRQ for Moxa board.
Error msg: Board #: C1xx Series(CAP=xxx) interrupt number invalid.
Moxa ISA board needs an interrupt vector.Please refer to user's manual
"Hardware Installation" chapter to set interrupt vector.
- Error msg: Couldn't install MOXA Smartio family driver!
+ Error msg: Couldn't install MOXA Smartio/Industio family driver!
Solution:
Load Moxa driver fail, the major number may conflict with other devices.
- Please refer to previous section 3.5 to change a free major number for
+ Please refer to previous section 3.7 to change a free major number for
Moxa driver.
- Error msg: Couldn't install MOXA Smartio family callout driver!
+ Error msg: Couldn't install MOXA Smartio/Industio family callout driver!
Solution:
Load Moxa callout driver fail, the callout device major number may
- conflict with other devices. Please refer to previous section 3.5 to
+ conflict with other devices. Please refer to previous section 3.7 to
change a free callout device major number for Moxa driver.
+
+
-----------------------------------------------------------------------------
+
p) Freescale Synchronous Serial Interface
q) USB EHCI controllers
r) MDIO on GPIOs
+ s) SPI busses
VII - Marvell Discovery mv64[345]6x System Controller chips
1) The /system-controller node
&qe_pio_c 6>;
};
+ s) SPI (Serial Peripheral Interface) busses
+
+ SPI busses can be described with a node for the SPI master device
+ and a set of child nodes for each SPI slave on the bus. For this
+ discussion, it is assumed that the system's SPI controller is in
+ SPI master mode. This binding does not describe SPI controllers
+ in slave mode.
+
+ The SPI master node requires the following properties:
+ - #address-cells - number of cells required to define a chip select
+ address on the SPI bus.
+ - #size-cells - should be zero.
+ - compatible - name of SPI bus controller following generic names
+ recommended practice.
+ No other properties are required in the SPI bus node. It is assumed
+ that a driver for an SPI bus device will understand that it is an SPI bus.
+ However, the binding does not attempt to define the specific method for
+ assigning chip select numbers. Since SPI chip select configuration is
+ flexible and non-standardized, it is left out of this binding with the
+ assumption that board specific platform code will be used to manage
+ chip selects. Individual drivers can define additional properties to
+ support describing the chip select layout.
+
+ SPI slave nodes must be children of the SPI master node and can
+ contain the following properties.
+ - reg - (required) chip select address of device.
+ - compatible - (required) name of SPI device following generic names
+ recommended practice
+ - spi-max-frequency - (required) Maximum SPI clocking speed of device in Hz
+ - spi-cpol - (optional) Empty property indicating device requires
+ inverse clock polarity (CPOL) mode
+ - spi-cpha - (optional) Empty property indicating device requires
+ shifted clock phase (CPHA) mode
+
+ SPI example for an MPC5200 SPI bus:
+ spi@f00 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,mpc5200b-spi","fsl,mpc5200-spi";
+ reg = <0xf00 0x20>;
+ interrupts = <2 13 0 2 14 0>;
+ interrupt-parent = <&mpc5200_pic>;
+
+ ethernet-switch@0 {
+ compatible = "micrel,ks8995m";
+ spi-max-frequency = <1000000>;
+ reg = <0>;
+ };
+
+ codec@1 {
+ compatible = "ti,tlv320aic26";
+ spi-max-frequency = <100000>;
+ reg = <1>;
+ };
+ };
+
VII - Marvell Discovery mv64[345]6x System Controller chips
===========================================================
where the source or destination (or both) are of type u8* or unsigned char*.
Due to the byte-wise nature of this operation, unaligned accesses are avoided.
+
+Alignment vs. Networking
+========================
+
+On architectures that require aligned loads, networking requires that the IP
+header is aligned on a four-byte boundary to optimise the IP stack. For
+regular ethernet hardware, the constant NET_IP_ALIGN is used. On most
+architectures this constant has the value 2 because the normal ethernet
+header is 14 bytes long, so in order to get proper alignment one needs to
+DMA to an address which can be expressed as 4*n + 2. One notable exception
+here is powerpc which defines NET_IP_ALIGN to 0 because DMA to unaligned
+addresses can be very expensive and dwarf the cost of unaligned loads.
+
+For some ethernet hardware that cannot DMA to unaligned addresses like
+4*n+2 or non-ethernet hardware, this can be a problem, and it is then
+required to copy the incoming frame into an aligned buffer. Because this is
+unnecessary on architectures that can do unaligned accesses, the code can be
+made dependent on CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS like so:
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ skb = original skb
+#else
+ skb = copy skb
+#endif
+
--
-Author: Daniel Drake <dsd@gentoo.org>
+Authors: Daniel Drake <dsd@gentoo.org>,
+ Johannes Berg <johannes@sipsolutions.net>
With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt,
-Johannes Berg, Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock,
-Uli Kunitz, Vadim Lobanov
+Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock, Uli Kunitz,
+Vadim Lobanov
L: linux-scsi@vger.kernel.org
S: Supported
+BT8XXGPIO DRIVER
+P: Michael Buesch
+M: mb@bu3sch.de
+W: http://bu3sch.de/btgpio.php
+S: Maintained
+
BTTV VIDEO4LINUX DRIVER
P: Mauro Carvalho Chehab
M: mchehab@infradead.org
$(Q)$(AWK) '!x[$$0]++' $(vmlinux-dirs:%=$(objtree)/%/modules.order) > $(objtree)/modules.order
@echo ' Building modules, stage 2.';
$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
+ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.fwinst obj=firmware __fw_modbuild
# Target to prepare building external modules
for kernel debugging, non-intrusive instrumentation and testing.
If in doubt, say "N".
+config HAVE_EFFICIENT_UNALIGNED_ACCESS
+ def_bool n
+ help
+ Some architectures are unable to perform unaligned accesses
+ without the use of get_unaligned/put_unaligned. Others are
+ unable to perform such accesses efficiently (e.g. trap on
+ unaligned access and require fixing it up in the exception
+ handler.)
+
+ This symbol should be selected by an architecture if it can
+ perform unaligned accesses efficiently to allow different
+ code paths to be selected for these cases. Some network
+ drivers, for example, could opt to not fix up alignment
+ problems with received packets if doing so would not help
+ much.
+
+ See Documentation/unaligned-memory-access.txt for more
+ information on the topic of unaligned memory accesses.
+
config KRETPROBES
def_bool y
depends on KPROBES && HAVE_KRETPROBES
static int fill_inbuf(void);
static void flush_window(void);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
static char *input_data;
static int input_data_size;
static ulg output_ptr;
static ulg bytes_out;
-static void *malloc(int size);
-static void free(void *where);
static void error(char *m);
static void gzip_mark(void **);
static void gzip_release(void **);
extern int end;
static ulg free_mem_ptr;
-static ulg free_mem_ptr_end;
+static ulg free_mem_end_ptr;
#define HEAP_SIZE 0x3000
#include "../../../lib/inflate.c"
-static void *malloc(int size)
-{
- void *p;
-
- if (size <0) error("Malloc error");
- if (free_mem_ptr <= 0) error("Memory error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_ptr_end)
- error("Out of memory");
- return p;
-}
-
-static void free(void *where)
-{ /* gzip_mark & gzip_release do the free */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (long) *ptr;
-}
-
/* ===========================================================================
* Fill the input buffer. This is called only when the buffer is empty
* and at least one byte is really needed.
/* FIXME FIXME FIXME */
free_mem_ptr = (ulg)output_start + ksize;
- free_mem_ptr_end = (ulg)output_start + ksize + 0x200000;
+ free_mem_end_ptr = (ulg)output_start + ksize + 0x200000;
/* FIXME FIXME FIXME */
/* put in temp area to reduce initial footprint */
select GENERIC_GPIO
select HAVE_CLK
select HAVE_CLK
- select HAVE_GPIO_LIB
+ select ARCH_REQUIRE_GPIOLIB
help
This enables support for the Cirrus EP93xx series of CPUs.
select ARCH_MTD_XIP
select GENERIC_GPIO
select HAVE_CLK
- select HAVE_GPIO_LIB
+ select ARCH_REQUIRE_GPIOLIB
select GENERIC_TIME
select GENERIC_CLOCKEVENTS
select TICK_ONESHOT
select GENERIC_CLOCKEVENTS
select HAVE_CLK
select TICK_ONESHOT
- select HAVE_GPIO_LIB
+ select ARCH_REQUIRE_GPIOLIB
help
Support for StrongARM 11x0 based boards.
bool "TI OMAP"
select GENERIC_GPIO
select HAVE_CLK
- select HAVE_GPIO_LIB
+ select ARCH_REQUIRE_GPIOLIB
select GENERIC_TIME
select GENERIC_CLOCKEVENTS
help
static int fill_inbuf(void);
static void flush_window(void);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
extern char input_data[];
extern char input_data_end[];
static ulg output_ptr;
static ulg bytes_out;
-static void *malloc(int size);
-static void free(void *where);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
static void putstr(const char *);
extern int end;
static ulg free_mem_ptr;
-static ulg free_mem_ptr_end;
+static ulg free_mem_end_ptr;
-#define HEAP_SIZE 0x3000
-
-#include "../../../../lib/inflate.c"
-
-#ifndef STANDALONE_DEBUG
-static void *malloc(int size)
-{
- void *p;
-
- if (size <0) error("Malloc error");
- if (free_mem_ptr <= 0) error("Memory error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_ptr_end)
- error("Out of memory");
- return p;
-}
-
-static void free(void *where)
-{ /* gzip_mark & gzip_release do the free */
-}
-
-static void gzip_mark(void **ptr)
-{
- arch_decomp_wdog();
- *ptr = (void *) free_mem_ptr;
-}
+#ifdef STANDALONE_DEBUG
+#define NO_INFLATE_MALLOC
+#endif
-static void gzip_release(void **ptr)
-{
- arch_decomp_wdog();
- free_mem_ptr = (long) *ptr;
-}
-#else
-static void gzip_mark(void **ptr)
-{
-}
+#define ARCH_HAS_DECOMP_WDOG
-static void gzip_release(void **ptr)
-{
-}
-#endif
+#include "../../../../lib/inflate.c"
/* ===========================================================================
* Fill the input buffer. This is called only when the buffer is empty
{
output_data = (uch *)output_start; /* Points to kernel start */
free_mem_ptr = free_mem_ptr_p;
- free_mem_ptr_end = free_mem_ptr_end_p;
+ free_mem_end_ptr = free_mem_ptr_end_p;
__machine_arch_type = arch_id;
arch_decomp_setup();
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/*
* It is possible to have multiple instances associated with a given
}
kretprobe_assert(ri, orig_ret_address, trampoline_address);
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
return (void *)orig_ret_address;
}
-/* Called with kretprobe_lock held. */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
bank->chip.set = gpio_set;
if (bank_is_mpuio(bank)) {
bank->chip.label = "mpuio";
+#ifdef CONFIG_ARCH_OMAP1
+ bank->chip.dev = &omap_mpuio_device.dev;
+#endif
bank->chip.base = OMAP_MPUIO(0);
} else {
bank->chip.label = "gpio";
select SUBARCH_AVR32B
select MMU
select PERFORMANCE_COUNTERS
- select HAVE_GPIO_LIB
+ select ARCH_REQUIRE_GPIOLIB
select GENERIC_ALLOCATOR
#
pio->chip.label = pio->name;
pio->chip.base = pdev->id * 32;
pio->chip.ngpio = 32;
+ pio->chip.dev = &pdev->dev;
+ pio->chip.owner = THIS_MODULE;
pio->chip.direction_input = direction_input;
pio->chip.get = gpio_get;
static long bytes_out = 0;
static uch *output_data;
static unsigned long output_ptr = 0;
-
-static void *malloc(int size);
-static void free(void *where);
-static void gzip_mark(void **);
-static void gzip_release(void **);
-
static void puts(const char *);
/* the "heap" is put directly after the BSS ends, at end */
extern int _end;
static long free_mem_ptr = (long)&_end;
+static long free_mem_end_ptr;
#include "../../../../../lib/inflate.c"
-static void *malloc(int size)
-{
- void *p;
-
- if (size < 0)
- error("Malloc error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- return p;
-}
-
-static void free(void *where)
-{ /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (long) *ptr;
-}
-
/* decompressor info and error messages to serial console */
static void
static void flush_window(void);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
extern char *input_data; /* lives in head.S */
-static long bytes_out = 0;
+static long bytes_out;
static uch *output_data;
-static unsigned long output_ptr = 0;
+static unsigned long output_ptr;
-static void *malloc(int size);
-static void free(void *where);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
static void puts(const char *);
extern int _end;
static long free_mem_ptr = (long)&_end;
+static long free_mem_end_ptr;
#include "../../../../../lib/inflate.c"
-static void *malloc(int size)
-{
- void *p;
-
- if (size <0) error("Malloc error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- return p;
-}
-
-static void free(void *where)
-{ /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (long) *ptr;
-}
-
/* decompressor info and error messages to serial console */
static inline void
Read the instructions in <file:Documentation/Changes> pertaining to
pseudo terminals. It's safe to say N.
-config UNIX98_PTY_COUNT
- int "Maximum number of Unix98 PTYs in use (0-2048)"
- depends on UNIX98_PTYS
- default "256"
- help
- The maximum number of Unix98 PTYs that can be used at any one time.
- The default is 256, and should be enough for desktop systems. Server
- machines which support incoming telnet/rlogin/ssh connections and/or
- serve several X terminals may want to increase this: every incoming
- connection and every xterm uses up one PTY.
-
- When not in use, each additional set of 256 PTYs occupy
- approximately 8 KB of kernel memory on 32-bit architectures.
-
source "drivers/char/pcmcia/Kconfig"
source "drivers/serial/Kconfig"
static int fill_inbuf(void);
static void flush_window(void);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
extern char input_data[];
extern int input_len;
static uch *output_data;
static unsigned long output_ptr = 0;
-static void *malloc(int size);
-static void free(void *where);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
int puts(const char *);
#define TDR *((volatile unsigned char *)0xffff8b)
#define SSR *((volatile unsigned char *)0xffff8c)
-static void *malloc(int size)
-{
- void *p;
-
- if (size <0) error("Malloc error");
- if (free_mem_ptr == 0) error("Memory error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_end_ptr)
- error("Out of memory");
-
- return p;
-}
-
-static void free(void *where)
-{ /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (long) *ptr;
-}
-
int puts(const char *s)
{
return 0;
((struct fnptr *)kretprobe_trampoline)->ip;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/*
* It is possible to have multiple instances associated with a given
kretprobe_assert(ri, orig_ret_address, trampoline_address);
reset_current_kprobe();
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
preempt_enable_no_resched();
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
return 1;
}
-/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
static int fill_inbuf(void);
static void flush_window(void);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
static unsigned char *input_data;
static int input_len;
#include "m32r_sio.c"
-static void *malloc(int size);
-static void free(void *where);
-
static unsigned long free_mem_ptr;
static unsigned long free_mem_end_ptr;
#include "../../../../lib/inflate.c"
-static void *malloc(int size)
-{
- void *p;
-
- if (size <0) error("Malloc error");
- if (free_mem_ptr == 0) error("Memory error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_end_ptr)
- error("Out of memory");
-
- return p;
-}
-
-static void free(void *where)
-{ /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (long) *ptr;
-}
-
void* memset(void* s, int c, size_t n)
{
int i;
config GPIO_TXX9
select GENERIC_GPIO
- select HAVE_GPIO_LIB
+ select ARCH_REQUIRE_GPIOLIB
bool
config CFE
#include <linux/file.h>
#include <linux/smp_lock.h>
#include <linux/highuid.h>
-#include <linux/dirent.h>
#include <linux/resource.h>
#include <linux/highmem.h>
#include <linux/time.h>
static unsigned long output_ptr;
-static void *malloc(int size);
-
-static inline void free(void *where)
-{ /* Don't care */
-}
-
static unsigned long free_mem_ptr = (unsigned long) &end;
static unsigned long free_mem_end_ptr = (unsigned long) &end + 0x90000;
-static inline void gzip_mark(void **ptr)
-{
- kputs(".");
- *ptr = (void *) free_mem_ptr;
-}
-
-static inline void gzip_release(void **ptr)
-{
- free_mem_ptr = (unsigned long) *ptr;
-}
-
#define INPLACE_MOVE_ROUTINE 0x1000
#define LOW_BUFFER_START 0x2000
#define LOW_BUFFER_END 0x90000
#include "../../../../lib/inflate.c"
-static void *malloc(int size)
-{
- void *p;
-
- if (size < 0)
- error("Malloc error\n");
- if (!free_mem_ptr)
- error("Memory error\n");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *) free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_end_ptr)
- error("\nOut of memory\n");
-
- return p;
-}
-
static inline void scroll(void)
{
int i;
default y
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE
+ select ARCH_WANT_OPTIONAL_GPIOLIB
select HAVE_IDE
select HAVE_IOREMAP_PROT
+ select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_KPROBES
select HAVE_ARCH_KGDB
select HAVE_KRETPROBES
struct cpu_spec* cur_cpu_spec = NULL;
EXPORT_SYMBOL(cur_cpu_spec);
+/* The platform string corresponding to the real PVR */
+const char *powerpc_base_platform;
+
/* NOTE:
* Unlike ppc32, ppc64 will only call this once for the boot CPU, it's
* the responsibility of the appropriate CPU save/restore functions to
} else
*t = *s;
*PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
+
+ /*
+ * Set the base platform string once; assumes
+ * we're called with real pvr first.
+ */
+ if (powerpc_base_platform == NULL)
+ powerpc_base_platform = t->platform;
+
#if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE)
/* ppc64 and booke expect identify_cpu to also call
* setup_cpu for that processor. I will consolidate
/* Check to see if the dbcr0 register is set up to debug. Use the
internal debug mode bit to do this. */
lwz r12,THREAD_DBCR0(r12)
- andis. r12,r12,DBCR0_IDM@h
+ andis. r12,r12,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
beq+ 3f
/* From user and task is ptraced - load up global dbcr0 */
li r12,-1 /* clear all pending debug events */
/* If the process has its own DBCR0 value, load it up. The internal
debug mode bit tells us that dbcr0 should be loaded. */
lwz r0,THREAD+THREAD_DBCR0(r2)
- andis. r10,r0,DBCR0_IDM@h
+ andis. r10,r0,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
bnel- load_dbcr0
#endif
#ifdef CONFIG_44x
/* Check whether this process has its own DBCR0 value. The internal
debug mode bit tells us that dbcr0 should be loaded. */
lwz r0,THREAD+THREAD_DBCR0(r2)
- andis. r10,r0,DBCR0_IDM@h
+ andis. r10,r0,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
bnel- load_dbcr0
#endif
static int protect4gb = 1;
+static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);
+
static inline unsigned long iommu_num_pages(unsigned long vaddr,
unsigned long slen)
{
{
unsigned long entry, flags;
dma_addr_t ret = DMA_ERROR_CODE;
+ int build_fail;
spin_lock_irqsave(&(tbl->it_lock), flags);
ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */
/* Put the TCEs in the HW table */
- ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & IOMMU_PAGE_MASK,
- direction, attrs);
+ build_fail = ppc_md.tce_build(tbl, entry, npages,
+ (unsigned long)page & IOMMU_PAGE_MASK,
+ direction, attrs);
+
+ /* ppc_md.tce_build() only returns non-zero for transient errors.
+ * Clean up the table bitmap in this case and return
+ * DMA_ERROR_CODE. For all other errors the functionality is
+ * not altered.
+ */
+ if (unlikely(build_fail)) {
+ __iommu_free(tbl, ret, npages);
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+ return DMA_ERROR_CODE;
+ }
/* Flush/invalidate TLB caches if necessary */
if (ppc_md.tce_flush)
dma_addr_t dma_next = 0, dma_addr;
unsigned long flags;
struct scatterlist *s, *outs, *segstart;
- int outcount, incount, i;
+ int outcount, incount, i, build_fail = 0;
unsigned int align;
unsigned long handle;
unsigned int max_seg_size;
npages, entry, dma_addr);
/* Insert into HW table */
- ppc_md.tce_build(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK,
- direction, attrs);
+ build_fail = ppc_md.tce_build(tbl, entry, npages,
+ vaddr & IOMMU_PAGE_MASK,
+ direction, attrs);
+ if(unlikely(build_fail))
+ goto failure;
/* If we are in an open segment, try merging */
if (segstart != s) {
kcb->kprobe_saved_msr = regs->msr;
}
-/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/*
* It is possible to have multiple instances associated with a given
regs->nip = orig_ret_address;
reset_current_kprobe();
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
preempt_enable_no_resched();
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
#include <asm/time.h>
#include <asm/prom.h>
#include <asm/vdso_datapage.h>
+#include <asm/vio.h>
-#define MODULE_VERS "1.7"
+#define MODULE_VERS "1.8"
#define MODULE_NAME "lparcfg"
/* #define LPARCFG_DEBUG */
/*
* Methods used to fetch LPAR data when running on a pSeries platform.
*/
-static void log_plpar_hcall_return(unsigned long rc, char *tag)
+/**
+ * h_get_mpp
+ * H_GET_MPP hcall returns info in 7 parms
+ */
+int h_get_mpp(struct hvcall_mpp_data *mpp_data)
{
- switch(rc) {
- case 0:
- return;
- case H_HARDWARE:
- printk(KERN_INFO "plpar-hcall (%s) "
- "Hardware fault\n", tag);
- return;
- case H_FUNCTION:
- printk(KERN_INFO "plpar-hcall (%s) "
- "Function not allowed\n", tag);
- return;
- case H_AUTHORITY:
- printk(KERN_INFO "plpar-hcall (%s) "
- "Not authorized to this function\n", tag);
- return;
- case H_PARAMETER:
- printk(KERN_INFO "plpar-hcall (%s) "
- "Bad parameter(s)\n",tag);
- return;
- default:
- printk(KERN_INFO "plpar-hcall (%s) "
- "Unexpected rc(0x%lx)\n", tag, rc);
- }
+ int rc;
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+ rc = plpar_hcall9(H_GET_MPP, retbuf);
+
+ mpp_data->entitled_mem = retbuf[0];
+ mpp_data->mapped_mem = retbuf[1];
+
+ mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+ mpp_data->pool_num = retbuf[2] & 0xffff;
+
+ mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
+ mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
+ mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
+
+ mpp_data->pool_size = retbuf[4];
+ mpp_data->loan_request = retbuf[5];
+ mpp_data->backing_mem = retbuf[6];
+
+ return rc;
}
+EXPORT_SYMBOL(h_get_mpp);
+
+struct hvcall_ppp_data {
+ u64 entitlement;
+ u64 unallocated_entitlement;
+ u16 group_num;
+ u16 pool_num;
+ u8 capped;
+ u8 weight;
+ u8 unallocated_weight;
+ u16 active_procs_in_pool;
+ u16 active_system_procs;
+};
/*
* H_GET_PPP hcall returns info in 4 parms.
* XXXX - Active processors in Physical Processor Pool.
* XXXX - Processors active on platform.
*/
-static unsigned int h_get_ppp(unsigned long *entitled,
- unsigned long *unallocated,
- unsigned long *aggregation,
- unsigned long *resource)
+static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
{
unsigned long rc;
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
rc = plpar_hcall(H_GET_PPP, retbuf);
- *entitled = retbuf[0];
- *unallocated = retbuf[1];
- *aggregation = retbuf[2];
- *resource = retbuf[3];
+ ppp_data->entitlement = retbuf[0];
+ ppp_data->unallocated_entitlement = retbuf[1];
+
+ ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+ ppp_data->pool_num = retbuf[2] & 0xffff;
- log_plpar_hcall_return(rc, "H_GET_PPP");
+ ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01;
+ ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff;
+ ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff;
+ ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff;
+ ppp_data->active_system_procs = retbuf[3] & 0xffff;
return rc;
}
-static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
+static unsigned h_pic(unsigned long *pool_idle_time,
+ unsigned long *num_procs)
{
unsigned long rc;
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
*pool_idle_time = retbuf[0];
*num_procs = retbuf[1];
- if (rc != H_AUTHORITY)
- log_plpar_hcall_return(rc, "H_PIC");
+ return rc;
+}
+
+/*
+ * parse_ppp_data
+ * Parse out the data returned from h_get_ppp and h_pic
+ */
+static void parse_ppp_data(struct seq_file *m)
+{
+ struct hvcall_ppp_data ppp_data;
+ int rc;
+
+ rc = h_get_ppp(&ppp_data);
+ if (rc)
+ return;
+
+ seq_printf(m, "partition_entitled_capacity=%ld\n",
+ ppp_data.entitlement);
+ seq_printf(m, "group=%d\n", ppp_data.group_num);
+ seq_printf(m, "system_active_processors=%d\n",
+ ppp_data.active_system_procs);
+
+ /* pool related entries are apropriate for shared configs */
+ if (lppaca[0].shared_proc) {
+ unsigned long pool_idle_time, pool_procs;
+
+ seq_printf(m, "pool=%d\n", ppp_data.pool_num);
+
+ /* report pool_capacity in percentage */
+ seq_printf(m, "pool_capacity=%d\n",
+ ppp_data.active_procs_in_pool * 100);
+
+ h_pic(&pool_idle_time, &pool_procs);
+ seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
+ seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
+ }
+
+ seq_printf(m, "unallocated_capacity_weight=%d\n",
+ ppp_data.unallocated_weight);
+ seq_printf(m, "capacity_weight=%d\n", ppp_data.weight);
+ seq_printf(m, "capped=%d\n", ppp_data.capped);
+ seq_printf(m, "unallocated_capacity=%ld\n",
+ ppp_data.unallocated_entitlement);
+}
+
+/**
+ * parse_mpp_data
+ * Parse out data returned from h_get_mpp
+ */
+static void parse_mpp_data(struct seq_file *m)
+{
+ struct hvcall_mpp_data mpp_data;
+ int rc;
+
+ rc = h_get_mpp(&mpp_data);
+ if (rc)
+ return;
+
+ seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem);
+
+ if (mpp_data.mapped_mem != -1)
+ seq_printf(m, "mapped_entitled_memory=%ld\n",
+ mpp_data.mapped_mem);
+
+ seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num);
+ seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num);
+
+ seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight);
+ seq_printf(m, "unallocated_entitled_memory_weight=%d\n",
+ mpp_data.unallocated_mem_weight);
+ seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n",
+ mpp_data.unallocated_entitlement);
+
+ if (mpp_data.pool_size != -1)
+ seq_printf(m, "entitled_memory_pool_size=%ld bytes\n",
+ mpp_data.pool_size);
+
+ seq_printf(m, "entitled_memory_loan_request=%ld\n",
+ mpp_data.loan_request);
+
+ seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem);
}
#define SPLPAR_CHARACTERISTICS_TOKEN 20
return count;
}
+static void pseries_cmo_data(struct seq_file *m)
+{
+ int cpu;
+ unsigned long cmo_faults = 0;
+ unsigned long cmo_fault_time = 0;
+
+ if (!firmware_has_feature(FW_FEATURE_CMO))
+ return;
+
+ for_each_possible_cpu(cpu) {
+ cmo_faults += lppaca[cpu].cmo_faults;
+ cmo_fault_time += lppaca[cpu].cmo_fault_time;
+ }
+
+ seq_printf(m, "cmo_faults=%lu\n", cmo_faults);
+ seq_printf(m, "cmo_fault_time_usec=%lu\n",
+ cmo_fault_time / tb_ticks_per_usec);
+}
+
static int pseries_lparcfg_data(struct seq_file *m, void *v)
{
int partition_potential_processors;
partition_active_processors = lparcfg_count_active_processors();
if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
- unsigned long h_entitled, h_unallocated;
- unsigned long h_aggregation, h_resource;
- unsigned long pool_idle_time, pool_procs;
- unsigned long purr;
-
- h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
- &h_resource);
-
- seq_printf(m, "R4=0x%lx\n", h_entitled);
- seq_printf(m, "R5=0x%lx\n", h_unallocated);
- seq_printf(m, "R6=0x%lx\n", h_aggregation);
- seq_printf(m, "R7=0x%lx\n", h_resource);
-
- purr = get_purr();
-
/* this call handles the ibm,get-system-parameter contents */
parse_system_parameter_string(m);
+ parse_ppp_data(m);
+ parse_mpp_data(m);
+ pseries_cmo_data(m);
- seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled);
-
- seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
-
- seq_printf(m, "system_active_processors=%ld\n",
- (h_resource >> 0 * 8) & 0xffff);
-
- /* pool related entries are apropriate for shared configs */
- if (lppaca[0].shared_proc) {
-
- h_pic(&pool_idle_time, &pool_procs);
-
- seq_printf(m, "pool=%ld\n",
- (h_aggregation >> 0 * 8) & 0xffff);
-
- /* report pool_capacity in percentage */
- seq_printf(m, "pool_capacity=%ld\n",
- ((h_resource >> 2 * 8) & 0xffff) * 100);
-
- seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
-
- seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
- }
-
- seq_printf(m, "unallocated_capacity_weight=%ld\n",
- (h_resource >> 4 * 8) & 0xFF);
-
- seq_printf(m, "capacity_weight=%ld\n",
- (h_resource >> 5 * 8) & 0xFF);
-
- seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
-
- seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
-
- seq_printf(m, "purr=%ld\n", purr);
-
+ seq_printf(m, "purr=%ld\n", get_purr());
} else { /* non SPLPAR case */
seq_printf(m, "system_active_processors=%d\n",
return 0;
}
+static ssize_t update_ppp(u64 *entitlement, u8 *weight)
+{
+ struct hvcall_ppp_data ppp_data;
+ u8 new_weight;
+ u64 new_entitled;
+ ssize_t retval;
+
+ /* Get our current parameters */
+ retval = h_get_ppp(&ppp_data);
+ if (retval)
+ return retval;
+
+ if (entitlement) {
+ new_weight = ppp_data.weight;
+ new_entitled = *entitlement;
+ } else if (weight) {
+ new_weight = *weight;
+ new_entitled = ppp_data.entitlement;
+ } else
+ return -EINVAL;
+
+ pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
+ __FUNCTION__, ppp_data.entitlement, ppp_data.weight);
+
+ pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
+ __FUNCTION__, new_entitled, new_weight);
+
+ retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight);
+ return retval;
+}
+
+/**
+ * update_mpp
+ *
+ * Update the memory entitlement and weight for the partition. Caller must
+ * specify either a new entitlement or weight, not both, to be updated
+ * since the h_set_mpp call takes both entitlement and weight as parameters.
+ */
+static ssize_t update_mpp(u64 *entitlement, u8 *weight)
+{
+ struct hvcall_mpp_data mpp_data;
+ u64 new_entitled;
+ u8 new_weight;
+ ssize_t rc;
+
+ if (entitlement) {
+ /* Check with vio to ensure the new memory entitlement
+ * can be handled.
+ */
+ rc = vio_cmo_entitlement_update(*entitlement);
+ if (rc)
+ return rc;
+ }
+
+ rc = h_get_mpp(&mpp_data);
+ if (rc)
+ return rc;
+
+ if (entitlement) {
+ new_weight = mpp_data.mem_weight;
+ new_entitled = *entitlement;
+ } else if (weight) {
+ new_weight = *weight;
+ new_entitled = mpp_data.entitled_mem;
+ } else
+ return -EINVAL;
+
+ pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
+ __FUNCTION__, mpp_data.entitled_mem, mpp_data.mem_weight);
+
+ pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
+ __FUNCTION__, new_entitled, new_weight);
+
+ rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight);
+ return rc;
+}
+
/*
* Interface for changing system parameters (variable capacity weight
* and entitled capacity). Format of input is "param_name=value";
static ssize_t lparcfg_write(struct file *file, const char __user * buf,
size_t count, loff_t * off)
{
- char *kbuf;
+ int kbuf_sz = 64;
+ char kbuf[kbuf_sz];
char *tmp;
u64 new_entitled, *new_entitled_ptr = &new_entitled;
u8 new_weight, *new_weight_ptr = &new_weight;
-
- unsigned long current_entitled; /* parameters for h_get_ppp */
- unsigned long dummy;
- unsigned long resource;
- u8 current_weight;
-
- ssize_t retval = -ENOMEM;
+ ssize_t retval;
if (!firmware_has_feature(FW_FEATURE_SPLPAR) ||
firmware_has_feature(FW_FEATURE_ISERIES))
return -EINVAL;
- kbuf = kmalloc(count, GFP_KERNEL);
- if (!kbuf)
- goto out;
+ if (count > kbuf_sz)
+ return -EINVAL;
- retval = -EFAULT;
if (copy_from_user(kbuf, buf, count))
- goto out;
+ return -EFAULT;
- retval = -EINVAL;
kbuf[count - 1] = '\0';
tmp = strchr(kbuf, '=');
if (!tmp)
- goto out;
+ return -EINVAL;
*tmp++ = '\0';
char *endp;
*new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
if (endp == tmp)
- goto out;
- new_weight_ptr = ¤t_weight;
+ return -EINVAL;
+
+ retval = update_ppp(new_entitled_ptr, NULL);
} else if (!strcmp(kbuf, "capacity_weight")) {
char *endp;
*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
if (endp == tmp)
- goto out;
- new_entitled_ptr = ¤t_entitled;
- } else
- goto out;
-
- /* Get our current parameters */
- retval = h_get_ppp(¤t_entitled, &dummy, &dummy, &resource);
- if (retval) {
- retval = -EIO;
- goto out;
- }
-
- current_weight = (resource >> 5 * 8) & 0xFF;
+ return -EINVAL;
- pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
- __func__, current_entitled, current_weight);
+ retval = update_ppp(NULL, new_weight_ptr);
+ } else if (!strcmp(kbuf, "entitled_memory")) {
+ char *endp;
+ *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ return -EINVAL;
- pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
- __func__, *new_entitled_ptr, *new_weight_ptr);
+ retval = update_mpp(new_entitled_ptr, NULL);
+ } else if (!strcmp(kbuf, "entitled_memory_weight")) {
+ char *endp;
+ *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ return -EINVAL;
- retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr,
- *new_weight_ptr);
+ retval = update_mpp(NULL, new_weight_ptr);
+ } else
+ return -EINVAL;
if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
retval = count;
retval = -EIO;
}
-out:
- kfree(kbuf);
return retval;
}
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#endif
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
extern unsigned long _get_SP(void);
}
#endif /* CONFIG_SMP */
+void do_dabr(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code)
+{
+ siginfo_t info;
+
+ if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
+ 11, SIGSEGV) == NOTIFY_STOP)
+ return;
+
+ if (debugger_dabr_match(regs))
+ return;
+
+ /* Clear the DAC and struct entries. One shot trigger */
+#if (defined(CONFIG_44x) || defined(CONFIG_BOOKE))
+ mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | DBSR_DAC1W
+ | DBCR0_IDM));
+#endif
+
+ /* Clear the DABR */
+ set_dabr(0);
+
+ /* Deliver the signal to userspace */
+ info.si_signo = SIGTRAP;
+ info.si_errno = 0;
+ info.si_code = TRAP_HWBKPT;
+ info.si_addr = (void __user *)address;
+ force_sig_info(SIGTRAP, &info, current);
+}
+
static DEFINE_PER_CPU(unsigned long, current_dabr);
int set_dabr(unsigned long dabr)
#if defined(CONFIG_PPC64) || defined(CONFIG_6xx)
mtspr(SPRN_DABR, dabr);
#endif
+
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+ mtspr(SPRN_DAC1, dabr);
+#endif
+
return 0;
}
if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))
set_dabr(new->thread.dabr);
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+ /* If new thread DAC (HW breakpoint) is the same then leave it */
+ if (new->thread.dabr)
+ set_dabr(new->thread.dabr);
+#endif
+
new_thread = &new->thread;
old_thread = ¤t->thread;
if (current->thread.dabr) {
current->thread.dabr = 0;
set_dabr(0);
+
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+ current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W);
+#endif
}
}
#else
#define OV5_MSI 0x00
#endif /* CONFIG_PCI_MSI */
+#ifdef CONFIG_PPC_SMLPAR
+#define OV5_CMO 0x80 /* Cooperative Memory Overcommitment */
+#else
+#define OV5_CMO 0x00
+#endif
/*
* The architecture vector has an array of PVR mask/value pairs,
0, /* don't halt */
/* option vector 5: PAPR/OF options */
- 3 - 2, /* length */
+ 5 - 2, /* length */
0, /* don't ignore, don't halt */
OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
OV5_DONATE_DEDICATE_CPU | OV5_MSI,
+ 0,
+ OV5_CMO,
};
/* Old method - ELF header with PT_NOTE sections */
if (regs != NULL) {
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
- task->thread.dbcr0 = DBCR0_IDM | DBCR0_IC;
+ task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
regs->msr |= MSR_DE;
#else
regs->msr |= MSR_SE;
{
struct pt_regs *regs = task->thread.regs;
+
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+ /* If DAC then do not single step, skip */
+ if (task->thread.dabr)
+ return;
+#endif
+
if (regs != NULL) {
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
- task->thread.dbcr0 = 0;
+ task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_IDM);
regs->msr &= ~MSR_DE;
#else
regs->msr &= ~MSR_SE;
clear_tsk_thread_flag(task, TIF_SINGLESTEP);
}
-static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
unsigned long data)
{
- /* We only support one DABR and no IABRS at the moment */
+ /* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
+ * For embedded processors we support one DAC and no IAC's at the
+ * moment.
+ */
if (addr > 0)
return -EINVAL;
- /* The bottom 3 bits are flags */
if ((data & ~0x7UL) >= TASK_SIZE)
return -EIO;
- /* Ensure translation is on */
+#ifdef CONFIG_PPC64
+
+ /* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
+ * It was assumed, on previous implementations, that 3 bits were
+ * passed together with the data address, fitting the design of the
+ * DABR register, as follows:
+ *
+ * bit 0: Read flag
+ * bit 1: Write flag
+ * bit 2: Breakpoint translation
+ *
+ * Thus, we use them here as so.
+ */
+
+ /* Ensure breakpoint translation bit is set */
if (data && !(data & DABR_TRANSLATION))
return -EIO;
+ /* Move contents to the DABR register */
task->thread.dabr = data;
+
+#endif
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+
+ /* As described above, it was assumed 3 bits were passed with the data
+ * address, but we will assume only the mode bits will be passed
+ * as to not cause alignment restrictions for DAC-based processors.
+ */
+
+ /* DAC's hold the whole address without any mode flags */
+ task->thread.dabr = data & ~0x3UL;
+
+ if (task->thread.dabr == 0) {
+ task->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | DBCR0_IDM);
+ task->thread.regs->msr &= ~MSR_DE;
+ return 0;
+ }
+
+ /* Read or Write bits must be set */
+
+ if (!(data & 0x3UL))
+ return -EINVAL;
+
+ /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
+ register */
+ task->thread.dbcr0 = DBCR0_IDM;
+
+ /* Check for write and read flags and set DBCR0
+ accordingly */
+ if (data & 0x1UL)
+ task->thread.dbcr0 |= DBSR_DAC1R;
+ if (data & 0x2UL)
+ task->thread.dbcr0 |= DBSR_DAC1W;
+
+ task->thread.regs->msr |= MSR_DE;
+#endif
return 0;
}
* user space. The DABR will have been cleared if it
* triggered inside the kernel.
*/
- if (current->thread.dabr)
+ if (current->thread.dabr) {
set_dabr(current->thread.dabr);
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+ mtspr(SPRN_DBCR0, current->thread.dbcr0);
+#endif
+ }
if (is32) {
if (ka.sa.sa_flags & SA_SIGINFO)
#endif
/* Only valid if CPU is present. */
-static ssize_t show_physical_id(struct sys_device *dev, char *buf)
+static ssize_t show_physical_id(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *buf)
{
struct cpu *cpu = container_of(dev, struct cpu, sysdev);
}
_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
+ } else if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
+ regs->msr &= ~MSR_DE;
+
+ if (user_mode(regs)) {
+ current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W |
+ DBCR0_IDM);
+ } else {
+ /* Disable DAC interupts */
+ mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R |
+ DBSR_DAC1W | DBCR0_IDM));
+
+ /* Clear the DAC event */
+ mtspr(SPRN_DBSR, (DBSR_DAC1R | DBSR_DAC1W));
+ }
+ /* Setup and send the trap to the handler */
+ do_dabr(regs, mfspr(SPRN_DAC1), debug_status);
}
}
#endif /* CONFIG_4xx || CONFIG_BOOKE */
/*
* IBM PowerPC Virtual I/O Infrastructure Support.
*
- * Copyright (c) 2003-2005 IBM Corp.
+ * Copyright (c) 2003,2008 IBM Corp.
* Dave Engebretsen engebret@us.ibm.com
* Santiago Leon santil@us.ibm.com
* Hollis Blanchard <hollisb@us.ibm.com>
* Stephen Rothwell
+ * Robert Jennings <rcjenn@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
.dev.bus = &vio_bus_type,
};
+#ifdef CONFIG_PPC_SMLPAR
+/**
+ * vio_cmo_pool - A pool of IO memory for CMO use
+ *
+ * @size: The size of the pool in bytes
+ * @free: The amount of free memory in the pool
+ */
+struct vio_cmo_pool {
+ size_t size;
+ size_t free;
+};
+
+/* How many ms to delay queued balance work */
+#define VIO_CMO_BALANCE_DELAY 100
+
+/* Portion out IO memory to CMO devices by this chunk size */
+#define VIO_CMO_BALANCE_CHUNK 131072
+
+/**
+ * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement
+ *
+ * @vio_dev: struct vio_dev pointer
+ * @list: pointer to other devices on bus that are being tracked
+ */
+struct vio_cmo_dev_entry {
+ struct vio_dev *viodev;
+ struct list_head list;
+};
+
+/**
+ * vio_cmo - VIO bus accounting structure for CMO entitlement
+ *
+ * @lock: spinlock for entire structure
+ * @balance_q: work queue for balancing system entitlement
+ * @device_list: list of CMO-enabled devices requiring entitlement
+ * @entitled: total system entitlement in bytes
+ * @reserve: pool of memory from which devices reserve entitlement, incl. spare
+ * @excess: pool of excess entitlement not needed for device reserves or spare
+ * @spare: IO memory for device hotplug functionality
+ * @min: minimum necessary for system operation
+ * @desired: desired memory for system operation
+ * @curr: bytes currently allocated
+ * @high: high water mark for IO data usage
+ */
+struct vio_cmo {
+ spinlock_t lock;
+ struct delayed_work balance_q;
+ struct list_head device_list;
+ size_t entitled;
+ struct vio_cmo_pool reserve;
+ struct vio_cmo_pool excess;
+ size_t spare;
+ size_t min;
+ size_t desired;
+ size_t curr;
+ size_t high;
+} vio_cmo;
+
+/**
+ * vio_cmo_OF_devices - Count the number of OF devices that have DMA windows
+ */
+static int vio_cmo_num_OF_devs(void)
+{
+ struct device_node *node_vroot;
+ int count = 0;
+
+ /*
+ * Count the number of vdevice entries with an
+ * ibm,my-dma-window OF property
+ */
+ node_vroot = of_find_node_by_name(NULL, "vdevice");
+ if (node_vroot) {
+ struct device_node *of_node;
+ struct property *prop;
+
+ for_each_child_of_node(node_vroot, of_node) {
+ prop = of_find_property(of_node, "ibm,my-dma-window",
+ NULL);
+ if (prop)
+ count++;
+ }
+ }
+ of_node_put(node_vroot);
+ return count;
+}
+
+/**
+ * vio_cmo_alloc - allocate IO memory for CMO-enable devices
+ *
+ * @viodev: VIO device requesting IO memory
+ * @size: size of allocation requested
+ *
+ * Allocations come from memory reserved for the devices and any excess
+ * IO memory available to all devices. The spare pool used to service
+ * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be
+ * made available.
+ *
+ * Return codes:
+ * 0 for successful allocation and -ENOMEM for a failure
+ */
+static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
+{
+ unsigned long flags;
+ size_t reserve_free = 0;
+ size_t excess_free = 0;
+ int ret = -ENOMEM;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+
+ /* Determine the amount of free entitlement available in reserve */
+ if (viodev->cmo.entitled > viodev->cmo.allocated)
+ reserve_free = viodev->cmo.entitled - viodev->cmo.allocated;
+
+ /* If spare is not fulfilled, the excess pool can not be used. */
+ if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
+ excess_free = vio_cmo.excess.free;
+
+ /* The request can be satisfied */
+ if ((reserve_free + excess_free) >= size) {
+ vio_cmo.curr += size;
+ if (vio_cmo.curr > vio_cmo.high)
+ vio_cmo.high = vio_cmo.curr;
+ viodev->cmo.allocated += size;
+ size -= min(reserve_free, size);
+ vio_cmo.excess.free -= size;
+ ret = 0;
+ }
+
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+ return ret;
+}
+
+/**
+ * vio_cmo_dealloc - deallocate IO memory from CMO-enable devices
+ * @viodev: VIO device freeing IO memory
+ * @size: size of deallocation
+ *
+ * IO memory is freed by the device back to the correct memory pools.
+ * The spare pool is replenished first from either memory pool, then
+ * the reserve pool is used to reduce device entitlement, the excess
+ * pool is used to increase the reserve pool toward the desired entitlement
+ * target, and then the remaining memory is returned to the pools.
+ *
+ */
+static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
+{
+ unsigned long flags;
+ size_t spare_needed = 0;
+ size_t excess_freed = 0;
+ size_t reserve_freed = size;
+ size_t tmp;
+ int balance = 0;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+ vio_cmo.curr -= size;
+
+ /* Amount of memory freed from the excess pool */
+ if (viodev->cmo.allocated > viodev->cmo.entitled) {
+ excess_freed = min(reserve_freed, (viodev->cmo.allocated -
+ viodev->cmo.entitled));
+ reserve_freed -= excess_freed;
+ }
+
+ /* Remove allocation from device */
+ viodev->cmo.allocated -= (reserve_freed + excess_freed);
+
+ /* Spare is a subset of the reserve pool, replenish it first. */
+ spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;
+
+ /*
+ * Replenish the spare in the reserve pool from the excess pool.
+ * This moves entitlement into the reserve pool.
+ */
+ if (spare_needed && excess_freed) {
+ tmp = min(excess_freed, spare_needed);
+ vio_cmo.excess.size -= tmp;
+ vio_cmo.reserve.size += tmp;
+ vio_cmo.spare += tmp;
+ excess_freed -= tmp;
+ spare_needed -= tmp;
+ balance = 1;
+ }
+
+ /*
+ * Replenish the spare in the reserve pool from the reserve pool.
+ * This removes entitlement from the device down to VIO_CMO_MIN_ENT,
+ * if needed, and gives it to the spare pool. The amount of used
+ * memory in this pool does not change.
+ */
+ if (spare_needed && reserve_freed) {
+ tmp = min(spare_needed, min(reserve_freed,
+ (viodev->cmo.entitled -
+ VIO_CMO_MIN_ENT)));
+
+ vio_cmo.spare += tmp;
+ viodev->cmo.entitled -= tmp;
+ reserve_freed -= tmp;
+ spare_needed -= tmp;
+ balance = 1;
+ }
+
+ /*
+ * Increase the reserve pool until the desired allocation is met.
+ * Move an allocation freed from the excess pool into the reserve
+ * pool and schedule a balance operation.
+ */
+ if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
+ tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));
+
+ vio_cmo.excess.size -= tmp;
+ vio_cmo.reserve.size += tmp;
+ excess_freed -= tmp;
+ balance = 1;
+ }
+
+ /* Return memory from the excess pool to that pool */
+ if (excess_freed)
+ vio_cmo.excess.free += excess_freed;
+
+ if (balance)
+ schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY);
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/**
+ * vio_cmo_entitlement_update - Manage system entitlement changes
+ *
+ * @new_entitlement: new system entitlement to attempt to accommodate
+ *
+ * Increases in entitlement will be used to fulfill the spare entitlement
+ * and the rest is given to the excess pool. Decreases, if they are
+ * possible, come from the excess pool and from unused device entitlement
+ *
+ * Returns: 0 on success, -ENOMEM when change can not be made
+ */
+int vio_cmo_entitlement_update(size_t new_entitlement)
+{
+ struct vio_dev *viodev;
+ struct vio_cmo_dev_entry *dev_ent;
+ unsigned long flags;
+ size_t avail, delta, tmp;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+
+ /* Entitlement increases */
+ if (new_entitlement > vio_cmo.entitled) {
+ delta = new_entitlement - vio_cmo.entitled;
+
+ /* Fulfill spare allocation */
+ if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
+ tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
+ vio_cmo.spare += tmp;
+ vio_cmo.reserve.size += tmp;
+ delta -= tmp;
+ }
+
+ /* Remaining new allocation goes to the excess pool */
+ vio_cmo.entitled += delta;
+ vio_cmo.excess.size += delta;
+ vio_cmo.excess.free += delta;
+
+ goto out;
+ }
+
+ /* Entitlement decreases */
+ delta = vio_cmo.entitled - new_entitlement;
+ avail = vio_cmo.excess.free;
+
+ /*
+ * Need to check how much unused entitlement each device can
+ * sacrifice to fulfill entitlement change.
+ */
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ if (avail >= delta)
+ break;
+
+ viodev = dev_ent->viodev;
+ if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
+ (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
+ avail += viodev->cmo.entitled -
+ max_t(size_t, viodev->cmo.allocated,
+ VIO_CMO_MIN_ENT);
+ }
+
+ if (delta <= avail) {
+ vio_cmo.entitled -= delta;
+
+ /* Take entitlement from the excess pool first */
+ tmp = min(vio_cmo.excess.free, delta);
+ vio_cmo.excess.size -= tmp;
+ vio_cmo.excess.free -= tmp;
+ delta -= tmp;
+
+ /*
+ * Remove all but VIO_CMO_MIN_ENT bytes from devices
+ * until entitlement change is served
+ */
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ if (!delta)
+ break;
+
+ viodev = dev_ent->viodev;
+ tmp = 0;
+ if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
+ (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
+ tmp = viodev->cmo.entitled -
+ max_t(size_t, viodev->cmo.allocated,
+ VIO_CMO_MIN_ENT);
+ viodev->cmo.entitled -= min(tmp, delta);
+ delta -= min(tmp, delta);
+ }
+ } else {
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+ return -ENOMEM;
+ }
+
+out:
+ schedule_delayed_work(&vio_cmo.balance_q, 0);
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+ return 0;
+}
+
+/**
+ * vio_cmo_balance - Balance entitlement among devices
+ *
+ * @work: work queue structure for this operation
+ *
+ * Any system entitlement above the minimum needed for devices, or
+ * already allocated to devices, can be distributed to the devices.
+ * The list of devices is iterated through to recalculate the desired
+ * entitlement level and to determine how much entitlement above the
+ * minimum entitlement is allocated to devices.
+ *
+ * Small chunks of the available entitlement are given to devices until
+ * their requirements are fulfilled or there is no entitlement left to give.
+ * Upon completion sizes of the reserve and excess pools are calculated.
+ *
+ * The system minimum entitlement level is also recalculated here.
+ * Entitlement will be reserved for devices even after vio_bus_remove to
+ * accommodate reloading the driver. The OF tree is walked to count the
+ * number of devices present and this will remove entitlement for devices
+ * that have actually left the system after having vio_bus_remove called.
+ */
+static void vio_cmo_balance(struct work_struct *work)
+{
+ struct vio_cmo *cmo;
+ struct vio_dev *viodev;
+ struct vio_cmo_dev_entry *dev_ent;
+ unsigned long flags;
+ size_t avail = 0, level, chunk, need;
+ int devcount = 0, fulfilled;
+
+ cmo = container_of(work, struct vio_cmo, balance_q.work);
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+
+ /* Calculate minimum entitlement and fulfill spare */
+ cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
+ BUG_ON(cmo->min > cmo->entitled);
+ cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
+ cmo->min += cmo->spare;
+ cmo->desired = cmo->min;
+
+ /*
+ * Determine how much entitlement is available and reset device
+ * entitlements
+ */
+ avail = cmo->entitled - cmo->spare;
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ viodev = dev_ent->viodev;
+ devcount++;
+ viodev->cmo.entitled = VIO_CMO_MIN_ENT;
+ cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
+ avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
+ }
+
+ /*
+ * Having provided each device with the minimum entitlement, loop
+ * over the devices portioning out the remaining entitlement
+ * until there is nothing left.
+ */
+ level = VIO_CMO_MIN_ENT;
+ while (avail) {
+ fulfilled = 0;
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ viodev = dev_ent->viodev;
+
+ if (viodev->cmo.desired <= level) {
+ fulfilled++;
+ continue;
+ }
+
+ /*
+ * Give the device up to VIO_CMO_BALANCE_CHUNK
+ * bytes of entitlement, but do not exceed the
+ * desired level of entitlement for the device.
+ */
+ chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
+ chunk = min(chunk, (viodev->cmo.desired -
+ viodev->cmo.entitled));
+ viodev->cmo.entitled += chunk;
+
+ /*
+ * If the memory for this entitlement increase was
+ * already allocated to the device it does not come
+ * from the available pool being portioned out.
+ */
+ need = max(viodev->cmo.allocated, viodev->cmo.entitled)-
+ max(viodev->cmo.allocated, level);
+ avail -= need;
+
+ }
+ if (fulfilled == devcount)
+ break;
+ level += VIO_CMO_BALANCE_CHUNK;
+ }
+
+ /* Calculate new reserve and excess pool sizes */
+ cmo->reserve.size = cmo->min;
+ cmo->excess.free = 0;
+ cmo->excess.size = 0;
+ need = 0;
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ viodev = dev_ent->viodev;
+ /* Calculated reserve size above the minimum entitlement */
+ if (viodev->cmo.entitled)
+ cmo->reserve.size += (viodev->cmo.entitled -
+ VIO_CMO_MIN_ENT);
+ /* Calculated used excess entitlement */
+ if (viodev->cmo.allocated > viodev->cmo.entitled)
+ need += viodev->cmo.allocated - viodev->cmo.entitled;
+ }
+ cmo->excess.size = cmo->entitled - cmo->reserve.size;
+ cmo->excess.free = cmo->excess.size - need;
+
+ cancel_delayed_work(container_of(work, struct delayed_work, work));
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ void *ret;
+
+ if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
+ atomic_inc(&viodev->cmo.allocs_failed);
+ return NULL;
+ }
+
+ ret = dma_iommu_ops.alloc_coherent(dev, size, dma_handle, flag);
+ if (unlikely(ret == NULL)) {
+ vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
+ atomic_inc(&viodev->cmo.allocs_failed);
+ }
+
+ return ret;
+}
+
+static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+
+ dma_iommu_ops.free_coherent(dev, size, vaddr, dma_handle);
+
+ vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
+}
+
+static dma_addr_t vio_dma_iommu_map_single(struct device *dev, void *vaddr,
+ size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ dma_addr_t ret = DMA_ERROR_CODE;
+
+ if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
+ atomic_inc(&viodev->cmo.allocs_failed);
+ return ret;
+ }
+
+ ret = dma_iommu_ops.map_single(dev, vaddr, size, direction, attrs);
+ if (unlikely(dma_mapping_error(ret))) {
+ vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
+ atomic_inc(&viodev->cmo.allocs_failed);
+ }
+
+ return ret;
+}
+
+static void vio_dma_iommu_unmap_single(struct device *dev,
+ dma_addr_t dma_handle, size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+
+ dma_iommu_ops.unmap_single(dev, dma_handle, size, direction, attrs);
+
+ vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
+}
+
+static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ struct scatterlist *sgl;
+ int ret, count = 0;
+ size_t alloc_size = 0;
+
+ for (sgl = sglist; count < nelems; count++, sgl++)
+ alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE);
+
+ if (vio_cmo_alloc(viodev, alloc_size)) {
+ atomic_inc(&viodev->cmo.allocs_failed);
+ return 0;
+ }
+
+ ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs);
+
+ if (unlikely(!ret)) {
+ vio_cmo_dealloc(viodev, alloc_size);
+ atomic_inc(&viodev->cmo.allocs_failed);
+ }
+
+ for (sgl = sglist, count = 0; count < ret; count++, sgl++)
+ alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
+ if (alloc_size)
+ vio_cmo_dealloc(viodev, alloc_size);
+
+ return ret;
+}
+
+static void vio_dma_iommu_unmap_sg(struct device *dev,
+ struct scatterlist *sglist, int nelems,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ struct scatterlist *sgl;
+ size_t alloc_size = 0;
+ int count = 0;
+
+ for (sgl = sglist; count < nelems; count++, sgl++)
+ alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
+
+ dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
+
+ vio_cmo_dealloc(viodev, alloc_size);
+}
+
+struct dma_mapping_ops vio_dma_mapping_ops = {
+ .alloc_coherent = vio_dma_iommu_alloc_coherent,
+ .free_coherent = vio_dma_iommu_free_coherent,
+ .map_single = vio_dma_iommu_map_single,
+ .unmap_single = vio_dma_iommu_unmap_single,
+ .map_sg = vio_dma_iommu_map_sg,
+ .unmap_sg = vio_dma_iommu_unmap_sg,
+};
+
+/**
+ * vio_cmo_set_dev_desired - Set desired entitlement for a device
+ *
+ * @viodev: struct vio_dev for device to alter
+ * @new_desired: new desired entitlement level in bytes
+ *
+ * For use by devices to request a change to their entitlement at runtime or
+ * through sysfs. The desired entitlement level is changed and a balancing
+ * of system resources is scheduled to run in the future.
+ */
+void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
+{
+ unsigned long flags;
+ struct vio_cmo_dev_entry *dev_ent;
+ int found = 0;
+
+ if (!firmware_has_feature(FW_FEATURE_CMO))
+ return;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+ if (desired < VIO_CMO_MIN_ENT)
+ desired = VIO_CMO_MIN_ENT;
+
+ /*
+ * Changes will not be made for devices not in the device list.
+ * If it is not in the device list, then no driver is loaded
+ * for the device and it can not receive entitlement.
+ */
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
+ if (viodev == dev_ent->viodev) {
+ found = 1;
+ break;
+ }
+ if (!found)
+ return;
+
+ /* Increase/decrease in desired device entitlement */
+ if (desired >= viodev->cmo.desired) {
+ /* Just bump the bus and device values prior to a balance*/
+ vio_cmo.desired += desired - viodev->cmo.desired;
+ viodev->cmo.desired = desired;
+ } else {
+ /* Decrease bus and device values for desired entitlement */
+ vio_cmo.desired -= viodev->cmo.desired - desired;
+ viodev->cmo.desired = desired;
+ /*
+ * If less entitlement is desired than current entitlement, move
+ * any reserve memory in the change region to the excess pool.
+ */
+ if (viodev->cmo.entitled > desired) {
+ vio_cmo.reserve.size -= viodev->cmo.entitled - desired;
+ vio_cmo.excess.size += viodev->cmo.entitled - desired;
+ /*
+ * If entitlement moving from the reserve pool to the
+ * excess pool is currently unused, add to the excess
+ * free counter.
+ */
+ if (viodev->cmo.allocated < viodev->cmo.entitled)
+ vio_cmo.excess.free += viodev->cmo.entitled -
+ max(viodev->cmo.allocated, desired);
+ viodev->cmo.entitled = desired;
+ }
+ }
+ schedule_delayed_work(&vio_cmo.balance_q, 0);
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/**
+ * vio_cmo_bus_probe - Handle CMO specific bus probe activities
+ *
+ * @viodev - Pointer to struct vio_dev for device
+ *
+ * Determine the devices IO memory entitlement needs, attempting
+ * to satisfy the system minimum entitlement at first and scheduling
+ * a balance operation to take care of the rest at a later time.
+ *
+ * Returns: 0 on success, -EINVAL when device doesn't support CMO, and
+ * -ENOMEM when entitlement is not available for device or
+ * device entry.
+ *
+ */
+static int vio_cmo_bus_probe(struct vio_dev *viodev)
+{
+ struct vio_cmo_dev_entry *dev_ent;
+ struct device *dev = &viodev->dev;
+ struct vio_driver *viodrv = to_vio_driver(dev->driver);
+ unsigned long flags;
+ size_t size;
+
+ /*
+ * Check to see that device has a DMA window and configure
+ * entitlement for the device.
+ */
+ if (of_get_property(viodev->dev.archdata.of_node,
+ "ibm,my-dma-window", NULL)) {
+ /* Check that the driver is CMO enabled and get desired DMA */
+ if (!viodrv->get_desired_dma) {
+ dev_err(dev, "%s: device driver does not support CMO\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev));
+ if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
+ viodev->cmo.desired = VIO_CMO_MIN_ENT;
+ size = VIO_CMO_MIN_ENT;
+
+ dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry),
+ GFP_KERNEL);
+ if (!dev_ent)
+ return -ENOMEM;
+
+ dev_ent->viodev = viodev;
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+ list_add(&dev_ent->list, &vio_cmo.device_list);
+ } else {
+ viodev->cmo.desired = 0;
+ size = 0;
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+ }
+
+ /*
+ * If the needs for vio_cmo.min have not changed since they
+ * were last set, the number of devices in the OF tree has
+ * been constant and the IO memory for this is already in
+ * the reserve pool.
+ */
+ if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
+ VIO_CMO_MIN_ENT)) {
+ /* Updated desired entitlement if device requires it */
+ if (size)
+ vio_cmo.desired += (viodev->cmo.desired -
+ VIO_CMO_MIN_ENT);
+ } else {
+ size_t tmp;
+
+ tmp = vio_cmo.spare + vio_cmo.excess.free;
+ if (tmp < size) {
+ dev_err(dev, "%s: insufficient free "
+ "entitlement to add device. "
+ "Need %lu, have %lu\n", __func__,
+ size, (vio_cmo.spare + tmp));
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+ return -ENOMEM;
+ }
+
+ /* Use excess pool first to fulfill request */
+ tmp = min(size, vio_cmo.excess.free);
+ vio_cmo.excess.free -= tmp;
+ vio_cmo.excess.size -= tmp;
+ vio_cmo.reserve.size += tmp;
+
+ /* Use spare if excess pool was insufficient */
+ vio_cmo.spare -= size - tmp;
+
+ /* Update bus accounting */
+ vio_cmo.min += size;
+ vio_cmo.desired += viodev->cmo.desired;
+ }
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+ return 0;
+}
+
+/**
+ * vio_cmo_bus_remove - Handle CMO specific bus removal activities
+ *
+ * @viodev - Pointer to struct vio_dev for device
+ *
+ * Remove the device from the cmo device list. The minimum entitlement
+ * will be reserved for the device as long as it is in the system. The
+ * rest of the entitlement the device had been allocated will be returned
+ * to the system.
+ */
+static void vio_cmo_bus_remove(struct vio_dev *viodev)
+{
+ struct vio_cmo_dev_entry *dev_ent;
+ unsigned long flags;
+ size_t tmp;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+ if (viodev->cmo.allocated) {
+ dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
+ "allocated after remove operation.\n",
+ __func__, viodev->cmo.allocated);
+ BUG();
+ }
+
+ /*
+ * Remove the device from the device list being maintained for
+ * CMO enabled devices.
+ */
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
+ if (viodev == dev_ent->viodev) {
+ list_del(&dev_ent->list);
+ kfree(dev_ent);
+ break;
+ }
+
+ /*
+ * Devices may not require any entitlement and they do not need
+ * to be processed. Otherwise, return the device's entitlement
+ * back to the pools.
+ */
+ if (viodev->cmo.entitled) {
+ /*
+ * This device has not yet left the OF tree, it's
+ * minimum entitlement remains in vio_cmo.min and
+ * vio_cmo.desired
+ */
+ vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);
+
+ /*
+ * Save min allocation for device in reserve as long
+ * as it exists in OF tree as determined by later
+ * balance operation
+ */
+ viodev->cmo.entitled -= VIO_CMO_MIN_ENT;
+
+ /* Replenish spare from freed reserve pool */
+ if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
+ tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT -
+ vio_cmo.spare));
+ vio_cmo.spare += tmp;
+ viodev->cmo.entitled -= tmp;
+ }
+
+ /* Remaining reserve goes to excess pool */
+ vio_cmo.excess.size += viodev->cmo.entitled;
+ vio_cmo.excess.free += viodev->cmo.entitled;
+ vio_cmo.reserve.size -= viodev->cmo.entitled;
+
+ /*
+ * Until the device is removed it will keep a
+ * minimum entitlement; this will guarantee that
+ * a module unload/load will result in a success.
+ */
+ viodev->cmo.entitled = VIO_CMO_MIN_ENT;
+ viodev->cmo.desired = VIO_CMO_MIN_ENT;
+ atomic_set(&viodev->cmo.allocs_failed, 0);
+ }
+
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
+{
+ vio_dma_mapping_ops.dma_supported = dma_iommu_ops.dma_supported;
+ viodev->dev.archdata.dma_ops = &vio_dma_mapping_ops;
+}
+
+/**
+ * vio_cmo_bus_init - CMO entitlement initialization at bus init time
+ *
+ * Set up the reserve and excess entitlement pools based on available
+ * system entitlement and the number of devices in the OF tree that
+ * require entitlement in the reserve pool.
+ */
+static void vio_cmo_bus_init(void)
+{
+ struct hvcall_mpp_data mpp_data;
+ int err;
+
+ memset(&vio_cmo, 0, sizeof(struct vio_cmo));
+ spin_lock_init(&vio_cmo.lock);
+ INIT_LIST_HEAD(&vio_cmo.device_list);
+ INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);
+
+ /* Get current system entitlement */
+ err = h_get_mpp(&mpp_data);
+
+ /*
+ * On failure, continue with entitlement set to 0, will panic()
+ * later when spare is reserved.
+ */
+ if (err != H_SUCCESS) {
+ printk(KERN_ERR "%s: unable to determine system IO "\
+ "entitlement. (%d)\n", __func__, err);
+ vio_cmo.entitled = 0;
+ } else {
+ vio_cmo.entitled = mpp_data.entitled_mem;
+ }
+
+ /* Set reservation and check against entitlement */
+ vio_cmo.spare = VIO_CMO_MIN_ENT;
+ vio_cmo.reserve.size = vio_cmo.spare;
+ vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
+ VIO_CMO_MIN_ENT);
+ if (vio_cmo.reserve.size > vio_cmo.entitled) {
+ printk(KERN_ERR "%s: insufficient system entitlement\n",
+ __func__);
+ panic("%s: Insufficient system entitlement", __func__);
+ }
+
+ /* Set the remaining accounting variables */
+ vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
+ vio_cmo.excess.free = vio_cmo.excess.size;
+ vio_cmo.min = vio_cmo.reserve.size;
+ vio_cmo.desired = vio_cmo.reserve.size;
+}
+
+/* sysfs device functions and data structures for CMO */
+
+#define viodev_cmo_rd_attr(name) \
+static ssize_t viodev_cmo_##name##_show(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name); \
+}
+
+static ssize_t viodev_cmo_allocs_failed_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
+}
+
+static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ atomic_set(&viodev->cmo.allocs_failed, 0);
+ return count;
+}
+
+static ssize_t viodev_cmo_desired_set(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ size_t new_desired;
+ int ret;
+
+ ret = strict_strtoul(buf, 10, &new_desired);
+ if (ret)
+ return ret;
+
+ vio_cmo_set_dev_desired(viodev, new_desired);
+ return count;
+}
+
+viodev_cmo_rd_attr(desired);
+viodev_cmo_rd_attr(entitled);
+viodev_cmo_rd_attr(allocated);
+
+static ssize_t name_show(struct device *, struct device_attribute *, char *);
+static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
+static struct device_attribute vio_cmo_dev_attrs[] = {
+ __ATTR_RO(name),
+ __ATTR_RO(devspec),
+ __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
+ viodev_cmo_desired_show, viodev_cmo_desired_set),
+ __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL),
+ __ATTR(cmo_allocated, S_IRUGO, viodev_cmo_allocated_show, NULL),
+ __ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
+ viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset),
+ __ATTR_NULL
+};
+
+/* sysfs bus functions and data structures for CMO */
+
+#define viobus_cmo_rd_attr(name) \
+static ssize_t \
+viobus_cmo_##name##_show(struct bus_type *bt, char *buf) \
+{ \
+ return sprintf(buf, "%lu\n", vio_cmo.name); \
+}
+
+#define viobus_cmo_pool_rd_attr(name, var) \
+static ssize_t \
+viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf) \
+{ \
+ return sprintf(buf, "%lu\n", vio_cmo.name.var); \
+}
+
+static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf,
+ size_t count)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+ vio_cmo.high = vio_cmo.curr;
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+
+ return count;
+}
+
+viobus_cmo_rd_attr(entitled);
+viobus_cmo_pool_rd_attr(reserve, size);
+viobus_cmo_pool_rd_attr(excess, size);
+viobus_cmo_pool_rd_attr(excess, free);
+viobus_cmo_rd_attr(spare);
+viobus_cmo_rd_attr(min);
+viobus_cmo_rd_attr(desired);
+viobus_cmo_rd_attr(curr);
+viobus_cmo_rd_attr(high);
+
+static struct bus_attribute vio_cmo_bus_attrs[] = {
+ __ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL),
+ __ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL),
+ __ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL),
+ __ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL),
+ __ATTR(cmo_spare, S_IRUGO, viobus_cmo_spare_show, NULL),
+ __ATTR(cmo_min, S_IRUGO, viobus_cmo_min_show, NULL),
+ __ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL),
+ __ATTR(cmo_curr, S_IRUGO, viobus_cmo_curr_show, NULL),
+ __ATTR(cmo_high, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
+ viobus_cmo_high_show, viobus_cmo_high_reset),
+ __ATTR_NULL
+};
+
+static void vio_cmo_sysfs_init(void)
+{
+ vio_bus_type.dev_attrs = vio_cmo_dev_attrs;
+ vio_bus_type.bus_attrs = vio_cmo_bus_attrs;
+}
+#else /* CONFIG_PPC_SMLPAR */
+/* Dummy functions for iSeries platform */
+int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
+void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
+static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
+static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
+static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
+static void vio_cmo_bus_init() {}
+static void vio_cmo_sysfs_init() { }
+#endif /* CONFIG_PPC_SMLPAR */
+EXPORT_SYMBOL(vio_cmo_entitlement_update);
+EXPORT_SYMBOL(vio_cmo_set_dev_desired);
+
static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
{
const unsigned char *dma_window;
return error;
id = vio_match_device(viodrv->id_table, viodev);
- if (id)
+ if (id) {
+ memset(&viodev->cmo, 0, sizeof(viodev->cmo));
+ if (firmware_has_feature(FW_FEATURE_CMO)) {
+ error = vio_cmo_bus_probe(viodev);
+ if (error)
+ return error;
+ }
error = viodrv->probe(viodev, id);
+ if (error)
+ vio_cmo_bus_remove(viodev);
+ }
return error;
}
{
struct vio_dev *viodev = to_vio_dev(dev);
struct vio_driver *viodrv = to_vio_driver(dev->driver);
+ struct device *devptr;
+ int ret = 1;
+
+ /*
+ * Hold a reference to the device after the remove function is called
+ * to allow for CMO accounting cleanup for the device.
+ */
+ devptr = get_device(dev);
if (viodrv->remove)
- return viodrv->remove(viodev);
+ ret = viodrv->remove(viodev);
+
+ if (!ret && firmware_has_feature(FW_FEATURE_CMO))
+ vio_cmo_bus_remove(viodev);
- /* driver can't remove */
- return 1;
+ put_device(devptr);
+ return ret;
}
/**
viodev->unit_address = *unit_address;
}
viodev->dev.archdata.of_node = of_node_get(of_node);
- viodev->dev.archdata.dma_ops = &dma_iommu_ops;
+
+ if (firmware_has_feature(FW_FEATURE_CMO))
+ vio_cmo_set_dma_ops(viodev);
+ else
+ viodev->dev.archdata.dma_ops = &dma_iommu_ops;
viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev);
viodev->dev.archdata.numa_node = of_node_to_nid(of_node);
int err;
struct device_node *node_vroot;
+ if (firmware_has_feature(FW_FEATURE_CMO))
+ vio_cmo_sysfs_init();
+
err = bus_register(&vio_bus_type);
if (err) {
printk(KERN_ERR "failed to register VIO bus\n");
return err;
}
+ if (firmware_has_feature(FW_FEATURE_CMO))
+ vio_cmo_bus_init();
+
node_vroot = of_find_node_by_name(NULL, "vdevice");
if (node_vroot) {
struct device_node *of_node;
/* The dummy segment contents for the bug workaround mentioned above
near PHDRS. */
- .dummy : {
+ .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) {
LONG(0xf177)
} :kernel :dummy
return 0;
}
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-static void do_dabr(struct pt_regs *regs, unsigned long address,
- unsigned long error_code)
-{
- siginfo_t info;
-
- if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
- 11, SIGSEGV) == NOTIFY_STOP)
- return;
-
- if (debugger_dabr_match(regs))
- return;
-
- /* Clear the DABR */
- set_dabr(0);
-
- /* Deliver the signal to userspace */
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_HWBKPT;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGTRAP, &info, current);
-}
-#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
-
/*
* For 600- and 800-family processors, the error_code parameter is DSISR
* for a data fault, SRR1 for an instruction fault. For 400-family processors
config PPC_MPC52xx
bool "52xx-based boards"
depends on PPC_MULTIPLATFORM && PPC32
- select FSL_SOC
select PPC_CLOCK
select PPC_PCI_CHOICE
config PPC_MPC5200_GPIO
bool "MPC5200 GPIO support"
depends on PPC_MPC52xx
- select HAVE_GPIO_LIB
+ select ARCH_REQUIRE_GPIOLIB
+ select GENERIC_GPIO
help
Enable gpiolib support for mpc5200 based boards
}
}
-static void tce_build_cell(struct iommu_table *tbl, long index, long npages,
+static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
unsigned long uaddr, enum dma_data_direction direction,
struct dma_attrs *attrs)
{
pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
index, npages, direction, base_pte);
+ return 0;
}
static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
static int __init setup_iommu_fixed(char *str)
{
+ struct device_node *pciep;
+
if (strcmp(str, "off") == 0)
iommu_fixed_disabled = 1;
- else if (strcmp(str, "weak") == 0)
+ /* If we can find a pcie-endpoint in the device tree assume that
+ * we're on a triblade or a CAB so by default the fixed mapping
+ * should be set to be weakly ordered; but only if the boot
+ * option WASN'T set for strong ordering
+ */
+ pciep = of_find_node_by_type(NULL, "pcie-endpoint");
+
+ if (strcmp(str, "weak") == 0 || (pciep && strcmp(str, "strong") != 0))
iommu_fixed_is_weak = 1;
+ of_node_put(pciep);
+
return 1;
}
__setup("iommu_fixed=", setup_iommu_fixed);
*/
node = cpu_to_node(raw_smp_processor_id());
for (n = 0; n < MAX_NUMNODES; n++, node++) {
+ int available_spus;
+
node = (node < MAX_NUMNODES) ? node : 0;
if (!node_allowed(ctx, node))
continue;
+
+ available_spus = 0;
mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ if (spu->ctx && spu->ctx->gang
+ && spu->ctx->aff_offset == 0)
+ available_spus -=
+ (spu->ctx->gang->contexts - 1);
+ else
+ available_spus++;
+ }
+ if (available_spus < ctx->gang->contexts) {
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ continue;
+ }
+
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
if ((!mem_aff || spu->has_mem_affinity) &&
sched_spu(spu)) {
if (list_empty(&ctx->aff_list))
return 0;
+ if (atomic_read(&ctx->gang->aff_sched_count) == 0)
+ ctx->gang->aff_ref_spu = NULL;
+
if (!gang->aff_ref_spu) {
if (!(gang->aff_flags & AFF_MERGED))
aff_merge_remaining_ctxs(gang);
if (spu->ctx->flags & SPU_CREATE_NOSCHED)
atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
- if (ctx->gang){
- mutex_lock(&ctx->gang->aff_mutex);
- if (has_affinity(ctx)) {
- if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
- ctx->gang->aff_ref_spu = NULL;
- }
- mutex_unlock(&ctx->gang->aff_mutex);
- }
+ if (ctx->gang)
+ atomic_dec_if_positive(&ctx->gang->aff_sched_count);
spu_switch_notify(spu, NULL);
spu_unmap_mappings(ctx);
goto found;
mutex_unlock(&cbe_spu_info[node].list_mutex);
- mutex_lock(&ctx->gang->aff_mutex);
- if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
- ctx->gang->aff_ref_spu = NULL;
- mutex_unlock(&ctx->gang->aff_mutex);
+ atomic_dec(&ctx->gang->aff_sched_count);
goto not_found;
}
mutex_unlock(&ctx->gang->aff_mutex);
struct proc_dir_entry *entry;
int i, error = -ENOMEM;
- sputrace_log = kcalloc(sizeof(struct sputrace),
- bufsize, GFP_KERNEL);
+ sputrace_log = kcalloc(bufsize, sizeof(struct sputrace), GFP_KERNEL);
if (!sputrace_log)
goto out;
#include <asm/iseries/hv_call_event.h>
#include <asm/iseries/iommu.h>
-static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
+static int tce_build_iSeries(struct iommu_table *tbl, long index, long npages,
unsigned long uaddr, enum dma_data_direction direction,
struct dma_attrs *attrs)
{
index++;
uaddr += TCE_PAGE_SIZE;
}
+ return 0;
}
static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages)
static struct iommu_table iommu_table_iobmap;
static int iommu_table_iobmap_inited;
-static void iobmap_build(struct iommu_table *tbl, long index,
+static int iobmap_build(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs)
uaddr += IOBMAP_PAGE_SIZE;
bus_addr += IOBMAP_PAGE_SIZE;
}
+ return 0;
}
depends on PPC_PSERIES && PPC_EARLY_DEBUG
bool "Enable extra debug logging in platforms/pseries"
default y
+
+config PPC_SMLPAR
+ bool "Support for shared-memory logical partitions"
+ depends on PPC_PSERIES
+ select LPARCFG
+ default n
+ help
+ Select this option to enable shared memory partition support.
+ With this option a system running in an LPAR can be given more
+ memory than physically available and will allow firmware to
+ balance memory across many LPARs.
+
+config CMM
+ tristate "Collaborative memory management"
+ depends on PPC_SMLPAR
+ default y
+ help
+ Select this option, if you want to enable the kernel interface
+ to reduce the memory size of the system. This is accomplished
+ by allocating pages of memory and put them "on hold". This only
+ makes sense for a system running in an LPAR where the unused pages
+ will be reused for other LPARs. The interface allows firmware to
+ balance memory across many LPARs.
obj-$(CONFIG_HVCS) += hvcserver.o
obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o
obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o
+obj-$(CONFIG_CMM) += cmm.o
--- /dev/null
+/*
+ * Collaborative memory management interface.
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * Author(s): Brian King (brking@linux.vnet.ibm.com),
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/oom.h>
+#include <linux/sched.h>
+#include <linux/stringify.h>
+#include <linux/swap.h>
+#include <linux/sysdev.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/mmu.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+
+#include "plpar_wrappers.h"
+
+#define CMM_DRIVER_VERSION "1.0.0"
+#define CMM_DEFAULT_DELAY 1
+#define CMM_DEBUG 0
+#define CMM_DISABLE 0
+#define CMM_OOM_KB 1024
+#define CMM_MIN_MEM_MB 256
+#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10))
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+
+static unsigned int delay = CMM_DEFAULT_DELAY;
+static unsigned int oom_kb = CMM_OOM_KB;
+static unsigned int cmm_debug = CMM_DEBUG;
+static unsigned int cmm_disabled = CMM_DISABLE;
+static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
+static struct sys_device cmm_sysdev;
+
+MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(CMM_DRIVER_VERSION);
+
+module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
+ "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
+module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
+ "[Default=" __stringify(CMM_OOM_KB) "]");
+module_param_named(min_mem_mb, min_mem_mb, ulong, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
+ "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
+module_param_named(debug, cmm_debug, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
+ "[Default=" __stringify(CMM_DEBUG) "]");
+
+#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))
+
+#define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }
+
+struct cmm_page_array {
+ struct cmm_page_array *next;
+ unsigned long index;
+ unsigned long page[CMM_NR_PAGES];
+};
+
+static unsigned long loaned_pages;
+static unsigned long loaned_pages_target;
+static unsigned long oom_freed_pages;
+
+static struct cmm_page_array *cmm_page_list;
+static DEFINE_SPINLOCK(cmm_lock);
+
+static struct task_struct *cmm_thread_ptr;
+
+/**
+ * cmm_alloc_pages - Allocate pages and mark them as loaned
+ * @nr: number of pages to allocate
+ *
+ * Return value:
+ * number of pages requested to be allocated which were not
+ **/
+static long cmm_alloc_pages(long nr)
+{
+ struct cmm_page_array *pa, *npa;
+ unsigned long addr;
+ long rc;
+
+ cmm_dbg("Begin request for %ld pages\n", nr);
+
+ while (nr) {
+ addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
+ __GFP_NORETRY | __GFP_NOMEMALLOC);
+ if (!addr)
+ break;
+ spin_lock(&cmm_lock);
+ pa = cmm_page_list;
+ if (!pa || pa->index >= CMM_NR_PAGES) {
+ /* Need a new page for the page list. */
+ spin_unlock(&cmm_lock);
+ npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN |
+ __GFP_NORETRY | __GFP_NOMEMALLOC);
+ if (!npa) {
+ pr_info("%s: Can not allocate new page list\n", __FUNCTION__);
+ free_page(addr);
+ break;
+ }
+ spin_lock(&cmm_lock);
+ pa = cmm_page_list;
+
+ if (!pa || pa->index >= CMM_NR_PAGES) {
+ npa->next = pa;
+ npa->index = 0;
+ pa = npa;
+ cmm_page_list = pa;
+ } else
+ free_page((unsigned long) npa);
+ }
+
+ if ((rc = plpar_page_set_loaned(__pa(addr)))) {
+ pr_err("%s: Can not set page to loaned. rc=%ld\n", __FUNCTION__, rc);
+ spin_unlock(&cmm_lock);
+ free_page(addr);
+ break;
+ }
+
+ pa->page[pa->index++] = addr;
+ loaned_pages++;
+ totalram_pages--;
+ spin_unlock(&cmm_lock);
+ nr--;
+ }
+
+ cmm_dbg("End request with %ld pages unfulfilled\n", nr);
+ return nr;
+}
+
+/**
+ * cmm_free_pages - Free pages and mark them as active
+ * @nr: number of pages to free
+ *
+ * Return value:
+ * number of pages requested to be freed which were not
+ **/
+static long cmm_free_pages(long nr)
+{
+ struct cmm_page_array *pa;
+ unsigned long addr;
+
+ cmm_dbg("Begin free of %ld pages.\n", nr);
+ spin_lock(&cmm_lock);
+ pa = cmm_page_list;
+ while (nr) {
+ if (!pa || pa->index <= 0)
+ break;
+ addr = pa->page[--pa->index];
+
+ if (pa->index == 0) {
+ pa = pa->next;
+ free_page((unsigned long) cmm_page_list);
+ cmm_page_list = pa;
+ }
+
+ plpar_page_set_active(__pa(addr));
+ free_page(addr);
+ loaned_pages--;
+ nr--;
+ totalram_pages++;
+ }
+ spin_unlock(&cmm_lock);
+ cmm_dbg("End request with %ld pages unfulfilled\n", nr);
+ return nr;
+}
+
+/**
+ * cmm_oom_notify - OOM notifier
+ * @self: notifier block struct
+ * @dummy: not used
+ * @parm: returned - number of pages freed
+ *
+ * Return value:
+ * NOTIFY_OK
+ **/
+static int cmm_oom_notify(struct notifier_block *self,
+ unsigned long dummy, void *parm)
+{
+ unsigned long *freed = parm;
+ long nr = KB2PAGES(oom_kb);
+
+ cmm_dbg("OOM processing started\n");
+ nr = cmm_free_pages(nr);
+ loaned_pages_target = loaned_pages;
+ *freed += KB2PAGES(oom_kb) - nr;
+ oom_freed_pages += KB2PAGES(oom_kb) - nr;
+ cmm_dbg("OOM processing complete\n");
+ return NOTIFY_OK;
+}
+
+/**
+ * cmm_get_mpp - Read memory performance parameters
+ *
+ * Makes hcall to query the current page loan request from the hypervisor.
+ *
+ * Return value:
+ * nothing
+ **/
+static void cmm_get_mpp(void)
+{
+ int rc;
+ struct hvcall_mpp_data mpp_data;
+ unsigned long active_pages_target;
+ signed long page_loan_request;
+
+ rc = h_get_mpp(&mpp_data);
+
+ if (rc != H_SUCCESS)
+ return;
+
+ page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
+ loaned_pages_target = page_loan_request + loaned_pages;
+ if (loaned_pages_target > oom_freed_pages)
+ loaned_pages_target -= oom_freed_pages;
+ else
+ loaned_pages_target = 0;
+
+ active_pages_target = totalram_pages + loaned_pages - loaned_pages_target;
+
+ if ((min_mem_mb * 1024 * 1024) > (active_pages_target * PAGE_SIZE))
+ loaned_pages_target = totalram_pages + loaned_pages -
+ ((min_mem_mb * 1024 * 1024) / PAGE_SIZE);
+
+ cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
+ page_loan_request, loaned_pages, loaned_pages_target,
+ oom_freed_pages, totalram_pages);
+}
+
+static struct notifier_block cmm_oom_nb = {
+ .notifier_call = cmm_oom_notify
+};
+
+/**
+ * cmm_thread - CMM task thread
+ * @dummy: not used
+ *
+ * Return value:
+ * 0
+ **/
+static int cmm_thread(void *dummy)
+{
+ unsigned long timeleft;
+
+ while (1) {
+ timeleft = msleep_interruptible(delay * 1000);
+
+ if (kthread_should_stop() || timeleft) {
+ loaned_pages_target = loaned_pages;
+ break;
+ }
+
+ cmm_get_mpp();
+
+ if (loaned_pages_target > loaned_pages) {
+ if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
+ loaned_pages_target = loaned_pages;
+ } else if (loaned_pages_target < loaned_pages)
+ cmm_free_pages(loaned_pages - loaned_pages_target);
+ }
+ return 0;
+}
+
+#define CMM_SHOW(name, format, args...) \
+ static ssize_t show_##name(struct sys_device *dev, char *buf) \
+ { \
+ return sprintf(buf, format, ##args); \
+ } \
+ static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
+
+CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
+CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
+
+static ssize_t show_oom_pages(struct sys_device *dev, char *buf)
+{
+ return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
+}
+
+static ssize_t store_oom_pages(struct sys_device *dev,
+ const char *buf, size_t count)
+{
+ unsigned long val = simple_strtoul (buf, NULL, 10);
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (val != 0)
+ return -EBADMSG;
+
+ oom_freed_pages = 0;
+ return count;
+}
+
+static SYSDEV_ATTR(oom_freed_kb, S_IWUSR| S_IRUGO,
+ show_oom_pages, store_oom_pages);
+
+static struct sysdev_attribute *cmm_attrs[] = {
+ &attr_loaned_kb,
+ &attr_loaned_target_kb,
+ &attr_oom_freed_kb,
+};
+
+static struct sysdev_class cmm_sysdev_class = {
+ .name = "cmm",
+};
+
+/**
+ * cmm_sysfs_register - Register with sysfs
+ *
+ * Return value:
+ * 0 on success / other on failure
+ **/
+static int cmm_sysfs_register(struct sys_device *sysdev)
+{
+ int i, rc;
+
+ if ((rc = sysdev_class_register(&cmm_sysdev_class)))
+ return rc;
+
+ sysdev->id = 0;
+ sysdev->cls = &cmm_sysdev_class;
+
+ if ((rc = sysdev_register(sysdev)))
+ goto class_unregister;
+
+ for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
+ if ((rc = sysdev_create_file(sysdev, cmm_attrs[i])))
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ while (--i >= 0)
+ sysdev_remove_file(sysdev, cmm_attrs[i]);
+ sysdev_unregister(sysdev);
+class_unregister:
+ sysdev_class_unregister(&cmm_sysdev_class);
+ return rc;
+}
+
+/**
+ * cmm_unregister_sysfs - Unregister from sysfs
+ *
+ **/
+static void cmm_unregister_sysfs(struct sys_device *sysdev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
+ sysdev_remove_file(sysdev, cmm_attrs[i]);
+ sysdev_unregister(sysdev);
+ sysdev_class_unregister(&cmm_sysdev_class);
+}
+
+/**
+ * cmm_init - Module initialization
+ *
+ * Return value:
+ * 0 on success / other on failure
+ **/
+static int cmm_init(void)
+{
+ int rc = -ENOMEM;
+
+ if (!firmware_has_feature(FW_FEATURE_CMO))
+ return -EOPNOTSUPP;
+
+ if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
+ return rc;
+
+ if ((rc = cmm_sysfs_register(&cmm_sysdev)))
+ goto out_oom_notifier;
+
+ if (cmm_disabled)
+ return rc;
+
+ cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
+ if (IS_ERR(cmm_thread_ptr)) {
+ rc = PTR_ERR(cmm_thread_ptr);
+ goto out_unregister_sysfs;
+ }
+
+ return rc;
+
+out_unregister_sysfs:
+ cmm_unregister_sysfs(&cmm_sysdev);
+out_oom_notifier:
+ unregister_oom_notifier(&cmm_oom_nb);
+ return rc;
+}
+
+/**
+ * cmm_exit - Module exit
+ *
+ * Return value:
+ * nothing
+ **/
+static void cmm_exit(void)
+{
+ if (cmm_thread_ptr)
+ kthread_stop(cmm_thread_ptr);
+ unregister_oom_notifier(&cmm_oom_nb);
+ cmm_free_pages(loaned_pages);
+ cmm_unregister_sysfs(&cmm_sysdev);
+}
+
+/**
+ * cmm_set_disable - Disable/Enable CMM
+ *
+ * Return value:
+ * 0 on success / other on failure
+ **/
+static int cmm_set_disable(const char *val, struct kernel_param *kp)
+{
+ int disable = simple_strtoul(val, NULL, 10);
+
+ if (disable != 0 && disable != 1)
+ return -EINVAL;
+
+ if (disable && !cmm_disabled) {
+ if (cmm_thread_ptr)
+ kthread_stop(cmm_thread_ptr);
+ cmm_thread_ptr = NULL;
+ cmm_free_pages(loaned_pages);
+ } else if (!disable && cmm_disabled) {
+ cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
+ if (IS_ERR(cmm_thread_ptr))
+ return PTR_ERR(cmm_thread_ptr);
+ }
+
+ cmm_disabled = disable;
+ return 0;
+}
+
+module_param_call(disable, cmm_set_disable, param_get_uint,
+ &cmm_disabled, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
+ "[Default=" __stringify(CMM_DISABLE) "]");
+
+module_init(cmm_init);
+module_exit(cmm_exit);
#include "plpar_wrappers.h"
-static void tce_build_pSeries(struct iommu_table *tbl, long index,
+static int tce_build_pSeries(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs)
uaddr += TCE_PAGE_SIZE;
tcep++;
}
+ return 0;
}
return *tcep;
}
-static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
+static void tce_free_pSeriesLP(struct iommu_table*, long, long);
+static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
+
+static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
- u64 rc;
+ u64 rc = 0;
u64 proto_tce, tce;
u64 rpn;
+ int ret = 0;
+ long tcenum_start = tcenum, npages_start = npages;
rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
proto_tce = TCE_PCI_READ;
tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
+ if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
+ ret = (int)rc;
+ tce_free_pSeriesLP(tbl, tcenum_start,
+ (npages_start - (npages + 1)));
+ break;
+ }
+
if (rc && printk_ratelimit()) {
printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
tcenum++;
rpn++;
}
+ return ret;
}
static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
-static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
+static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs)
{
- u64 rc;
+ u64 rc = 0;
u64 proto_tce;
u64 *tcep;
u64 rpn;
long l, limit;
+ long tcenum_start = tcenum, npages_start = npages;
+ int ret = 0;
if (npages == 1) {
- tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
- direction, attrs);
- return;
+ return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
+ direction, attrs);
}
tcep = __get_cpu_var(tce_page);
tcep = (u64 *)__get_free_page(GFP_ATOMIC);
/* If allocation fails, fall back to the loop implementation */
if (!tcep) {
- tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
+ return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
direction, attrs);
- return;
}
__get_cpu_var(tce_page) = tcep;
}
tcenum += limit;
} while (npages > 0 && !rc);
+ if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
+ ret = (int)rc;
+ tce_freemulti_pSeriesLP(tbl, tcenum_start,
+ (npages_start - (npages + limit)));
+ return ret;
+ }
+
if (rc && printk_ratelimit()) {
printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc);
printk("\tindex = 0x%lx\n", (u64)tbl->it_index);
printk("\ttce[0] val = 0x%lx\n", tcep[0]);
show_stack(current, (unsigned long *)__get_SP());
}
+ return ret;
}
static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
return vpa_call(0x3, cpu, vpa);
}
+static inline long plpar_page_set_loaned(unsigned long vpa)
+{
+ return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa, 0);
+}
+
+static inline long plpar_page_set_active(unsigned long vpa)
+{
+ return plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa, 0);
+}
+
extern void vpa_init(int cpu);
static inline long plpar_pte_enter(unsigned long flags,
H_DABRX_KERNEL | H_DABRX_USER);
}
+#define CMO_CHARACTERISTICS_TOKEN 44
+#define CMO_MAXLENGTH 1026
+
+/**
+ * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,
+ * handle that here. (Stolen from parse_system_parameter_string)
+ */
+void pSeries_cmo_feature_init(void)
+{
+ char *ptr, *key, *value, *end;
+ int call_status;
+ int PrPSP = -1;
+ int SecPSP = -1;
+
+ pr_debug(" -> fw_cmo_feature_init()\n");
+ spin_lock(&rtas_data_buf_lock);
+ memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
+ call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+ NULL,
+ CMO_CHARACTERISTICS_TOKEN,
+ __pa(rtas_data_buf),
+ RTAS_DATA_BUF_SIZE);
+
+ if (call_status != 0) {
+ spin_unlock(&rtas_data_buf_lock);
+ pr_debug("CMO not available\n");
+ pr_debug(" <- fw_cmo_feature_init()\n");
+ return;
+ }
+
+ end = rtas_data_buf + CMO_MAXLENGTH - 2;
+ ptr = rtas_data_buf + 2; /* step over strlen value */
+ key = value = ptr;
+
+ while (*ptr && (ptr <= end)) {
+ /* Separate the key and value by replacing '=' with '\0' and
+ * point the value at the string after the '='
+ */
+ if (ptr[0] == '=') {
+ ptr[0] = '\0';
+ value = ptr + 1;
+ } else if (ptr[0] == '\0' || ptr[0] == ',') {
+ /* Terminate the string containing the key/value pair */
+ ptr[0] = '\0';
+
+ if (key == value) {
+ pr_debug("Malformed key/value pair\n");
+ /* Never found a '=', end processing */
+ break;
+ }
+
+ if (0 == strcmp(key, "PrPSP"))
+ PrPSP = simple_strtol(value, NULL, 10);
+ else if (0 == strcmp(key, "SecPSP"))
+ SecPSP = simple_strtol(value, NULL, 10);
+ value = key = ptr + 1;
+ }
+ ptr++;
+ }
+
+ if (PrPSP != -1 || SecPSP != -1) {
+ pr_info("CMO enabled\n");
+ pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP);
+ powerpc_firmware_features |= FW_FEATURE_CMO;
+ } else
+ pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", PrPSP, SecPSP);
+ spin_unlock(&rtas_data_buf_lock);
+ pr_debug(" <- fw_cmo_feature_init()\n");
+}
+
/*
* Early initialization. Relocation is on but do not reference unbolted pages
*/
else if (firmware_has_feature(FW_FEATURE_XDABR))
ppc_md.set_dabr = pseries_set_xdabr;
+ pSeries_cmo_feature_init();
iommu_init_early_pSeries();
pr_debug(" <- pSeries_init_early()\n");
}
}
-static void dart_build(struct iommu_table *tbl, long index,
+static int dart_build(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs)
} else {
dart_dirty = 1;
}
+ return 0;
}
bool "QE GPIO support"
depends on QUICC_ENGINE
select GENERIC_GPIO
- select HAVE_GPIO_LIB
+ select ARCH_REQUIRE_GPIOLIB
help
Say Y here if you're going to use hardware that connects to the
QE GPIOs.
depends on 64BIT && EXPERIMENTAL
select VIRTIO
select VIRTIO_RING
+ select VIRTIO_CONSOLE
help
Select this option if you want to run the kernel under s390 linux
endmenu
__ctl_store(kcb->kprobe_saved_ctl, 9, 11);
}
-/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/*
* It is possible to have multiple instances associated with a given
regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
reset_current_kprobe();
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
preempt_enable_no_resched();
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
#include <asm/sections.h>
#include <asm/ebcdic.h>
#include <asm/compat.h>
+#include <asm/kvm_virtio.h>
long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY |
PSW_MASK_MCHECK | PSW_DEFAULT_KEY);
printk("We are running under VM (64 bit mode)\n");
else if (MACHINE_IS_KVM) {
printk("We are running under KVM (64 bit mode)\n");
- add_preferred_console("ttyS", 1, NULL);
+ add_preferred_console("hvc", 0, NULL);
+ s390_virtio_console_init();
} else
printk("We are running native (64 bit mode)\n");
#endif /* CONFIG_64BIT */
#include <linux/device.h>
#include <linux/bootmem.h>
#include <linux/sched.h>
-#include <linux/kthread.h>
#include <linux/workqueue.h>
#include <linux/cpu.h>
#include <linux/smp.h>
}
}
-static int topology_kthread(void *data)
-{
- arch_reinit_sched_domains();
- return 0;
-}
-
static void topology_work_fn(struct work_struct *work)
{
- /* We can't call arch_reinit_sched_domains() from a multi-threaded
- * workqueue context since it may deadlock in case of cpu hotplug.
- * So we have to create a kernel thread in order to call
- * arch_reinit_sched_domains().
- */
- kthread_run(topology_kthread, NULL, "topology_update");
+ arch_reinit_sched_domains();
}
void topology_schedule_update(void)
static int fill_inbuf(void);
static void flush_window(void);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
extern char input_data[];
extern int input_len;
static uch *output_data;
static unsigned long output_ptr = 0;
-static void *malloc(int size);
-static void free(void *where);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
int puts(const char *);
#include "../../../../lib/inflate.c"
-static void *malloc(int size)
-{
- void *p;
-
- if (size <0) error("Malloc error");
- if (free_mem_ptr == 0) error("Memory error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_end_ptr)
- error("Out of memory");
-
- return p;
-}
-
-static void free(void *where)
-{ /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (long) *ptr;
-}
-
#ifdef CONFIG_SH_STANDARD_BIOS
size_t strlen(const char *s)
{
static int fill_inbuf(void);
static void flush_window(void);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
extern char input_data[];
extern int input_len;
static uch *output_data;
static unsigned long output_ptr = 0;
-static void *malloc(int size);
-static void free(void *where);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
static void puts(const char *);
#include "../../../../lib/inflate.c"
-static void *malloc(int size)
-{
- void *p;
-
- if (size < 0)
- error("Malloc error\n");
- if (free_mem_ptr == 0)
- error("Memory error\n");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *) free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_end_ptr)
- error("\nOut of memory\n");
-
- return p;
-}
-
-static void free(void *where)
-{ /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (long) *ptr;
-}
-
void puts(const char *s)
{
}
Read the instructions in <file:Documentation/Changes> pertaining to
pseudo terminals. It's safe to say N.
-config UNIX98_PTY_COUNT
- int "Maximum number of Unix98 PTYs in use (0-2048)"
- depends on UNIX98_PTYS
- default "256"
- help
- The maximum number of Unix98 PTYs that can be used at any one time.
- The default is 256, and should be enough for desktop systems. Server
- machines which support incoming telnet/rlogin/ssh connections and/or
- serve several X terminals may want to increase this: every incoming
- connection and every xterm uses up one PTY.
-
- When not in use, each additional set of 256 PTYs occupy
- approximately 8 KB of kernel memory on 32-bit architectures.
-
endmenu
source "fs/Kconfig"
return 0;
}
-/* Called with kretprobe_lock held. The value stored in the return
- * address register is actually 2 instructions before where the
- * callee will return to. Sequences usually look something like this
+/* The value stored in the return address register is actually 2
+ * instructions before where the callee will return to.
+ * Sequences usually look something like this
*
* call some_function <--- return register points here
* nop <--- call delay slot
unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/*
* It is possible to have multiple instances associated with a given
regs->tnpc = orig_ret_address + 4;
reset_current_kprobe();
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
preempt_enable_no_resched();
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
select HAVE_OPROFILE
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
+ select ARCH_WANT_OPTIONAL_GPIOLIB if !X86_RDC321X
select HAVE_KRETPROBES
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
select HAVE_ARCH_KGDB if !X86_VOYAGER
+ select HAVE_EFFICIENT_UNALIGNED_ACCESS
config ARCH_DEFCONFIG
string
static int fill_inbuf(void);
static void flush_window(void);
static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
/*
* This is set up by the setup-routine at boot-time
static long bytes_out;
-static void *malloc(int size);
-static void free(void *where);
-
static void *memset(void *s, int c, unsigned n);
static void *memcpy(void *dest, const void *src, unsigned n);
#include "../../../../lib/inflate.c"
-static void *malloc(int size)
-{
- void *p;
-
- if (size < 0)
- error("Malloc error");
- if (free_mem_ptr <= 0)
- error("Memory error");
-
- free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
-
- p = (void *)free_mem_ptr;
- free_mem_ptr += size;
-
- if (free_mem_ptr >= free_mem_end_ptr)
- error("Out of memory");
-
- return p;
-}
-
-static void free(void *where)
-{ /* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
- *ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
- free_mem_ptr = (memptr) *ptr;
-}
-
static void scroll(void)
{
int i;
regs->ip = (unsigned long)p->ainsn.insn;
}
-/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/* fixup registers */
#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
kretprobe_assert(ri, orig_ret_address, trampoline_address);
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
+#include <linux/crash_dump.h>
#include <linux/dma-mapping.h>
#include <linux/bitops.h>
#include <linux/pci_ids.h>
static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
static void calioc2_tce_cache_blast(struct iommu_table *tbl);
static void calioc2_dump_error_regs(struct iommu_table *tbl);
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
+static void get_tce_space_from_tar(void);
static struct cal_chipset_ops calgary_chip_ops = {
.handle_quirks = calgary_handle_quirks,
tbl = pci_iommu(dev->bus);
tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
- tce_free(tbl, 0, tbl->it_size);
+
+ if (is_kdump_kernel())
+ calgary_init_bitmap_from_tce_table(tbl);
+ else
+ tce_free(tbl, 0, tbl->it_size);
if (is_calgary(dev->device))
tbl->chip_ops = &calgary_chip_ops;
if (ret)
return ret;
+ /* Purely for kdump kernel case */
+ if (is_kdump_kernel())
+ get_tce_space_from_tar();
+
do {
dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
if (!dev)
return (val != 0xffffffff);
}
+/*
+ * calgary_init_bitmap_from_tce_table():
+ * Funtion for kdump case. In the second/kdump kernel initialize
+ * the bitmap based on the tce table entries obtained from first kernel
+ */
+static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
+{
+ u64 *tp;
+ unsigned int index;
+ tp = ((u64 *)tbl->it_base);
+ for (index = 0 ; index < tbl->it_size; index++) {
+ if (*tp != 0x0)
+ set_bit(index, tbl->it_map);
+ tp++;
+ }
+}
+
+/*
+ * get_tce_space_from_tar():
+ * Function for kdump case. Get the tce tables from first kernel
+ * by reading the contents of the base adress register of calgary iommu
+ */
+static void get_tce_space_from_tar()
+{
+ int bus;
+ void __iomem *target;
+ unsigned long tce_space;
+
+ for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
+ struct calgary_bus_info *info = &bus_info[bus];
+ unsigned short pci_device;
+ u32 val;
+
+ val = read_pci_config(bus, 0, 0, 0);
+ pci_device = (val & 0xFFFF0000) >> 16;
+
+ if (!is_cal_pci_dev(pci_device))
+ continue;
+ if (info->translation_disabled)
+ continue;
+
+ if (calgary_bus_has_devices(bus, pci_device) ||
+ translate_empty_slots) {
+ target = calgary_reg(bus_info[bus].bbar,
+ tar_offset(bus));
+ tce_space = be64_to_cpu(readq(target));
+ tce_space = tce_space & TAR_SW_BITS;
+
+ tce_space = tce_space & (~specified_table_size);
+ info->tce_space = (u64 *)__va(tce_space);
+ }
+ }
+ return;
+}
+
void __init detect_calgary(void)
{
int bus;
return;
}
- specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE);
+ specified_table_size = determine_tce_table_size((is_kdump_kernel() ?
+ saved_max_pfn : max_pfn) * PAGE_SIZE);
for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
struct calgary_bus_info *info = &bus_info[bus];
if (calgary_bus_has_devices(bus, pci_device) ||
translate_empty_slots) {
- tbl = alloc_tce_table();
- if (!tbl)
- goto cleanup;
- info->tce_space = tbl;
+ /*
+ * If it is kdump kernel, find and use tce tables
+ * from first kernel, else allocate tce tables here
+ */
+ if (!is_kdump_kernel()) {
+ tbl = alloc_tce_table();
+ if (!tbl)
+ goto cleanup;
+ info->tce_space = tbl;
+ }
calgary_found = 1;
}
}
long long start, length;
int part;
int i;
+ int err;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
}
}
/* all seems OK */
- add_partition(disk, part, start, length, ADDPART_FLAG_NONE);
+ err = add_partition(disk, part, start, length, ADDPART_FLAG_NONE);
mutex_unlock(&bdev->bd_mutex);
- return 0;
+ return err;
case BLKPG_DEL_PARTITION:
if (!disk->part[part-1])
return -ENXIO;
# Rewritten to use lists instead of if-statements.
#
-obj-$(CONFIG_HAVE_GPIO_LIB) += gpio/
+obj-y += gpio/
obj-$(CONFIG_PCI) += pci/
obj-$(CONFIG_PARISC) += parisc/
obj-$(CONFIG_RAPIDIO) += rapidio/
void __iomem *mmio = ap->host->iomap[AHCI_PCI_BAR];
u32 em_ctl;
u32 message[] = {0, 0};
- unsigned int flags;
+ unsigned long flags;
int pmp;
struct ahci_em_priv *emp;
struct device *dev = to_dev(kobj);
struct firmware_priv *fw_priv = dev_get_drvdata(dev);
struct firmware *fw;
- ssize_t ret_count = count;
+ ssize_t ret_count;
mutex_lock(&fw_lock);
fw = fw_priv->fw;
ret_count = -ENODEV;
goto out;
}
- if (offset > fw->size) {
- ret_count = 0;
- goto out;
- }
- if (offset + ret_count > fw->size)
- ret_count = fw->size - offset;
-
- memcpy(buffer, fw->data + offset, ret_count);
+ ret_count = memory_read_from_buffer(buffer, count, &offset,
+ fw->data, fw->size);
out:
mutex_unlock(&fw_lock);
return ret_count;
#include <linux/hdreg.h>
#include <linux/blkdev.h>
+#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/smp_lock.h>
#include "aoe.h"
static struct ErrMsg emsgs[NMSG];
static int emsgs_head_idx, emsgs_tail_idx;
-static struct semaphore emsgs_sema;
+static struct completion emsgs_comp;
static spinlock_t emsgs_lock;
static int nblocked_emsgs_readers;
static struct class *aoe_class;
spin_unlock_irqrestore(&emsgs_lock, flags);
if (nblocked_emsgs_readers)
- up(&emsgs_sema);
+ complete(&emsgs_comp);
}
static ssize_t
spin_unlock_irqrestore(&emsgs_lock, flags);
- n = down_interruptible(&emsgs_sema);
+ n = wait_for_completion_interruptible(&emsgs_comp);
spin_lock_irqsave(&emsgs_lock, flags);
printk(KERN_ERR "aoe: can't register char device\n");
return n;
}
- sema_init(&emsgs_sema, 0);
+ init_completion(&emsgs_comp);
spin_lock_init(&emsgs_lock);
aoe_class = class_create(THIS_MODULE, "aoe");
if (IS_ERR(aoe_class)) {
int err;
u64 cap;
u32 v;
+ u32 blk_size;
if (index_to_minor(index) >= 1 << MINORBITS)
return -ENOSPC;
if (!err)
blk_queue_max_hw_segments(vblk->disk->queue, v);
+ /* Host can optionally specify the block size of the device */
+ err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
+ offsetof(struct virtio_blk_config, blk_size),
+ &blk_size);
+ if (!err)
+ blk_queue_hardsect_size(vblk->disk->queue, blk_size);
+
add_disk(vblk->disk);
return 0;
static unsigned int features[] = {
VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
- VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO,
+ VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
};
static struct virtio_driver virtio_blk = {
If unsure, say Y, or else you won't be able to do much with your new
shiny Linux system :-)
+config CONSOLE_TRANSLATIONS
+ depends on VT
+ default y
+ bool "Enable character translations in console" if EMBEDDED
+ ---help---
+ This enables support for font mapping and Unicode translation
+ on virtual consoles.
+
config VT_CONSOLE
bool "Support for console on virtual terminal" if EMBEDDED
depends on VT
It will automatically be selected if one of the back-end console drivers
is selected.
+config HVC_IRQ
+ bool
config HVC_CONSOLE
bool "pSeries Hypervisor Virtual Console support"
depends on PPC_PSERIES
select HVC_DRIVER
+ select HVC_IRQ
help
pSeries machines when partitioned support a hypervisor virtual
console. This driver allows each pSeries partition to have a console
depends on PPC_ISERIES
default y
select HVC_DRIVER
+ select HVC_IRQ
help
iSeries machines support a hypervisor virtual console.
bool "Xen Hypervisor Console support"
depends on XEN
select HVC_DRIVER
+ select HVC_IRQ
default y
help
Xen virtual console device driver
config VIRTIO_CONSOLE
- bool
+ tristate "Virtio console"
+ depends on VIRTIO
select HVC_DRIVER
+ help
+ Virtio console for use with lguest and other hypervisors.
+
config HVCS
tristate "IBM Hypervisor Virtual Console Server support"
obj-$(CONFIG_LEGACY_PTYS) += pty.o
obj-$(CONFIG_UNIX98_PTYS) += pty.o
obj-y += misc.o
-obj-$(CONFIG_VT) += vt_ioctl.o vc_screen.o consolemap.o \
- consolemap_deftbl.o selection.o keyboard.o
+obj-$(CONFIG_VT) += vt_ioctl.o vc_screen.o selection.o keyboard.o
+obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o
obj-$(CONFIG_HW_CONSOLE) += vt.o defkeymap.o
obj-$(CONFIG_AUDIT) += tty_audit.o
obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o
obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o
obj-$(CONFIG_HVC_BEAT) += hvc_beat.o
obj-$(CONFIG_HVC_DRIVER) += hvc_console.o
+obj-$(CONFIG_HVC_IRQ) += hvc_irq.o
obj-$(CONFIG_HVC_XEN) += hvc_xen.o
obj-$(CONFIG_VIRTIO_CONSOLE) += virtio_console.o
obj-$(CONFIG_RAW_DRIVER) += raw.o
obj-$(CONFIG_BFIN_OTP) += bfin-otp.o
obj-$(CONFIG_PRINTER) += lp.o
-obj-$(CONFIG_TIPAR) += tipar.o
obj-$(CONFIG_APM_EMULATION) += apm-emulation.o
#include <linux/miscdevice.h>
#include <linux/delay.h>
#include <linux/bcd.h>
+#include <linux/smp_lock.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
-#include <asm/uaccess.h>
#include <asm/system.h>
-#include <asm/io.h>
#include <asm/rtc.h>
#if defined(CONFIG_M32R)
#include <asm/m32r.h>
/* ioctl that supports RTC_RD_TIME and RTC_SET_TIME (read and set time/date). */
-static int
-rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
- unsigned long arg)
+static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
unsigned long flags;
struct rtc_time rtc_tm;
memset(&rtc_tm, 0, sizeof (struct rtc_time));
+ lock_kernel();
get_rtc_time(&rtc_tm);
+ unlock_kernel();
if (copy_to_user((struct rtc_time*)arg, &rtc_tm, sizeof(struct rtc_time)))
return -EFAULT;
return 0;
BIN_TO_BCD(mon);
BIN_TO_BCD(yrs);
+ lock_kernel();
local_irq_save(flags);
CMOS_WRITE(yrs, RTC_YEAR);
CMOS_WRITE(mon, RTC_MONTH);
CMOS_WRITE(min, RTC_MINUTES);
CMOS_WRITE(sec, RTC_SECONDS);
local_irq_restore(flags);
+ unlock_kernel();
/* Notice that at this point, the RTC is updated but
* the kernel is still running with the old time.
if(copy_from_user(&tcs_val, (int*)arg, sizeof(int)))
return -EFAULT;
+ lock_kernel();
tcs_val = RTC_TCR_PATTERN | (tcs_val & 0x0F);
ds1302_writereg(RTC_TRICKLECHARGER, tcs_val);
+ unlock_kernel();
return 0;
}
default:
static const struct file_operations rtc_fops = {
.owner = THIS_MODULE,
- .ioctl = rtc_ioctl,
+ .unlocked_ioctl = rtc_ioctl,
};
/* Probe for the chip by writing something to its RAM and try reading it back. */
#include <linux/smp_lock.h>
#include <linux/firmware.h>
#include <linux/platform_device.h>
+#include <linux/uaccess.h> /* For put_user and get_user */
#include <asm/atarihw.h>
#include <asm/traps.h>
-#include <asm/uaccess.h> /* For put_user and get_user */
#include <asm/dsp56k.h>
}
}
-static int dsp56k_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
+static long dsp56k_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
{
int dev = iminor(inode) & 0x0f;
void __user *argp = (void __user *)arg;
if (len > DSP56K_MAX_BINARY_LENGTH) {
return -EINVAL;
}
-
+ lock_kernel();
r = dsp56k_upload(bin, len);
+ unlock_kernel();
if (r < 0) {
return r;
}
case DSP56K_SET_TX_WSIZE:
if (arg > 4 || arg < 1)
return -EINVAL;
+ lock_kernel();
dsp56k.tx_wsize = (int) arg;
+ unlock_kernel();
break;
case DSP56K_SET_RX_WSIZE:
if (arg > 4 || arg < 1)
return -EINVAL;
+ lock_kernel();
dsp56k.rx_wsize = (int) arg;
+ unlock_kernel();
break;
case DSP56K_HOST_FLAGS:
{
if(get_user(out, &hf->out) < 0)
return -EFAULT;
+ lock_kernel();
if ((dir & 0x1) && (out & 0x1))
dsp56k_host_interface.icr |= DSP56K_ICR_HF0;
else if (dir & 0x1)
if (dsp56k_host_interface.icr & DSP56K_ICR_HF1) status |= 0x2;
if (dsp56k_host_interface.isr & DSP56K_ISR_HF2) status |= 0x4;
if (dsp56k_host_interface.isr & DSP56K_ISR_HF3) status |= 0x8;
-
+ unlock_kernel();
return put_user(status, &hf->status);
}
case DSP56K_HOST_CMD:
if (arg > 31 || arg < 0)
return -EINVAL;
+ lock_kernel();
dsp56k_host_interface.cvr = (u_char)((arg & DSP56K_CVR_HV_MASK) |
DSP56K_CVR_HC);
+ unlock_kernel();
break;
default:
return -EINVAL;
.owner = THIS_MODULE,
.read = dsp56k_read,
.write = dsp56k_write,
- .ioctl = dsp56k_ioctl,
+ .unlocked_ioctl = dsp56k_ioctl,
.open = dsp56k_open,
.release = dsp56k_release,
};
#include <linux/rtc.h>
#include <linux/proc_fs.h>
#include <linux/efi.h>
+#include <linux/smp_lock.h>
+#include <linux/uaccess.h>
-#include <asm/uaccess.h>
#include <asm/system.h>
#define EFI_RTC_VERSION "0.4"
static DEFINE_SPINLOCK(efi_rtc_lock);
-static int efi_rtc_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg);
+static long efi_rtc_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg);
#define is_leap(year) \
((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
}
}
-static int
-efi_rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
- unsigned long arg)
+static long efi_rtc_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
{
efi_status_t status;
return -EINVAL;
case RTC_RD_TIME:
-
+ lock_kernel();
spin_lock_irqsave(&efi_rtc_lock, flags);
status = efi.get_time(&eft, &cap);
spin_unlock_irqrestore(&efi_rtc_lock,flags);
-
+ unlock_kernel();
if (status != EFI_SUCCESS) {
/* should never happen */
printk(KERN_ERR "efitime: can't read time\n");
convert_to_efi_time(&wtime, &eft);
+ lock_kernel();
spin_lock_irqsave(&efi_rtc_lock, flags);
status = efi.set_time(&eft);
spin_unlock_irqrestore(&efi_rtc_lock,flags);
+ unlock_kernel();
return status == EFI_SUCCESS ? 0 : -EINVAL;
convert_to_efi_time(&wtime, &eft);
+ lock_kernel();
spin_lock_irqsave(&efi_rtc_lock, flags);
/*
* XXX Fixme:
status = efi.set_wakeup_time((efi_bool_t)enabled, &eft);
spin_unlock_irqrestore(&efi_rtc_lock,flags);
+ unlock_kernel();
return status == EFI_SUCCESS ? 0 : -EINVAL;
case RTC_WKALM_RD:
+ lock_kernel();
spin_lock_irqsave(&efi_rtc_lock, flags);
status = efi.get_wakeup_time((efi_bool_t *)&enabled, (efi_bool_t *)&pending, &eft);
spin_unlock_irqrestore(&efi_rtc_lock,flags);
+ unlock_kernel();
if (status != EFI_SUCCESS) return -EINVAL;
return copy_to_user(&ewp->time, &wtime,
sizeof(struct rtc_time)) ? -EFAULT : 0;
}
- return -EINVAL;
+ return -ENOTTY;
}
/*
* up things on a close.
*/
-static int
-efi_rtc_open(struct inode *inode, struct file *file)
+static int efi_rtc_open(struct inode *inode, struct file *file)
{
/*
* nothing special to do here
return 0;
}
-static int
-efi_rtc_close(struct inode *inode, struct file *file)
+static int efi_rtc_close(struct inode *inode, struct file *file)
{
return 0;
}
static const struct file_operations efi_rtc_fops = {
.owner = THIS_MODULE,
- .ioctl = efi_rtc_ioctl,
+ .unlocked_ioctl = efi_rtc_ioctl,
.open = efi_rtc_open,
.release = efi_rtc_close,
};
-static struct miscdevice efi_rtc_dev=
-{
+static struct miscdevice efi_rtc_dev= {
EFI_RTC_MINOR,
"efirtc",
&efi_rtc_fops
return -ENXIO;
}
+#if 0
int hpet_unregister(struct hpet_task *tp)
{
struct hpet_dev *devp;
return 0;
}
+#endif /* 0 */
static ctl_table hpet_table[] = {
{
#include <linux/init.h>
#include <linux/kbd_kern.h>
#include <linux/kernel.h>
-#include <linux/kref.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/module.h>
static int sysrq_pressed;
#endif
-struct hvc_struct {
- spinlock_t lock;
- int index;
- struct tty_struct *tty;
- unsigned int count;
- int do_wakeup;
- char *outbuf;
- int outbuf_size;
- int n_outbuf;
- uint32_t vtermno;
- struct hv_ops *ops;
- int irq_requested;
- int irq;
- struct list_head next;
- struct kref kref; /* ref count & hvc_struct lifetime */
-};
-
/* dynamic list of hvc_struct instances */
static LIST_HEAD(hvc_structs);
return 0;
}
+EXPORT_SYMBOL_GPL(hvc_instantiate);
/* Wake the sleeping khvcd */
-static void hvc_kick(void)
+void hvc_kick(void)
{
hvc_kicked = 1;
wake_up_process(hvc_task);
}
-
-static int hvc_poll(struct hvc_struct *hp);
-
-/*
- * NOTE: This API isn't used if the console adapter doesn't support interrupts.
- * In this case the console is poll driven.
- */
-static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance)
-{
- /* if hvc_poll request a repoll, then kick the hvcd thread */
- if (hvc_poll(dev_instance))
- hvc_kick();
- return IRQ_HANDLED;
-}
+EXPORT_SYMBOL_GPL(hvc_kick);
static void hvc_unthrottle(struct tty_struct *tty)
{
{
struct hvc_struct *hp;
unsigned long flags;
- int irq = 0;
int rc = 0;
/* Auto increments kref reference if found. */
tty->low_latency = 1; /* Makes flushes to ldisc synchronous. */
hp->tty = tty;
- /* Save for request_irq outside of spin_lock. */
- irq = hp->irq;
- if (irq)
- hp->irq_requested = 1;
+
+ if (hp->ops->notifier_add)
+ rc = hp->ops->notifier_add(hp, hp->data);
spin_unlock_irqrestore(&hp->lock, flags);
- /* check error, fallback to non-irq */
- if (irq)
- rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED, "hvc_console", hp);
+
/*
- * If the request_irq() fails and we return an error. The tty layer
+ * If the notifier fails we return an error. The tty layer
* will call hvc_close() after a failed open but we don't want to clean
* up there so we'll clean up here and clear out the previously set
* tty fields and return the kref reference.
if (rc) {
spin_lock_irqsave(&hp->lock, flags);
hp->tty = NULL;
- hp->irq_requested = 0;
spin_unlock_irqrestore(&hp->lock, flags);
tty->driver_data = NULL;
kref_put(&hp->kref, destroy_hvc_struct);
static void hvc_close(struct tty_struct *tty, struct file * filp)
{
struct hvc_struct *hp;
- int irq = 0;
unsigned long flags;
if (tty_hung_up_p(filp))
spin_lock_irqsave(&hp->lock, flags);
if (--hp->count == 0) {
- if (hp->irq_requested)
- irq = hp->irq;
- hp->irq_requested = 0;
+ if (hp->ops->notifier_del)
+ hp->ops->notifier_del(hp, hp->data);
/* We are done with the tty pointer now. */
hp->tty = NULL;
* waking periodically to check chars_in_buffer().
*/
tty_wait_until_sent(tty, HVC_CLOSE_WAIT);
-
- if (irq)
- free_irq(irq, hp);
-
} else {
if (hp->count < 0)
printk(KERN_ERR "hvc_close %X: oops, count is %d\n",
{
struct hvc_struct *hp = tty->driver_data;
unsigned long flags;
- int irq = 0;
int temp_open_count;
if (!hp)
hp->count = 0;
hp->n_outbuf = 0;
hp->tty = NULL;
- if (hp->irq_requested)
- /* Saved for use outside of spin_lock. */
- irq = hp->irq;
- hp->irq_requested = 0;
+
+ if (hp->ops->notifier_del)
+ hp->ops->notifier_del(hp, hp->data);
+
spin_unlock_irqrestore(&hp->lock, flags);
- if (irq)
- free_irq(irq, hp);
+
while(temp_open_count) {
--temp_open_count;
kref_put(&hp->kref, destroy_hvc_struct);
#define HVC_POLL_READ 0x00000001
#define HVC_POLL_WRITE 0x00000002
-static int hvc_poll(struct hvc_struct *hp)
+int hvc_poll(struct hvc_struct *hp)
{
struct tty_struct *tty;
int i, n, poll_mask = 0;
if (test_bit(TTY_THROTTLED, &tty->flags))
goto throttled;
- /* If we aren't interrupt driven and aren't throttled, we always
+ /* If we aren't notifier driven and aren't throttled, we always
* request a reschedule
*/
- if (hp->irq == 0)
+ if (!hp->irq_requested)
poll_mask |= HVC_POLL_READ;
/* Read data if any */
return poll_mask;
}
+EXPORT_SYMBOL_GPL(hvc_poll);
/*
* This kthread is either polling or interrupt driven. This is determined by
.chars_in_buffer = hvc_chars_in_buffer,
};
-struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq,
+struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int data,
struct hv_ops *ops, int outbuf_size)
{
struct hvc_struct *hp;
memset(hp, 0x00, sizeof(*hp));
hp->vtermno = vtermno;
- hp->irq = irq;
+ hp->data = data;
hp->ops = ops;
hp->outbuf_size = outbuf_size;
hp->outbuf = &((char *)hp)[ALIGN(sizeof(*hp), sizeof(long))];
return hp;
}
+EXPORT_SYMBOL_GPL(hvc_alloc);
int __devexit hvc_remove(struct hvc_struct *hp)
{
#ifndef HVC_CONSOLE_H
#define HVC_CONSOLE_H
+#include <linux/kref.h>
/*
* This is the max number of console adapters that can/will be found as
*/
#define HVC_ALLOC_TTY_ADAPTERS 8
+struct hvc_struct {
+ spinlock_t lock;
+ int index;
+ struct tty_struct *tty;
+ unsigned int count;
+ int do_wakeup;
+ char *outbuf;
+ int outbuf_size;
+ int n_outbuf;
+ uint32_t vtermno;
+ struct hv_ops *ops;
+ int irq_requested;
+ int data;
+ struct list_head next;
+ struct kref kref; /* ref count & hvc_struct lifetime */
+};
/* implemented by a low level driver */
struct hv_ops {
int (*get_chars)(uint32_t vtermno, char *buf, int count);
int (*put_chars)(uint32_t vtermno, const char *buf, int count);
-};
-struct hvc_struct;
+ /* Callbacks for notification. Called in open and close */
+ int (*notifier_add)(struct hvc_struct *hp, int irq);
+ void (*notifier_del)(struct hvc_struct *hp, int irq);
+};
/* Register a vterm and a slot index for use as a console (console_init) */
extern int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops);
/* register a vterm for hvc tty operation (module_init or hotplug add) */
-extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int irq,
+extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int data,
struct hv_ops *ops, int outbuf_size);
-/* remove a vterm from hvc tty operation (modele_exit or hotplug remove) */
+/* remove a vterm from hvc tty operation (module_exit or hotplug remove) */
extern int __devexit hvc_remove(struct hvc_struct *hp);
+/* data available */
+int hvc_poll(struct hvc_struct *hp);
+void hvc_kick(void);
+
+/* default notifier for irq based notification */
+extern int notifier_add_irq(struct hvc_struct *hp, int data);
+extern void notifier_del_irq(struct hvc_struct *hp, int data);
+
#if defined(CONFIG_XMON) && defined(CONFIG_SMP)
#include <asm/xmon.h>
--- /dev/null
+/*
+ * Copyright IBM Corp. 2001,2008
+ *
+ * This file contains the IRQ specific code for hvc_console
+ *
+ */
+
+#include <linux/interrupt.h>
+
+#include "hvc_console.h"
+
+static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance)
+{
+ /* if hvc_poll request a repoll, then kick the hvcd thread */
+ if (hvc_poll(dev_instance))
+ hvc_kick();
+ return IRQ_HANDLED;
+}
+
+/*
+ * For IRQ based systems these callbacks can be used
+ */
+int notifier_add_irq(struct hvc_struct *hp, int irq)
+{
+ int rc;
+
+ if (!irq) {
+ hp->irq_requested = 0;
+ return 0;
+ }
+ rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED,
+ "hvc_console", hp);
+ if (!rc)
+ hp->irq_requested = 1;
+ return rc;
+}
+
+void notifier_del_irq(struct hvc_struct *hp, int irq)
+{
+ if (!irq)
+ return;
+ free_irq(irq, hp);
+ hp->irq_requested = 0;
+}
static struct hv_ops hvc_get_put_ops = {
.get_chars = get_chars,
.put_chars = put_chars,
+ .notifier_add = notifier_add_irq,
+ .notifier_del = notifier_del_irq,
};
static int __devinit hvc_vio_probe(struct vio_dev *vdev,
static struct hv_ops hvc_get_put_ops = {
.get_chars = filtered_get_chars,
.put_chars = hvc_put_chars,
+ .notifier_add = notifier_add_irq,
+ .notifier_del = notifier_del_irq,
};
static int __devinit hvc_vio_probe(struct vio_dev *vdev,
static struct hv_ops hvc_ops = {
.get_chars = read_console,
.put_chars = write_console,
+ .notifier_add = notifier_add_irq,
+ .notifier_del = notifier_del_irq,
};
static int __init xen_init(void)
static ssize_t ip2_ipl_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t ip2_ipl_write(struct file *, const char __user *, size_t, loff_t *);
-static int ip2_ipl_ioctl(struct inode *, struct file *, UINT, ULONG);
+static long ip2_ipl_ioctl(struct file *, UINT, ULONG);
static int ip2_ipl_open(struct inode *, struct file *);
static int DumpTraceBuffer(char __user *, int);
.owner = THIS_MODULE,
.read = ip2_ipl_read,
.write = ip2_ipl_write,
- .ioctl = ip2_ipl_ioctl,
+ .unlocked_ioctl = ip2_ipl_ioctl,
.open = ip2_ipl_open,
};
/* */
/* */
/******************************************************************************/
-static int
-ip2_ipl_ioctl ( struct inode *pInode, struct file *pFile, UINT cmd, ULONG arg )
+static long
+ip2_ipl_ioctl (struct file *pFile, UINT cmd, ULONG arg )
{
- unsigned int iplminor = iminor(pInode);
+ unsigned int iplminor = iminor(pFile->f_path.dentry->d_inode);
int rc = 0;
void __user *argp = (void __user *)arg;
ULONG __user *pIndex = argp;
printk (KERN_DEBUG "IP2IPL: ioctl cmd %d, arg %ld\n", cmd, arg );
#endif
+ lock_kernel();
+
switch ( iplminor ) {
case 0: // IPL device
rc = -EINVAL;
rc = -ENODEV;
break;
}
+ unlock_kernel();
return rc;
}
static int mwave_open(struct inode *inode, struct file *file);
static int mwave_close(struct inode *inode, struct file *file);
-static int mwave_ioctl(struct inode *inode, struct file *filp,
- unsigned int iocmd, unsigned long ioarg);
+static long mwave_ioctl(struct file *filp, unsigned int iocmd,
+ unsigned long ioarg);
MWAVE_DEVICE_DATA mwave_s_mdd;
return retval;
}
-static int mwave_ioctl(struct inode *inode, struct file *file,
- unsigned int iocmd, unsigned long ioarg)
+static long mwave_ioctl(struct file *file, unsigned int iocmd,
+ unsigned long ioarg)
{
unsigned int retval = 0;
pMWAVE_DEVICE_DATA pDrvData = &mwave_s_mdd;
void __user *arg = (void __user *)ioarg;
- PRINTK_5(TRACE_MWAVE,
- "mwavedd::mwave_ioctl, entry inode %p file %p cmd %x arg %x\n",
- inode, file, iocmd, (int) ioarg);
+ PRINTK_4(TRACE_MWAVE,
+ "mwavedd::mwave_ioctl, entry file %p cmd %x arg %x\n",
+ file, iocmd, (int) ioarg);
switch (iocmd) {
PRINTK_1(TRACE_MWAVE,
"mwavedd::mwave_ioctl, IOCTL_MW_RESET"
" calling tp3780I_ResetDSP\n");
+ lock_kernel();
retval = tp3780I_ResetDSP(&pDrvData->rBDData);
+ unlock_kernel();
PRINTK_2(TRACE_MWAVE,
"mwavedd::mwave_ioctl, IOCTL_MW_RESET"
" retval %x from tp3780I_ResetDSP\n",
PRINTK_1(TRACE_MWAVE,
"mwavedd::mwave_ioctl, IOCTL_MW_RUN"
" calling tp3780I_StartDSP\n");
+ lock_kernel();
retval = tp3780I_StartDSP(&pDrvData->rBDData);
+ unlock_kernel();
PRINTK_2(TRACE_MWAVE,
"mwavedd::mwave_ioctl, IOCTL_MW_RUN"
" retval %x from tp3780I_StartDSP\n",
"mwavedd::mwave_ioctl,"
" IOCTL_MW_DSP_ABILITIES calling"
" tp3780I_QueryAbilities\n");
+ lock_kernel();
retval = tp3780I_QueryAbilities(&pDrvData->rBDData,
&rAbilities);
+ unlock_kernel();
PRINTK_2(TRACE_MWAVE,
"mwavedd::mwave_ioctl, IOCTL_MW_DSP_ABILITIES"
" retval %x from tp3780I_QueryAbilities\n",
"mwavedd::mwave_ioctl IOCTL_MW_READ_DATA,"
" size %lx, ioarg %lx pusBuffer %p\n",
rReadData.ulDataLength, ioarg, pusBuffer);
+ lock_kernel();
retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData,
iocmd,
pusBuffer,
rReadData.ulDataLength,
rReadData.usDspAddress);
+ unlock_kernel();
}
break;
" size %lx, ioarg %lx pusBuffer %p\n",
rReadData.ulDataLength / 2, ioarg,
pusBuffer);
+ lock_kernel();
retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData,
iocmd, pusBuffer,
rReadData.ulDataLength / 2,
rReadData.usDspAddress);
+ unlock_kernel();
}
break;
" size %lx, ioarg %lx pusBuffer %p\n",
rWriteData.ulDataLength, ioarg,
pusBuffer);
+ lock_kernel();
retval = tp3780I_ReadWriteDspDStore(&pDrvData->rBDData,
iocmd, pusBuffer,
rWriteData.ulDataLength,
rWriteData.usDspAddress);
+ unlock_kernel();
}
break;
" size %lx, ioarg %lx pusBuffer %p\n",
rWriteData.ulDataLength, ioarg,
pusBuffer);
+ lock_kernel();
retval = tp3780I_ReadWriteDspIStore(&pDrvData->rBDData,
iocmd, pusBuffer,
rWriteData.ulDataLength,
rWriteData.usDspAddress);
+ unlock_kernel();
}
break;
ipcnum);
return -EINVAL;
}
+ lock_kernel();
pDrvData->IPCs[ipcnum].bIsHere = FALSE;
pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
+ unlock_kernel();
PRINTK_2(TRACE_MWAVE,
"mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC"
return -EINVAL;
}
+ lock_kernel();
if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
DECLARE_WAITQUEUE(wait, current);
" processing\n",
ipcnum);
}
+ unlock_kernel();
}
break;
ipcnum);
return -EINVAL;
}
+ lock_kernel();
if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
pDrvData->IPCs[ipcnum].bIsEnabled = FALSE;
if (pDrvData->IPCs[ipcnum].bIsHere == TRUE) {
wake_up_interruptible(&pDrvData->IPCs[ipcnum].ipc_wait_queue);
}
}
+ unlock_kernel();
}
break;
default:
- PRINTK_ERROR(KERN_ERR_MWAVE "mwavedd::mwave_ioctl:"
- " Error: Unrecognized iocmd %x\n",
- iocmd);
return -ENOTTY;
break;
} /* switch */
.owner = THIS_MODULE,
.read = mwave_read,
.write = mwave_write,
- .ioctl = mwave_ioctl,
+ .unlocked_ioctl = mwave_ioctl,
.open = mwave_open,
.release = mwave_close
};
} MWAVE_DEVICE_DATA, *pMWAVE_DEVICE_DATA;
+extern MWAVE_DEVICE_DATA mwave_s_mdd;
+
#endif
#include "3780i.h"
#include "mwavepub.h"
-extern MWAVE_DEVICE_DATA mwave_s_mdd;
-
static unsigned short s_ausThinkpadIrqToField[16] =
{ 0xFFFF, 0xFFFF, 0xFFFF, 0x0001, 0x0002, 0x0003, 0xFFFF, 0x0004,
0xFFFF, 0xFFFF, 0x0005, 0x0006, 0xFFFF, 0xFFFF, 0xFFFF, 0x0007 };
* Fed through a cleanup, indent and remove of non 2.6 code by Alan Cox
* <alan@redhat.com>. The original 1.8 code is available on www.moxa.com.
* - Fixed x86_64 cleanness
- * - Fixed sleep with spinlock held in mxser_send_break
*/
#include <linux/module.h>
#define MXSER_VERSION "2.0.4" /* 1.12 */
#define MXSERMAJOR 174
-#define MXSERCUMAJOR 175
#define MXSER_BOARDS 4 /* Max. boards */
#define MXSER_PORTS_PER_BOARD 8 /* Max. ports per board */
#define MXSER_PORTS (MXSER_BOARDS * MXSER_PORTS_PER_BOARD)
#define MXSER_ISR_PASS_LIMIT 100
-#define MXSER_ERR_IOADDR -1
-#define MXSER_ERR_IRQ -2
-#define MXSER_ERR_IRQ_CONFLIT -3
-#define MXSER_ERR_VECTOR -4
-
/*CheckIsMoxaMust return value*/
#define MOXA_OTHER_UART 0x00
#define MOXA_MUST_MU150_HWID 0x01
};
MODULE_DEVICE_TABLE(pci, mxser_pcibrds);
-static int ioaddr[MXSER_BOARDS] = { 0, 0, 0, 0 };
+static unsigned long ioaddr[MXSER_BOARDS];
static int ttymajor = MXSERMAJOR;
/* Variables for insmod */
MODULE_AUTHOR("Casper Yang");
MODULE_DESCRIPTION("MOXA Smartio/Industio Family Multiport Board Device Driver");
-module_param_array(ioaddr, int, NULL, 0);
+module_param_array(ioaddr, ulong, NULL, 0);
+MODULE_PARM_DESC(ioaddr, "ISA io addresses to look for a moxa board");
module_param(ttymajor, int, 0);
MODULE_LICENSE("GPL");
unsigned long txcnt[MXSER_PORTS];
};
-
struct mxser_mon {
unsigned long rxcnt;
unsigned long txcnt;
int dcd;
};
-static struct mxser_mstatus GMStatus[MXSER_PORTS];
-
-static int mxserBoardCAP[MXSER_BOARDS] = {
- 0, 0, 0, 0
- /* 0x180, 0x280, 0x200, 0x320 */
-};
-
static struct mxser_board mxser_boards[MXSER_BOARDS];
static struct tty_driver *mxvar_sdriver;
static struct mxser_log mxvar_log;
-static int mxvar_diagflag;
-static unsigned char mxser_msr[MXSER_PORTS + 1];
-static struct mxser_mon_ext mon_data_ext;
static int mxser_set_baud_method[MXSER_PORTS + 1];
static void mxser_enable_must_enchance_mode(unsigned long baseio)
static unsigned char mxser_get_msr(int baseaddr, int mode, int port)
{
+ static unsigned char mxser_msr[MXSER_PORTS + 1];
unsigned char status = 0;
status = inb(baseaddr + UART_MSR);
struct mxser_port *info = tty->driver_data;
unsigned long flags;
- if (info->xmit_cnt <= 0 ||
- tty->stopped ||
- !info->port.xmit_buf ||
- (tty->hw_stopped &&
- (info->type != PORT_16550A) &&
- (!info->board->chip_flag)
- ))
+ if (info->xmit_cnt <= 0 || tty->stopped || !info->port.xmit_buf ||
+ (tty->hw_stopped && info->type != PORT_16550A &&
+ !info->board->chip_flag))
return;
spin_lock_irqsave(&info->slock, flags);
int ret;
ret = SERIAL_XMIT_SIZE - info->xmit_cnt - 1;
- if (ret < 0)
- ret = 0;
- return ret;
+ return ret < 0 ? 0 : ret;
}
static int mxser_chars_in_buffer(struct tty_struct *tty)
switch (cmd) {
case MOXA_GET_MAJOR:
+ printk(KERN_WARNING "mxser: '%s' uses deprecated ioctl %x, fix "
+ "your userspace\n", current->comm, cmd);
return put_user(ttymajor, (int __user *)argp);
case MOXA_CHKPORTENABLE:
ret = -EFAULT;
unlock_kernel();
return ret;
- case MOXA_GETMSTATUS:
+ case MOXA_GETMSTATUS: {
+ struct mxser_mstatus ms, __user *msu = argp;
lock_kernel();
for (i = 0; i < MXSER_BOARDS; i++)
for (j = 0; j < MXSER_PORTS_PER_BOARD; j++) {
port = &mxser_boards[i].ports[j];
+ memset(&ms, 0, sizeof(ms));
- GMStatus[i].ri = 0;
- if (!port->ioaddr) {
- GMStatus[i].dcd = 0;
- GMStatus[i].dsr = 0;
- GMStatus[i].cts = 0;
- continue;
- }
+ if (!port->ioaddr)
+ goto copy;
if (!port->port.tty || !port->port.tty->termios)
- GMStatus[i].cflag =
- port->normal_termios.c_cflag;
+ ms.cflag = port->normal_termios.c_cflag;
else
- GMStatus[i].cflag =
- port->port.tty->termios->c_cflag;
+ ms.cflag = port->port.tty->termios->c_cflag;
status = inb(port->ioaddr + UART_MSR);
- if (status & 0x80 /*UART_MSR_DCD */ )
- GMStatus[i].dcd = 1;
- else
- GMStatus[i].dcd = 0;
-
- if (status & 0x20 /*UART_MSR_DSR */ )
- GMStatus[i].dsr = 1;
- else
- GMStatus[i].dsr = 0;
-
-
- if (status & 0x10 /*UART_MSR_CTS */ )
- GMStatus[i].cts = 1;
- else
- GMStatus[i].cts = 0;
+ if (status & UART_MSR_DCD)
+ ms.dcd = 1;
+ if (status & UART_MSR_DSR)
+ ms.dsr = 1;
+ if (status & UART_MSR_CTS)
+ ms.cts = 1;
+ copy:
+ if (copy_to_user(msu, &ms, sizeof(ms))) {
+ unlock_kernel();
+ return -EFAULT;
+ }
+ msu++;
}
unlock_kernel();
- if (copy_to_user(argp, GMStatus,
- sizeof(struct mxser_mstatus) * MXSER_PORTS))
- return -EFAULT;
return 0;
+ }
case MOXA_ASPP_MON_EXT: {
- int p, shiftbit;
- unsigned long opmode;
- unsigned cflag, iflag;
+ struct mxser_mon_ext *me; /* it's 2k, stack unfriendly */
+ unsigned int cflag, iflag, p;
+ u8 opmode;
+
+ me = kzalloc(sizeof(*me), GFP_KERNEL);
+ if (!me)
+ return -ENOMEM;
lock_kernel();
- for (i = 0; i < MXSER_BOARDS; i++) {
- for (j = 0; j < MXSER_PORTS_PER_BOARD; j++) {
+ for (i = 0, p = 0; i < MXSER_BOARDS; i++) {
+ for (j = 0; j < MXSER_PORTS_PER_BOARD; j++, p++) {
+ if (p >= ARRAY_SIZE(me->rx_cnt)) {
+ i = MXSER_BOARDS;
+ break;
+ }
port = &mxser_boards[i].ports[j];
if (!port->ioaddr)
continue;
- status = mxser_get_msr(port->ioaddr, 0, i);
+ status = mxser_get_msr(port->ioaddr, 0, p);
if (status & UART_MSR_TERI)
port->icount.rng++;
port->icount.cts++;
port->mon_data.modem_status = status;
- mon_data_ext.rx_cnt[i] = port->mon_data.rxcnt;
- mon_data_ext.tx_cnt[i] = port->mon_data.txcnt;
- mon_data_ext.up_rxcnt[i] =
- port->mon_data.up_rxcnt;
- mon_data_ext.up_txcnt[i] =
- port->mon_data.up_txcnt;
- mon_data_ext.modem_status[i] =
+ me->rx_cnt[p] = port->mon_data.rxcnt;
+ me->tx_cnt[p] = port->mon_data.txcnt;
+ me->up_rxcnt[p] = port->mon_data.up_rxcnt;
+ me->up_txcnt[p] = port->mon_data.up_txcnt;
+ me->modem_status[p] =
port->mon_data.modem_status;
- mon_data_ext.baudrate[i] =
- tty_get_baud_rate(port->port.tty);
+ me->baudrate[p] = tty_get_baud_rate(port->port.tty);
if (!port->port.tty || !port->port.tty->termios) {
cflag = port->normal_termios.c_cflag;
iflag = port->port.tty->termios->c_iflag;
}
- mon_data_ext.databits[i] = cflag & CSIZE;
-
- mon_data_ext.stopbits[i] = cflag & CSTOPB;
-
- mon_data_ext.parity[i] =
- cflag & (PARENB | PARODD | CMSPAR);
-
- mon_data_ext.flowctrl[i] = 0x00;
+ me->databits[p] = cflag & CSIZE;
+ me->stopbits[p] = cflag & CSTOPB;
+ me->parity[p] = cflag & (PARENB | PARODD |
+ CMSPAR);
if (cflag & CRTSCTS)
- mon_data_ext.flowctrl[i] |= 0x03;
+ me->flowctrl[p] |= 0x03;
if (iflag & (IXON | IXOFF))
- mon_data_ext.flowctrl[i] |= 0x0C;
+ me->flowctrl[p] |= 0x0C;
if (port->type == PORT_16550A)
- mon_data_ext.fifo[i] = 1;
- else
- mon_data_ext.fifo[i] = 0;
+ me->fifo[p] = 1;
- p = i % 4;
- shiftbit = p * 2;
- opmode = inb(port->opmode_ioaddr) >> shiftbit;
+ opmode = inb(port->opmode_ioaddr) >>
+ ((p % 4) * 2);
opmode &= OP_MODE_MASK;
-
- mon_data_ext.iftype[i] = opmode;
-
+ me->iftype[p] = opmode;
}
}
unlock_kernel();
- if (copy_to_user(argp, &mon_data_ext,
- sizeof(mon_data_ext)))
- return -EFAULT;
- return 0;
+ if (copy_to_user(argp, me, sizeof(*me)))
+ ret = -EFAULT;
+ kfree(me);
+ return ret;
}
default:
return -ENOIOCTLCMD;
{
struct mxser_port *info = tty->driver_data;
struct async_icount cnow;
- struct serial_icounter_struct __user *p_cuser;
unsigned long flags;
void __user *argp = (void __user *)arg;
int retval;
* NB: both 1->0 and 0->1 transitions are counted except for
* RI where only 0->1 is counted.
*/
- case TIOCGICOUNT:
+ case TIOCGICOUNT: {
+ struct serial_icounter_struct icnt = { 0 };
spin_lock_irqsave(&info->slock, flags);
cnow = info->icount;
spin_unlock_irqrestore(&info->slock, flags);
- p_cuser = argp;
- if (put_user(cnow.frame, &p_cuser->frame))
- return -EFAULT;
- if (put_user(cnow.brk, &p_cuser->brk))
- return -EFAULT;
- if (put_user(cnow.overrun, &p_cuser->overrun))
- return -EFAULT;
- if (put_user(cnow.buf_overrun, &p_cuser->buf_overrun))
- return -EFAULT;
- if (put_user(cnow.parity, &p_cuser->parity))
- return -EFAULT;
- if (put_user(cnow.rx, &p_cuser->rx))
- return -EFAULT;
- if (put_user(cnow.tx, &p_cuser->tx))
- return -EFAULT;
- put_user(cnow.cts, &p_cuser->cts);
- put_user(cnow.dsr, &p_cuser->dsr);
- put_user(cnow.rng, &p_cuser->rng);
- put_user(cnow.dcd, &p_cuser->dcd);
- return 0;
+
+ icnt.frame = cnow.frame;
+ icnt.brk = cnow.brk;
+ icnt.overrun = cnow.overrun;
+ icnt.buf_overrun = cnow.buf_overrun;
+ icnt.parity = cnow.parity;
+ icnt.rx = cnow.rx;
+ icnt.tx = cnow.tx;
+ icnt.cts = cnow.cts;
+ icnt.dsr = cnow.dsr;
+ icnt.rng = cnow.rng;
+ icnt.dcd = cnow.dcd;
+
+ return copy_to_user(argp, &icnt, sizeof(icnt)) ? -EFAULT : 0;
+ }
case MOXA_HighSpeedOn:
return put_user(info->baud_base != 115200 ? 1 : 0, (int __user *)argp);
case MOXA_SDS_RSTICOUNTER:
unsigned int i;
int retval;
- printk(KERN_INFO "max. baud rate = %d bps.\n", brd->ports[0].max_baud);
+ printk(KERN_INFO "mxser: max. baud rate = %d bps\n",
+ brd->ports[0].max_baud);
for (i = 0; i < brd->info->nports; i++) {
info = &brd->ports[i];
irq = regs[9] & 0xF000;
irq = irq | (irq >> 4);
if (irq != (regs[9] & 0xFF00))
- return MXSER_ERR_IRQ_CONFLIT;
+ goto err_irqconflict;
} else if (brd->info->nports == 4) {
irq = regs[9] & 0xF000;
irq = irq | (irq >> 4);
irq = irq | (irq >> 8);
if (irq != regs[9])
- return MXSER_ERR_IRQ_CONFLIT;
+ goto err_irqconflict;
} else if (brd->info->nports == 8) {
irq = regs[9] & 0xF000;
irq = irq | (irq >> 4);
irq = irq | (irq >> 8);
if ((irq != regs[9]) || (irq != regs[10]))
- return MXSER_ERR_IRQ_CONFLIT;
+ goto err_irqconflict;
}
- if (!irq)
- return MXSER_ERR_IRQ;
+ if (!irq) {
+ printk(KERN_ERR "mxser: interrupt number unset\n");
+ return -EIO;
+ }
brd->irq = ((int)(irq & 0xF000) >> 12);
for (i = 0; i < 8; i++)
brd->ports[i].ioaddr = (int) regs[i + 1] & 0xFFF8;
- if ((regs[12] & 0x80) == 0)
- return MXSER_ERR_VECTOR;
+ if ((regs[12] & 0x80) == 0) {
+ printk(KERN_ERR "mxser: invalid interrupt vector\n");
+ return -EIO;
+ }
brd->vector = (int)regs[11]; /* interrupt vector */
if (id == 1)
brd->vector_mask = 0x00FF;
else
brd->uart_type = PORT_16450;
if (!request_region(brd->ports[0].ioaddr, 8 * brd->info->nports,
- "mxser(IO)"))
- return MXSER_ERR_IOADDR;
+ "mxser(IO)")) {
+ printk(KERN_ERR "mxser: can't request ports I/O region: "
+ "0x%.8lx-0x%.8lx\n",
+ brd->ports[0].ioaddr, brd->ports[0].ioaddr +
+ 8 * brd->info->nports - 1);
+ return -EIO;
+ }
if (!request_region(brd->vector, 1, "mxser(vector)")) {
release_region(brd->ports[0].ioaddr, 8 * brd->info->nports);
- return MXSER_ERR_VECTOR;
+ printk(KERN_ERR "mxser: can't request interrupt vector region: "
+ "0x%.8lx-0x%.8lx\n",
+ brd->ports[0].ioaddr, brd->ports[0].ioaddr +
+ 8 * brd->info->nports - 1);
+ return -EIO;
}
return brd->info->nports;
+
+err_irqconflict:
+ printk(KERN_ERR "mxser: invalid interrupt number\n");
+ return -EIO;
}
static int __devinit mxser_probe(struct pci_dev *pdev,
break;
if (i >= MXSER_BOARDS) {
- printk(KERN_ERR "Too many Smartio/Industio family boards found "
- "(maximum %d), board not configured\n", MXSER_BOARDS);
+ dev_err(&pdev->dev, "too many boards found (maximum %d), board "
+ "not configured\n", MXSER_BOARDS);
goto err;
}
brd = &mxser_boards[i];
brd->idx = i * MXSER_PORTS_PER_BOARD;
- printk(KERN_INFO "Found MOXA %s board (BusNo=%d, DevNo=%d)\n",
+ dev_info(&pdev->dev, "found MOXA %s board (BusNo=%d, DevNo=%d)\n",
mxser_cards[ent->driver_data].name,
pdev->bus->number, PCI_SLOT(pdev->devfn));
retval = pci_enable_device(pdev);
if (retval) {
- printk(KERN_ERR "Moxa SmartI/O PCI enable fail !\n");
+ dev_err(&pdev->dev, "PCI enable failed\n");
goto err;
}
static int __init mxser_module_init(void)
{
struct mxser_board *brd;
- unsigned long cap;
- unsigned int i, m, isaloop;
- int retval, b;
-
- pr_debug("Loading module mxser ...\n");
+ unsigned int b, i, m;
+ int retval;
mxvar_sdriver = alloc_tty_driver(MXSER_PORTS + 1);
if (!mxvar_sdriver)
goto err_put;
}
- mxvar_diagflag = 0;
-
- m = 0;
/* Start finding ISA boards here */
- for (isaloop = 0; isaloop < 2; isaloop++)
- for (b = 0; b < MXSER_BOARDS && m < MXSER_BOARDS; b++) {
- if (!isaloop)
- cap = mxserBoardCAP[b]; /* predefined */
- else
- cap = ioaddr[b]; /* module param */
-
- if (!cap)
- continue;
+ for (m = 0, b = 0; b < MXSER_BOARDS; b++) {
+ if (!ioaddr[b])
+ continue;
+
+ brd = &mxser_boards[m];
+ retval = mxser_get_ISA_conf(!ioaddr[b], brd);
+ if (retval <= 0) {
+ brd->info = NULL;
+ continue;
+ }
- brd = &mxser_boards[m];
- retval = mxser_get_ISA_conf(cap, brd);
-
- if (retval != 0)
- printk(KERN_INFO "Found MOXA %s board "
- "(CAP=0x%x)\n",
- brd->info->name, ioaddr[b]);
-
- if (retval <= 0) {
- if (retval == MXSER_ERR_IRQ)
- printk(KERN_ERR "Invalid interrupt "
- "number, board not "
- "configured\n");
- else if (retval == MXSER_ERR_IRQ_CONFLIT)
- printk(KERN_ERR "Invalid interrupt "
- "number, board not "
- "configured\n");
- else if (retval == MXSER_ERR_VECTOR)
- printk(KERN_ERR "Invalid interrupt "
- "vector, board not "
- "configured\n");
- else if (retval == MXSER_ERR_IOADDR)
- printk(KERN_ERR "Invalid I/O address, "
- "board not configured\n");
-
- brd->info = NULL;
- continue;
- }
+ printk(KERN_INFO "mxser: found MOXA %s board (CAP=0x%lx)\n",
+ brd->info->name, ioaddr[b]);
- /* mxser_initbrd will hook ISR. */
- if (mxser_initbrd(brd, NULL) < 0) {
- brd->info = NULL;
- continue;
- }
+ /* mxser_initbrd will hook ISR. */
+ if (mxser_initbrd(brd, NULL) < 0) {
+ brd->info = NULL;
+ continue;
+ }
- brd->idx = m * MXSER_PORTS_PER_BOARD;
- for (i = 0; i < brd->info->nports; i++)
- tty_register_device(mxvar_sdriver, brd->idx + i,
- NULL);
+ brd->idx = m * MXSER_PORTS_PER_BOARD;
+ for (i = 0; i < brd->info->nports; i++)
+ tty_register_device(mxvar_sdriver, brd->idx + i, NULL);
- m++;
- }
+ m++;
+ }
retval = pci_register_driver(&mxser_driver);
if (retval) {
- printk(KERN_ERR "Can't register pci driver\n");
+ printk(KERN_ERR "mxser: can't register pci driver\n");
if (!m) {
retval = -ENODEV;
goto err_unr;
} /* else: we have some ISA cards under control */
}
- pr_debug("Done.\n");
-
return 0;
err_unr:
tty_unregister_driver(mxvar_sdriver);
{
unsigned int i, j;
- pr_debug("Unloading module mxser ...\n");
-
pci_unregister_driver(&mxser_driver);
for (i = 0; i < MXSER_BOARDS; i++) /* ISA remains */
for (i = 0; i < MXSER_BOARDS; i++)
if (mxser_boards[i].info != NULL)
mxser_release_res(&mxser_boards[i], NULL, 1);
-
- pr_debug("Done.\n");
}
module_init(mxser_module_init);
static ssize_t flash_read(struct file *file, char __user *buf, size_t size,
loff_t *ppos)
{
- unsigned long p = *ppos;
- unsigned int count = size;
- int ret = 0;
+ ssize_t ret;
if (flashdebug)
printk(KERN_DEBUG "flash_read: flash_read: offset=0x%lX, "
"buffer=%p, count=0x%X.\n", p, buf, count);
+ /*
+ * We now lock against reads and writes. --rmk
+ */
+ if (mutex_lock_interruptible(&nwflash_mutex))
+ return -ERESTARTSYS;
- if (count)
- ret = -ENXIO;
-
- if (p < gbFlashSize) {
- if (count > gbFlashSize - p)
- count = gbFlashSize - p;
+ ret = simple_read_from_buffer(buf, size, ppos, FLASH_BASE, gbFlashSize);
+ mutex_unlock(&nwflash_mutex);
- /*
- * We now lock against reads and writes. --rmk
- */
- if (mutex_lock_interruptible(&nwflash_mutex))
- return -ERESTARTSYS;
-
- ret = copy_to_user(buf, (void *)(FLASH_BASE + p), count);
- if (ret == 0) {
- ret = count;
- *ppos += count;
- } else
- ret = -EFAULT;
- mutex_unlock(&nwflash_mutex);
- }
return ret;
}
#include <linux/major.h>
#include <linux/ppdev.h>
#include <linux/smp_lock.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#define PP_VERSION "ppdev: user-space parallel port driver"
#define CHRDEV "ppdev"
return IEEE1284_PH_FWD_IDLE;
}
-static int pp_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
+static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
- unsigned int minor = iminor(inode);
+ unsigned int minor = iminor(file->f_path.dentry->d_inode);
struct pp_struct *pp = file->private_data;
struct parport * port;
void __user *argp = (void __user *)arg;
return 0;
}
+static long pp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ long ret;
+ lock_kernel();
+ ret = pp_do_ioctl(file, cmd, arg);
+ unlock_kernel();
+ return ret;
+}
+
static int pp_open (struct inode * inode, struct file * file)
{
unsigned int minor = iminor(inode);
.read = pp_read,
.write = pp_write,
.poll = pp_poll,
- .ioctl = pp_ioctl,
+ .unlocked_ioctl = pp_ioctl,
.open = pp_open,
.release = pp_release,
};
static void rio_hungup(void *ptr);
static void rio_close(void *ptr);
static int rio_chars_in_buffer(void *ptr);
-static int rio_fw_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
+static long rio_fw_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
static int rio_init_drivers(void);
static void my_hd(void *addr, int len);
static const struct file_operations rio_fw_fops = {
.owner = THIS_MODULE,
- .ioctl = rio_fw_ioctl,
+ .unlocked_ioctl = rio_fw_ioctl,
};
static struct miscdevice rio_fw_device = {
-static int rio_fw_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+static long rio_fw_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
int rc = 0;
func_enter();
/* The "dev" argument isn't used. */
+ lock_kernel();
rc = riocontrol(p, 0, cmd, arg, capable(CAP_SYS_ADMIN));
+ unlock_kernel();
func_exit();
return rc;
static int sx_chars_in_buffer(void *ptr);
static int sx_init_board(struct sx_board *board);
static int sx_init_portstructs(int nboards, int nports);
-static int sx_fw_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg);
+static long sx_fw_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg);
static int sx_init_drivers(void);
static struct tty_driver *sx_driver;
static const struct file_operations sx_fw_fops = {
.owner = THIS_MODULE,
- .ioctl = sx_fw_ioctl,
+ .unlocked_ioctl = sx_fw_ioctl,
};
static struct miscdevice sx_fw_device = {
}
#endif
-static int sx_fw_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
+static long sx_fw_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
{
- int rc = 0;
+ long rc = 0;
int __user *descr = (int __user *)arg;
int i;
static struct sx_board *board = NULL;
func_enter();
-#if 0
- /* Removed superuser check: Sysops can use the permissions on the device
- file to restrict access. Recommendation: Root only. (root.root 600) */
- if (!capable(CAP_SYS_ADMIN)) {
+ if (!capable(CAP_SYS_RAWIO))
return -EPERM;
- }
-#endif
+
+ lock_kernel();
sx_dprintk(SX_DEBUG_FIRMWARE, "IOCTL %x: %lx\n", cmd, arg);
for (i = 0; i < SX_NBOARDS; i++)
sx_dprintk(SX_DEBUG_FIRMWARE, "<%x> ", boards[i].flags);
sx_dprintk(SX_DEBUG_FIRMWARE, "\n");
+ unlock_kernel();
return -EIO;
}
switch (cmd) {
case SXIO_SET_BOARD:
sx_dprintk(SX_DEBUG_FIRMWARE, "set board to %ld\n", arg);
+ rc = -EIO;
if (arg >= SX_NBOARDS)
- return -EIO;
+ break;
sx_dprintk(SX_DEBUG_FIRMWARE, "not out of range\n");
if (!(boards[arg].flags & SX_BOARD_PRESENT))
- return -EIO;
+ break;
sx_dprintk(SX_DEBUG_FIRMWARE, ".. and present!\n");
board = &boards[arg];
+ rc = 0;
+ /* FIXME: And this does ... nothing?? */
break;
case SXIO_GET_TYPE:
rc = -ENOENT; /* If we manage to miss one, return error. */
rc = SX_TYPE_SI;
if (IS_EISA_BOARD(board))
rc = SX_TYPE_SI;
- sx_dprintk(SX_DEBUG_FIRMWARE, "returning type= %d\n", rc);
+ sx_dprintk(SX_DEBUG_FIRMWARE, "returning type= %ld\n", rc);
break;
case SXIO_DO_RAMTEST:
if (sx_initialized) /* Already initialized: better not ramtest the board. */
rc = do_memtest(board, 0, 0x7ff8);
/* if (!rc) rc = do_memtest_w (board, 0, 0x7ff8); */
}
- sx_dprintk(SX_DEBUG_FIRMWARE, "returning memtest result= %d\n",
- rc);
+ sx_dprintk(SX_DEBUG_FIRMWARE,
+ "returning memtest result= %ld\n", rc);
break;
case SXIO_DOWNLOAD:
- if (sx_initialized) /* Already initialized */
- return -EEXIST;
- if (!sx_reset(board))
- return -EIO;
+ if (sx_initialized) {/* Already initialized */
+ rc = -EEXIST;
+ break;
+ }
+ if (!sx_reset(board)) {
+ rc = -EIO;
+ break;
+ }
sx_dprintk(SX_DEBUG_INIT, "reset the board...\n");
tmp = kmalloc(SX_CHUNK_SIZE, GFP_USER);
- if (!tmp)
- return -ENOMEM;
+ if (!tmp) {
+ rc = -ENOMEM;
+ break;
+ }
+ /* FIXME: check returns */
get_user(nbytes, descr++);
get_user(offset, descr++);
get_user(data, descr++);
(i + SX_CHUNK_SIZE > nbytes) ?
nbytes - i : SX_CHUNK_SIZE)) {
kfree(tmp);
- return -EFAULT;
+ rc = -EFAULT;
+ break;
}
memcpy_toio(board->base2 + offset + i, tmp,
(i + SX_CHUNK_SIZE > nbytes) ?
rc = sx_nports;
break;
case SXIO_INIT:
- if (sx_initialized) /* Already initialized */
- return -EEXIST;
+ if (sx_initialized) { /* Already initialized */
+ rc = -EEXIST;
+ break;
+ }
/* This is not allowed until all boards are initialized... */
for (i = 0; i < SX_NBOARDS; i++) {
if ((boards[i].flags & SX_BOARD_PRESENT) &&
- !(boards[i].flags & SX_BOARD_INITIALIZED))
- return -EIO;
+ !(boards[i].flags & SX_BOARD_INITIALIZED)) {
+ rc = -EIO;
+ break;
+ }
}
for (i = 0; i < SX_NBOARDS; i++)
if (!(boards[i].flags & SX_BOARD_PRESENT))
rc = sx_nports;
break;
default:
- printk(KERN_WARNING "Unknown ioctl on firmware device (%x).\n",
- cmd);
+ rc = -ENOTTY;
break;
}
+ unlock_kernel();
func_exit();
return rc;
}
p->signal->tty = NULL;
spin_unlock_irq(&p->sighand->siglock);
}
-EXPORT_SYMBOL(proc_clear_tty);
/* Called under the sighand lock */
/* The operations for our console. */
static struct hv_ops virtio_cons;
+/* The hvc device */
+static struct hvc_struct *hvc;
+
/*D:310 The put_chars() callback is pretty straightforward.
*
* We turn the characters into a scatter-gather list, add it to the output
return hvc_instantiate(0, 0, &virtio_cons);
}
+/*
+ * we support only one console, the hvc struct is a global var
+ * There is no need to do anything
+ */
+static int notifier_add_vio(struct hvc_struct *hp, int data)
+{
+ hp->irq_requested = 1;
+ return 0;
+}
+
+static void notifier_del_vio(struct hvc_struct *hp, int data)
+{
+ hp->irq_requested = 0;
+}
+
+static void hvc_handle_input(struct virtqueue *vq)
+{
+ if (hvc_poll(hvc))
+ hvc_kick();
+}
+
/*D:370 Once we're further in boot, we get probed like any other virtio device.
* At this stage we set up the output virtqueue.
*
static int __devinit virtcons_probe(struct virtio_device *dev)
{
int err;
- struct hvc_struct *hvc;
vdev = dev;
/* Find the input queue. */
/* FIXME: This is why we want to wean off hvc: we do nothing
* when input comes in. */
- in_vq = vdev->config->find_vq(vdev, 0, NULL);
+ in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input);
if (IS_ERR(in_vq)) {
err = PTR_ERR(in_vq);
goto free;
/* Start using the new console output. */
virtio_cons.get_chars = get_chars;
virtio_cons.put_chars = put_chars;
+ virtio_cons.notifier_add = notifier_add_vio;
+ virtio_cons.notifier_del = notifier_del_vio;
/* The first argument of hvc_alloc() is the virtual console number, so
- * we use zero. The second argument is the interrupt number; we
- * currently leave this as zero: it would be better not to use the
- * hvc mechanism and fix this (FIXME!).
+ * we use zero. The second argument is the parameter for the
+ * notification mechanism (like irq number). We currently leave this
+ * as zero, virtqueues have implicit notifications.
*
* The third argument is a "struct hv_ops" containing the put_chars()
- * and get_chars() pointers. The final argument is the output buffer
- * size: we can do any size, so we put PAGE_SIZE here. */
+ * get_chars(), notifier_add() and notifier_del() pointers.
+ * The final argument is the output buffer size: we can do any size,
+ * so we put PAGE_SIZE here. */
hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE);
if (IS_ERR(hvc)) {
err = PTR_ERR(hvc);
c = 0xfffd;
tc = c;
} else { /* no utf or alternate charset mode */
- tc = vc->vc_translate[vc->vc_toggle_meta ? (c | 0x80) : c];
+ tc = vc_translate(vc, c);
}
param.c = tc;
#include <linux/mutex.h>
#include <linux/smp_lock.h>
#include <linux/sysctl.h>
-#include <linux/version.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/platform_device.h>
Support for error detection and correction the Intel
Greekcreek/Blackford chipsets.
+config EDAC_I5100
+ tristate "Intel San Clemente MCH"
+ depends on EDAC_MM_EDAC && X86 && PCI
+ help
+ Support for error detection and correction the Intel
+ San Clemente MCH.
+
config EDAC_MPC85XX
tristate "Freescale MPC85xx"
depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx
obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
obj-$(CONFIG_EDAC_I5000) += i5000_edac.o
+obj-$(CONFIG_EDAC_I5100) += i5100_edac.o
obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o
obj-$(CONFIG_EDAC_E752X) += e752x_edac.o
obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o
#define E752X_REVISION " Ver: 2.0.2 " __DATE__
#define EDAC_MOD_STR "e752x_edac"
+static int report_non_memory_errors;
static int force_function_unhide;
static int sysbus_parity = -1;
#define E752X_BUF_FERR 0x70 /* Memory buffer first error reg (8b) */
#define E752X_BUF_NERR 0x72 /* Memory buffer next error reg (8b) */
#define E752X_BUF_ERRMASK 0x74 /* Memory buffer error mask reg (8b) */
-#define E752X_BUF_SMICMD 0x7A /* Memory buffer SMI command reg (8b) */
+#define E752X_BUF_SMICMD 0x7A /* Memory buffer SMI cmd reg (8b) */
#define E752X_DRAM_FERR 0x80 /* DRAM first error register (16b) */
#define E752X_DRAM_NERR 0x82 /* DRAM next error register (16b) */
#define E752X_DRAM_ERRMASK 0x84 /* DRAM error mask register (8b) */
/* error address register (32b) */
/*
* 31 Reserved
- * 30:2 CE address (64 byte block 34:6)
+ * 30:2 CE address (64 byte block 34:6
* 1 Reserved
* 0 HiLoCS
*/
* 1 Reserved
* 0 HiLoCS
*/
-#define E752X_DRAM_SCRB_ADD 0xA8 /* DRAM first uncorrectable scrub memory */
+#define E752X_DRAM_SCRB_ADD 0xA8 /* DRAM 1st uncorrectable scrub mem */
/* error address register (32b) */
/*
* 31 Reserved
- * 30:2 CE address (64 byte block 34:6)
+ * 30:2 CE address (64 byte block 34:6
* 1 Reserved
* 0 HiLoCS
*/
struct e752x_pvt *pvt = (struct e752x_pvt *)mci->pvt_info;
error_1b = retry_add;
- page = error_1b >> (PAGE_SHIFT - 4); /* convert the addr to 4k page */
- row = pvt->mc_symmetric ? ((page >> 1) & 3) : /* chip select are bits 14 & 13 */
+ page = error_1b >> (PAGE_SHIFT - 4); /* convert the addr to 4k page */
+
+ /* chip select are bits 14 & 13 */
+ row = pvt->mc_symmetric ? ((page >> 1) & 3) :
edac_mc_find_csrow_by_page(mci, page);
+
e752x_mc_printk(mci, KERN_WARNING,
"CE page 0x%lx, row %d : Memory read retry\n",
(long unsigned int)page, row);
}
static char *global_message[11] = {
- "PCI Express C1", "PCI Express C", "PCI Express B1",
- "PCI Express B", "PCI Express A1", "PCI Express A",
- "DMA Controler", "HUB or NS Interface", "System Bus",
- "DRAM Controler", "Internal Buffer"
+ "PCI Express C1",
+ "PCI Express C",
+ "PCI Express B1",
+ "PCI Express B",
+ "PCI Express A1",
+ "PCI Express A",
+ "DMA Controller",
+ "HUB or NS Interface",
+ "System Bus",
+ "DRAM Controller", /* 9th entry */
+ "Internal Buffer"
};
+#define DRAM_ENTRY 9
+
static char *fatal_message[2] = { "Non-Fatal ", "Fatal " };
static void do_global_error(int fatal, u32 errors)
int i;
for (i = 0; i < 11; i++) {
- if (errors & (1 << i))
- e752x_printk(KERN_WARNING, "%sError %s\n",
- fatal_message[fatal], global_message[i]);
+ if (errors & (1 << i)) {
+ /* If the error is from DRAM Controller OR
+ * we are to report ALL errors, then
+ * report the error
+ */
+ if ((i == DRAM_ENTRY) || report_non_memory_errors)
+ e752x_printk(KERN_WARNING, "%sError %s\n",
+ fatal_message[fatal],
+ global_message[i]);
+ }
}
}
struct pci_dev *dev;
pvt->bridge_ck = pci_get_device(PCI_VENDOR_ID_INTEL,
- pvt->dev_info->err_dev, pvt->bridge_ck);
+ pvt->dev_info->err_dev, pvt->bridge_ck);
if (pvt->bridge_ck == NULL)
pvt->bridge_ck = pci_scan_single_device(pdev->bus,
return 1;
}
- dev = pci_get_device(PCI_VENDOR_ID_INTEL, e752x_devs[dev_idx].ctl_dev,
- NULL);
+ dev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ e752x_devs[dev_idx].ctl_dev,
+ NULL);
if (dev == NULL)
goto fail;
module_param(force_function_unhide, int, 0444);
MODULE_PARM_DESC(force_function_unhide, "if BIOS sets Dev0:Fun1 up as hidden:"
- " 1=force unhide and hope BIOS doesn't fight driver for Dev0:Fun1 access");
+ " 1=force unhide and hope BIOS doesn't fight driver for "
+ "Dev0:Fun1 access");
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
module_param(sysbus_parity, int, 0444);
MODULE_PARM_DESC(sysbus_parity, "0=disable system bus parity checking,"
" 1=enable system bus parity checking, default=auto-detect");
+module_param(report_non_memory_errors, int, 0644);
+MODULE_PARM_DESC(report_non_memory_errors, "0=disable non-memory error "
+ "reporting, 1=enable non-memory error reporting");
return edac_mc_poll_msec;
}
+static int edac_set_poll_msec(const char *val, struct kernel_param *kp)
+{
+ long l;
+ int ret;
+
+ if (!val)
+ return -EINVAL;
+
+ ret = strict_strtol(val, 0, &l);
+ if (ret == -EINVAL || ((int)l != l))
+ return -EINVAL;
+ *((int *)kp->arg) = l;
+
+ /* notify edac_mc engine to reset the poll period */
+ edac_mc_reset_delay_period(l);
+
+ return 0;
+}
+
/* Parameter declarations for above */
module_param(edac_mc_panic_on_ue, int, 0644);
MODULE_PARM_DESC(edac_mc_panic_on_ue, "Panic on uncorrected error: 0=off 1=on");
module_param(edac_mc_log_ce, int, 0644);
MODULE_PARM_DESC(edac_mc_log_ce,
"Log correctable error to console: 0=off 1=on");
-module_param(edac_mc_poll_msec, int, 0644);
+module_param_call(edac_mc_poll_msec, edac_set_poll_msec, param_get_int,
+ &edac_mc_poll_msec, 0644);
MODULE_PARM_DESC(edac_mc_poll_msec, "Polling period in milliseconds");
/*
-/*
- * /sys/devices/system/edac/mc;
- * data structures and methods
- */
-static ssize_t memctrl_int_show(void *ptr, char *buffer)
-{
- int *value = (int *)ptr;
- return sprintf(buffer, "%u\n", *value);
-}
-
static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count)
{
int *value = (int *)ptr;
return count;
}
-/*
- * mc poll_msec time value
- */
-static ssize_t poll_msec_int_store(void *ptr, const char *buffer, size_t count)
-{
- int *value = (int *)ptr;
-
- if (isdigit(*buffer)) {
- *value = simple_strtoul(buffer, NULL, 0);
-
- /* notify edac_mc engine to reset the poll period */
- edac_mc_reset_delay_period(*value);
- }
-
- return count;
-}
-
/* EDAC sysfs CSROW data structures and methods
*/
static ssize_t channel_dimm_label_show(struct csrow_info *csrow,
char *data, int channel)
{
- return snprintf(data, EDAC_MC_LABEL_LEN, "%s",
+ /* if field has not been initialized, there is nothing to send */
+ if (!csrow->channels[channel].label[0])
+ return 0;
+
+ return snprintf(data, EDAC_MC_LABEL_LEN, "%s\n",
csrow->channels[channel].label);
}
.default_attrs = (struct attribute **)mci_attr,
};
-/* show/store, tables, etc for the MC kset */
-
-
-struct memctrl_dev_attribute {
- struct attribute attr;
- void *value;
- ssize_t(*show) (void *, char *);
- ssize_t(*store) (void *, const char *, size_t);
-};
-
-/* Set of show/store abstract level functions for memory control object */
-static ssize_t memctrl_dev_show(struct kobject *kobj,
- struct attribute *attr, char *buffer)
-{
- struct memctrl_dev_attribute *memctrl_dev;
- memctrl_dev = (struct memctrl_dev_attribute *)attr;
-
- if (memctrl_dev->show)
- return memctrl_dev->show(memctrl_dev->value, buffer);
-
- return -EIO;
-}
-
-static ssize_t memctrl_dev_store(struct kobject *kobj, struct attribute *attr,
- const char *buffer, size_t count)
-{
- struct memctrl_dev_attribute *memctrl_dev;
- memctrl_dev = (struct memctrl_dev_attribute *)attr;
-
- if (memctrl_dev->store)
- return memctrl_dev->store(memctrl_dev->value, buffer, count);
-
- return -EIO;
-}
-
-static struct sysfs_ops memctrlfs_ops = {
- .show = memctrl_dev_show,
- .store = memctrl_dev_store
-};
-
-#define MEMCTRL_ATTR(_name, _mode, _show, _store) \
-static struct memctrl_dev_attribute attr_##_name = { \
- .attr = {.name = __stringify(_name), .mode = _mode }, \
- .value = &_name, \
- .show = _show, \
- .store = _store, \
-};
-
-#define MEMCTRL_STRING_ATTR(_name, _data, _mode, _show, _store) \
-static struct memctrl_dev_attribute attr_##_name = { \
- .attr = {.name = __stringify(_name), .mode = _mode }, \
- .value = _data, \
- .show = _show, \
- .store = _store, \
-};
-
-/* csrow<id> control files */
-MEMCTRL_ATTR(edac_mc_panic_on_ue,
- S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
-
-MEMCTRL_ATTR(edac_mc_log_ue,
- S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
-
-MEMCTRL_ATTR(edac_mc_log_ce,
- S_IRUGO | S_IWUSR, memctrl_int_show, memctrl_int_store);
-
-MEMCTRL_ATTR(edac_mc_poll_msec,
- S_IRUGO | S_IWUSR, memctrl_int_show, poll_msec_int_store);
-
-/* Base Attributes of the memory ECC object */
-static struct memctrl_dev_attribute *memctrl_attr[] = {
- &attr_edac_mc_panic_on_ue,
- &attr_edac_mc_log_ue,
- &attr_edac_mc_log_ce,
- &attr_edac_mc_poll_msec,
- NULL,
-};
-
-
-/* the ktype for the mc_kset internal kobj */
-static struct kobj_type ktype_mc_set_attribs = {
- .sysfs_ops = &memctrlfs_ops,
- .default_attrs = (struct attribute **)memctrl_attr,
-};
-
/* EDAC memory controller sysfs kset:
* /sys/devices/system/edac/mc
*/
-static struct kset mc_kset = {
- .kobj = {.ktype = &ktype_mc_set_attribs },
-};
-
+static struct kset *mc_kset;
/*
* edac_mc_register_sysfs_main_kobj
}
/* this instance become part of the mc_kset */
- kobj_mci->kset = &mc_kset;
+ kobj_mci->kset = mc_kset;
/* register the mc<id> kobject to the mc_kset */
err = kobject_init_and_add(kobj_mci, &ktype_mci, NULL,
}
/* Init the MC's kobject */
- kobject_set_name(&mc_kset.kobj, "mc");
- mc_kset.kobj.parent = &edac_class->kset.kobj;
-
- /* register the mc_kset */
- err = kset_register(&mc_kset);
- if (err) {
+ mc_kset = kset_create_and_add("mc", NULL, &edac_class->kset.kobj);
+ if (!mc_kset) {
+ err = -ENOMEM;
debugf1("%s() Failed to register '.../edac/mc'\n", __func__);
goto fail_out;
}
*/
void edac_sysfs_teardown_mc_kset(void)
{
- kset_unregister(&mc_kset);
+ kset_unregister(mc_kset);
}
static atomic_t pci_parity_count = ATOMIC_INIT(0);
static atomic_t pci_nonparity_count = ATOMIC_INIT(0);
-static struct kobject edac_pci_top_main_kobj;
+static struct kobject *edac_pci_top_main_kobj;
static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0);
/* getter functions for the data variables */
pci = to_instance(kobj);
/* decrement reference count on top main kobj */
- kobject_put(&edac_pci_top_main_kobj);
+ kobject_put(edac_pci_top_main_kobj);
kfree(pci); /* Free the control struct */
}
* track the number of PCI instances we have, and thus nest
* properly on keeping the module loaded
*/
- main_kobj = kobject_get(&edac_pci_top_main_kobj);
+ main_kobj = kobject_get(edac_pci_top_main_kobj);
if (!main_kobj) {
err = -ENODEV;
goto error_out;
/* And now register this new kobject under the main kobj */
err = kobject_init_and_add(&pci->kobj, &ktype_pci_instance,
- &edac_pci_top_main_kobj, "pci%d", idx);
+ edac_pci_top_main_kobj, "pci%d", idx);
if (err != 0) {
debugf2("%s() failed to register instance pci%d\n",
__func__, idx);
- kobject_put(&edac_pci_top_main_kobj);
+ kobject_put(edac_pci_top_main_kobj);
goto error_out;
}
*/
static void edac_pci_release_main_kobj(struct kobject *kobj)
{
-
debugf0("%s() here to module_put(THIS_MODULE)\n", __func__);
+ kfree(kobj);
+
/* last reference to top EDAC PCI kobject has been removed,
* NOW release our ref count on the core module
*/
goto decrement_count_fail;
}
+ edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+ if (!edac_pci_top_main_kobj) {
+ debugf1("Failed to allocate\n");
+ err = -ENOMEM;
+ goto kzalloc_fail;
+ }
+
/* Instanstiate the pci object */
- err = kobject_init_and_add(&edac_pci_top_main_kobj, &ktype_edac_pci_main_kobj,
+ err = kobject_init_and_add(edac_pci_top_main_kobj,
+ &ktype_edac_pci_main_kobj,
&edac_class->kset.kobj, "pci");
if (err) {
debugf1("Failed to register '.../edac/pci'\n");
* for EDAC PCI, then edac_pci_main_kobj_teardown()
* must be used, for resources to be cleaned up properly
*/
- kobject_uevent(&edac_pci_top_main_kobj, KOBJ_ADD);
+ kobject_uevent(edac_pci_top_main_kobj, KOBJ_ADD);
debugf1("Registered '.../edac/pci' kobject\n");
return 0;
/* Error unwind statck */
kobject_init_and_add_fail:
+ kfree(edac_pci_top_main_kobj);
+
+kzalloc_fail:
module_put(THIS_MODULE);
decrement_count_fail:
if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) {
debugf0("%s() called kobject_put on main kobj\n",
__func__);
- kobject_put(&edac_pci_top_main_kobj);
+ kobject_put(edac_pci_top_main_kobj);
}
}
--- /dev/null
+/*
+ * Intel 5100 Memory Controllers kernel module
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License.
+ *
+ * This module is based on the following document:
+ *
+ * Intel 5100X Chipset Memory Controller Hub (MCH) - Datasheet
+ * http://download.intel.com/design/chipsets/datashts/318378.pdf
+ *
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/edac.h>
+#include <linux/delay.h>
+#include <linux/mmzone.h>
+
+#include "edac_core.h"
+
+/* register addresses */
+
+/* device 16, func 1 */
+#define I5100_MC 0x40 /* Memory Control Register */
+#define I5100_MS 0x44 /* Memory Status Register */
+#define I5100_SPDDATA 0x48 /* Serial Presence Detect Status Reg */
+#define I5100_SPDCMD 0x4c /* Serial Presence Detect Command Reg */
+#define I5100_TOLM 0x6c /* Top of Low Memory */
+#define I5100_MIR0 0x80 /* Memory Interleave Range 0 */
+#define I5100_MIR1 0x84 /* Memory Interleave Range 1 */
+#define I5100_AMIR_0 0x8c /* Adjusted Memory Interleave Range 0 */
+#define I5100_AMIR_1 0x90 /* Adjusted Memory Interleave Range 1 */
+#define I5100_FERR_NF_MEM 0xa0 /* MC First Non Fatal Errors */
+#define I5100_FERR_NF_MEM_M16ERR_MASK (1 << 16)
+#define I5100_FERR_NF_MEM_M15ERR_MASK (1 << 15)
+#define I5100_FERR_NF_MEM_M14ERR_MASK (1 << 14)
+#define I5100_FERR_NF_MEM_M12ERR_MASK (1 << 12)
+#define I5100_FERR_NF_MEM_M11ERR_MASK (1 << 11)
+#define I5100_FERR_NF_MEM_M10ERR_MASK (1 << 10)
+#define I5100_FERR_NF_MEM_M6ERR_MASK (1 << 6)
+#define I5100_FERR_NF_MEM_M5ERR_MASK (1 << 5)
+#define I5100_FERR_NF_MEM_M4ERR_MASK (1 << 4)
+#define I5100_FERR_NF_MEM_M1ERR_MASK 1
+#define I5100_FERR_NF_MEM_ANY_MASK \
+ (I5100_FERR_NF_MEM_M16ERR_MASK | \
+ I5100_FERR_NF_MEM_M15ERR_MASK | \
+ I5100_FERR_NF_MEM_M14ERR_MASK | \
+ I5100_FERR_NF_MEM_M12ERR_MASK | \
+ I5100_FERR_NF_MEM_M11ERR_MASK | \
+ I5100_FERR_NF_MEM_M10ERR_MASK | \
+ I5100_FERR_NF_MEM_M6ERR_MASK | \
+ I5100_FERR_NF_MEM_M5ERR_MASK | \
+ I5100_FERR_NF_MEM_M4ERR_MASK | \
+ I5100_FERR_NF_MEM_M1ERR_MASK)
+#define I5100_NERR_NF_MEM 0xa4 /* MC Next Non-Fatal Errors */
+#define I5100_EMASK_MEM 0xa8 /* MC Error Mask Register */
+
+/* device 21 and 22, func 0 */
+#define I5100_MTR_0 0x154 /* Memory Technology Registers 0-3 */
+#define I5100_DMIR 0x15c /* DIMM Interleave Range */
+#define I5100_VALIDLOG 0x18c /* Valid Log Markers */
+#define I5100_NRECMEMA 0x190 /* Non-Recoverable Memory Error Log Reg A */
+#define I5100_NRECMEMB 0x194 /* Non-Recoverable Memory Error Log Reg B */
+#define I5100_REDMEMA 0x198 /* Recoverable Memory Data Error Log Reg A */
+#define I5100_REDMEMB 0x19c /* Recoverable Memory Data Error Log Reg B */
+#define I5100_RECMEMA 0x1a0 /* Recoverable Memory Error Log Reg A */
+#define I5100_RECMEMB 0x1a4 /* Recoverable Memory Error Log Reg B */
+#define I5100_MTR_4 0x1b0 /* Memory Technology Registers 4,5 */
+
+/* bit field accessors */
+
+static inline u32 i5100_mc_errdeten(u32 mc)
+{
+ return mc >> 5 & 1;
+}
+
+static inline u16 i5100_spddata_rdo(u16 a)
+{
+ return a >> 15 & 1;
+}
+
+static inline u16 i5100_spddata_sbe(u16 a)
+{
+ return a >> 13 & 1;
+}
+
+static inline u16 i5100_spddata_busy(u16 a)
+{
+ return a >> 12 & 1;
+}
+
+static inline u16 i5100_spddata_data(u16 a)
+{
+ return a & ((1 << 8) - 1);
+}
+
+static inline u32 i5100_spdcmd_create(u32 dti, u32 ckovrd, u32 sa, u32 ba,
+ u32 data, u32 cmd)
+{
+ return ((dti & ((1 << 4) - 1)) << 28) |
+ ((ckovrd & 1) << 27) |
+ ((sa & ((1 << 3) - 1)) << 24) |
+ ((ba & ((1 << 8) - 1)) << 16) |
+ ((data & ((1 << 8) - 1)) << 8) |
+ (cmd & 1);
+}
+
+static inline u16 i5100_tolm_tolm(u16 a)
+{
+ return a >> 12 & ((1 << 4) - 1);
+}
+
+static inline u16 i5100_mir_limit(u16 a)
+{
+ return a >> 4 & ((1 << 12) - 1);
+}
+
+static inline u16 i5100_mir_way1(u16 a)
+{
+ return a >> 1 & 1;
+}
+
+static inline u16 i5100_mir_way0(u16 a)
+{
+ return a & 1;
+}
+
+static inline u32 i5100_ferr_nf_mem_chan_indx(u32 a)
+{
+ return a >> 28 & 1;
+}
+
+static inline u32 i5100_ferr_nf_mem_any(u32 a)
+{
+ return a & I5100_FERR_NF_MEM_ANY_MASK;
+}
+
+static inline u32 i5100_nerr_nf_mem_any(u32 a)
+{
+ return i5100_ferr_nf_mem_any(a);
+}
+
+static inline u32 i5100_dmir_limit(u32 a)
+{
+ return a >> 16 & ((1 << 11) - 1);
+}
+
+static inline u32 i5100_dmir_rank(u32 a, u32 i)
+{
+ return a >> (4 * i) & ((1 << 2) - 1);
+}
+
+static inline u16 i5100_mtr_present(u16 a)
+{
+ return a >> 10 & 1;
+}
+
+static inline u16 i5100_mtr_ethrottle(u16 a)
+{
+ return a >> 9 & 1;
+}
+
+static inline u16 i5100_mtr_width(u16 a)
+{
+ return a >> 8 & 1;
+}
+
+static inline u16 i5100_mtr_numbank(u16 a)
+{
+ return a >> 6 & 1;
+}
+
+static inline u16 i5100_mtr_numrow(u16 a)
+{
+ return a >> 2 & ((1 << 2) - 1);
+}
+
+static inline u16 i5100_mtr_numcol(u16 a)
+{
+ return a & ((1 << 2) - 1);
+}
+
+
+static inline u32 i5100_validlog_redmemvalid(u32 a)
+{
+ return a >> 2 & 1;
+}
+
+static inline u32 i5100_validlog_recmemvalid(u32 a)
+{
+ return a >> 1 & 1;
+}
+
+static inline u32 i5100_validlog_nrecmemvalid(u32 a)
+{
+ return a & 1;
+}
+
+static inline u32 i5100_nrecmema_merr(u32 a)
+{
+ return a >> 15 & ((1 << 5) - 1);
+}
+
+static inline u32 i5100_nrecmema_bank(u32 a)
+{
+ return a >> 12 & ((1 << 3) - 1);
+}
+
+static inline u32 i5100_nrecmema_rank(u32 a)
+{
+ return a >> 8 & ((1 << 3) - 1);
+}
+
+static inline u32 i5100_nrecmema_dm_buf_id(u32 a)
+{
+ return a & ((1 << 8) - 1);
+}
+
+static inline u32 i5100_nrecmemb_cas(u32 a)
+{
+ return a >> 16 & ((1 << 13) - 1);
+}
+
+static inline u32 i5100_nrecmemb_ras(u32 a)
+{
+ return a & ((1 << 16) - 1);
+}
+
+static inline u32 i5100_redmemb_ecc_locator(u32 a)
+{
+ return a & ((1 << 18) - 1);
+}
+
+static inline u32 i5100_recmema_merr(u32 a)
+{
+ return i5100_nrecmema_merr(a);
+}
+
+static inline u32 i5100_recmema_bank(u32 a)
+{
+ return i5100_nrecmema_bank(a);
+}
+
+static inline u32 i5100_recmema_rank(u32 a)
+{
+ return i5100_nrecmema_rank(a);
+}
+
+static inline u32 i5100_recmema_dm_buf_id(u32 a)
+{
+ return i5100_nrecmema_dm_buf_id(a);
+}
+
+static inline u32 i5100_recmemb_cas(u32 a)
+{
+ return i5100_nrecmemb_cas(a);
+}
+
+static inline u32 i5100_recmemb_ras(u32 a)
+{
+ return i5100_nrecmemb_ras(a);
+}
+
+/* some generic limits */
+#define I5100_MAX_RANKS_PER_CTLR 6
+#define I5100_MAX_CTLRS 2
+#define I5100_MAX_RANKS_PER_DIMM 4
+#define I5100_DIMM_ADDR_LINES (6 - 3) /* 64 bits / 8 bits per byte */
+#define I5100_MAX_DIMM_SLOTS_PER_CTLR 4
+#define I5100_MAX_RANK_INTERLEAVE 4
+#define I5100_MAX_DMIRS 5
+
+struct i5100_priv {
+ /* ranks on each dimm -- 0 maps to not present -- obtained via SPD */
+ int dimm_numrank[I5100_MAX_CTLRS][I5100_MAX_DIMM_SLOTS_PER_CTLR];
+
+ /*
+ * mainboard chip select map -- maps i5100 chip selects to
+ * DIMM slot chip selects. In the case of only 4 ranks per
+ * controller, the mapping is fairly obvious but not unique.
+ * we map -1 -> NC and assume both controllers use the same
+ * map...
+ *
+ */
+ int dimm_csmap[I5100_MAX_DIMM_SLOTS_PER_CTLR][I5100_MAX_RANKS_PER_DIMM];
+
+ /* memory interleave range */
+ struct {
+ u64 limit;
+ unsigned way[2];
+ } mir[I5100_MAX_CTLRS];
+
+ /* adjusted memory interleave range register */
+ unsigned amir[I5100_MAX_CTLRS];
+
+ /* dimm interleave range */
+ struct {
+ unsigned rank[I5100_MAX_RANK_INTERLEAVE];
+ u64 limit;
+ } dmir[I5100_MAX_CTLRS][I5100_MAX_DMIRS];
+
+ /* memory technology registers... */
+ struct {
+ unsigned present; /* 0 or 1 */
+ unsigned ethrottle; /* 0 or 1 */
+ unsigned width; /* 4 or 8 bits */
+ unsigned numbank; /* 2 or 3 lines */
+ unsigned numrow; /* 13 .. 16 lines */
+ unsigned numcol; /* 11 .. 12 lines */
+ } mtr[I5100_MAX_CTLRS][I5100_MAX_RANKS_PER_CTLR];
+
+ u64 tolm; /* top of low memory in bytes */
+ unsigned ranksperctlr; /* number of ranks per controller */
+
+ struct pci_dev *mc; /* device 16 func 1 */
+ struct pci_dev *ch0mm; /* device 21 func 0 */
+ struct pci_dev *ch1mm; /* device 22 func 0 */
+};
+
+/* map a rank/ctlr to a slot number on the mainboard */
+static int i5100_rank_to_slot(const struct mem_ctl_info *mci,
+ int ctlr, int rank)
+{
+ const struct i5100_priv *priv = mci->pvt_info;
+ int i;
+
+ for (i = 0; i < I5100_MAX_DIMM_SLOTS_PER_CTLR; i++) {
+ int j;
+ const int numrank = priv->dimm_numrank[ctlr][i];
+
+ for (j = 0; j < numrank; j++)
+ if (priv->dimm_csmap[i][j] == rank)
+ return i * 2 + ctlr;
+ }
+
+ return -1;
+}
+
+static const char *i5100_err_msg(unsigned err)
+{
+ static const char *merrs[] = {
+ "unknown", /* 0 */
+ "uncorrectable data ECC on replay", /* 1 */
+ "unknown", /* 2 */
+ "unknown", /* 3 */
+ "aliased uncorrectable demand data ECC", /* 4 */
+ "aliased uncorrectable spare-copy data ECC", /* 5 */
+ "aliased uncorrectable patrol data ECC", /* 6 */
+ "unknown", /* 7 */
+ "unknown", /* 8 */
+ "unknown", /* 9 */
+ "non-aliased uncorrectable demand data ECC", /* 10 */
+ "non-aliased uncorrectable spare-copy data ECC", /* 11 */
+ "non-aliased uncorrectable patrol data ECC", /* 12 */
+ "unknown", /* 13 */
+ "correctable demand data ECC", /* 14 */
+ "correctable spare-copy data ECC", /* 15 */
+ "correctable patrol data ECC", /* 16 */
+ "unknown", /* 17 */
+ "SPD protocol error", /* 18 */
+ "unknown", /* 19 */
+ "spare copy initiated", /* 20 */
+ "spare copy completed", /* 21 */
+ };
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(merrs); i++)
+ if (1 << i & err)
+ return merrs[i];
+
+ return "none";
+}
+
+/* convert csrow index into a rank (per controller -- 0..5) */
+static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow)
+{
+ const struct i5100_priv *priv = mci->pvt_info;
+
+ return csrow % priv->ranksperctlr;
+}
+
+/* convert csrow index into a controller (0..1) */
+static int i5100_csrow_to_cntlr(const struct mem_ctl_info *mci, int csrow)
+{
+ const struct i5100_priv *priv = mci->pvt_info;
+
+ return csrow / priv->ranksperctlr;
+}
+
+static unsigned i5100_rank_to_csrow(const struct mem_ctl_info *mci,
+ int ctlr, int rank)
+{
+ const struct i5100_priv *priv = mci->pvt_info;
+
+ return ctlr * priv->ranksperctlr + rank;
+}
+
+static void i5100_handle_ce(struct mem_ctl_info *mci,
+ int ctlr,
+ unsigned bank,
+ unsigned rank,
+ unsigned long syndrome,
+ unsigned cas,
+ unsigned ras,
+ const char *msg)
+{
+ const int csrow = i5100_rank_to_csrow(mci, ctlr, rank);
+
+ printk(KERN_ERR
+ "CE ctlr %d, bank %u, rank %u, syndrome 0x%lx, "
+ "cas %u, ras %u, csrow %u, label \"%s\": %s\n",
+ ctlr, bank, rank, syndrome, cas, ras,
+ csrow, mci->csrows[csrow].channels[0].label, msg);
+
+ mci->ce_count++;
+ mci->csrows[csrow].ce_count++;
+ mci->csrows[csrow].channels[0].ce_count++;
+}
+
+static void i5100_handle_ue(struct mem_ctl_info *mci,
+ int ctlr,
+ unsigned bank,
+ unsigned rank,
+ unsigned long syndrome,
+ unsigned cas,
+ unsigned ras,
+ const char *msg)
+{
+ const int csrow = i5100_rank_to_csrow(mci, ctlr, rank);
+
+ printk(KERN_ERR
+ "UE ctlr %d, bank %u, rank %u, syndrome 0x%lx, "
+ "cas %u, ras %u, csrow %u, label \"%s\": %s\n",
+ ctlr, bank, rank, syndrome, cas, ras,
+ csrow, mci->csrows[csrow].channels[0].label, msg);
+
+ mci->ue_count++;
+ mci->csrows[csrow].ue_count++;
+}
+
+static void i5100_read_log(struct mem_ctl_info *mci, int ctlr,
+ u32 ferr, u32 nerr)
+{
+ struct i5100_priv *priv = mci->pvt_info;
+ struct pci_dev *pdev = (ctlr) ? priv->ch1mm : priv->ch0mm;
+ u32 dw;
+ u32 dw2;
+ unsigned syndrome = 0;
+ unsigned ecc_loc = 0;
+ unsigned merr;
+ unsigned bank;
+ unsigned rank;
+ unsigned cas;
+ unsigned ras;
+
+ pci_read_config_dword(pdev, I5100_VALIDLOG, &dw);
+
+ if (i5100_validlog_redmemvalid(dw)) {
+ pci_read_config_dword(pdev, I5100_REDMEMA, &dw2);
+ syndrome = dw2;
+ pci_read_config_dword(pdev, I5100_REDMEMB, &dw2);
+ ecc_loc = i5100_redmemb_ecc_locator(dw2);
+ }
+
+ if (i5100_validlog_recmemvalid(dw)) {
+ const char *msg;
+
+ pci_read_config_dword(pdev, I5100_RECMEMA, &dw2);
+ merr = i5100_recmema_merr(dw2);
+ bank = i5100_recmema_bank(dw2);
+ rank = i5100_recmema_rank(dw2);
+
+ pci_read_config_dword(pdev, I5100_RECMEMB, &dw2);
+ cas = i5100_recmemb_cas(dw2);
+ ras = i5100_recmemb_ras(dw2);
+
+ /* FIXME: not really sure if this is what merr is...
+ */
+ if (!merr)
+ msg = i5100_err_msg(ferr);
+ else
+ msg = i5100_err_msg(nerr);
+
+ i5100_handle_ce(mci, ctlr, bank, rank, syndrome, cas, ras, msg);
+ }
+
+ if (i5100_validlog_nrecmemvalid(dw)) {
+ const char *msg;
+
+ pci_read_config_dword(pdev, I5100_NRECMEMA, &dw2);
+ merr = i5100_nrecmema_merr(dw2);
+ bank = i5100_nrecmema_bank(dw2);
+ rank = i5100_nrecmema_rank(dw2);
+
+ pci_read_config_dword(pdev, I5100_NRECMEMB, &dw2);
+ cas = i5100_nrecmemb_cas(dw2);
+ ras = i5100_nrecmemb_ras(dw2);
+
+ /* FIXME: not really sure if this is what merr is...
+ */
+ if (!merr)
+ msg = i5100_err_msg(ferr);
+ else
+ msg = i5100_err_msg(nerr);
+
+ i5100_handle_ue(mci, ctlr, bank, rank, syndrome, cas, ras, msg);
+ }
+
+ pci_write_config_dword(pdev, I5100_VALIDLOG, dw);
+}
+
+static void i5100_check_error(struct mem_ctl_info *mci)
+{
+ struct i5100_priv *priv = mci->pvt_info;
+ u32 dw;
+
+
+ pci_read_config_dword(priv->mc, I5100_FERR_NF_MEM, &dw);
+ if (i5100_ferr_nf_mem_any(dw)) {
+ u32 dw2;
+
+ pci_read_config_dword(priv->mc, I5100_NERR_NF_MEM, &dw2);
+ if (dw2)
+ pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM,
+ dw2);
+ pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw);
+
+ i5100_read_log(mci, i5100_ferr_nf_mem_chan_indx(dw),
+ i5100_ferr_nf_mem_any(dw),
+ i5100_nerr_nf_mem_any(dw2));
+ }
+}
+
+static struct pci_dev *pci_get_device_func(unsigned vendor,
+ unsigned device,
+ unsigned func)
+{
+ struct pci_dev *ret = NULL;
+
+ while (1) {
+ ret = pci_get_device(vendor, device, ret);
+
+ if (!ret)
+ break;
+
+ if (PCI_FUNC(ret->devfn) == func)
+ break;
+ }
+
+ return ret;
+}
+
+static unsigned long __devinit i5100_npages(struct mem_ctl_info *mci,
+ int csrow)
+{
+ struct i5100_priv *priv = mci->pvt_info;
+ const unsigned ctlr_rank = i5100_csrow_to_rank(mci, csrow);
+ const unsigned ctlr = i5100_csrow_to_cntlr(mci, csrow);
+ unsigned addr_lines;
+
+ /* dimm present? */
+ if (!priv->mtr[ctlr][ctlr_rank].present)
+ return 0ULL;
+
+ addr_lines =
+ I5100_DIMM_ADDR_LINES +
+ priv->mtr[ctlr][ctlr_rank].numcol +
+ priv->mtr[ctlr][ctlr_rank].numrow +
+ priv->mtr[ctlr][ctlr_rank].numbank;
+
+ return (unsigned long)
+ ((unsigned long long) (1ULL << addr_lines) / PAGE_SIZE);
+}
+
+static void __devinit i5100_init_mtr(struct mem_ctl_info *mci)
+{
+ struct i5100_priv *priv = mci->pvt_info;
+ struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm };
+ int i;
+
+ for (i = 0; i < I5100_MAX_CTLRS; i++) {
+ int j;
+ struct pci_dev *pdev = mms[i];
+
+ for (j = 0; j < I5100_MAX_RANKS_PER_CTLR; j++) {
+ const unsigned addr =
+ (j < 4) ? I5100_MTR_0 + j * 2 :
+ I5100_MTR_4 + (j - 4) * 2;
+ u16 w;
+
+ pci_read_config_word(pdev, addr, &w);
+
+ priv->mtr[i][j].present = i5100_mtr_present(w);
+ priv->mtr[i][j].ethrottle = i5100_mtr_ethrottle(w);
+ priv->mtr[i][j].width = 4 + 4 * i5100_mtr_width(w);
+ priv->mtr[i][j].numbank = 2 + i5100_mtr_numbank(w);
+ priv->mtr[i][j].numrow = 13 + i5100_mtr_numrow(w);
+ priv->mtr[i][j].numcol = 10 + i5100_mtr_numcol(w);
+ }
+ }
+}
+
+/*
+ * FIXME: make this into a real i2c adapter (so that dimm-decode
+ * will work)?
+ */
+static int i5100_read_spd_byte(const struct mem_ctl_info *mci,
+ u8 ch, u8 slot, u8 addr, u8 *byte)
+{
+ struct i5100_priv *priv = mci->pvt_info;
+ u16 w;
+ unsigned long et;
+
+ pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
+ if (i5100_spddata_busy(w))
+ return -1;
+
+ pci_write_config_dword(priv->mc, I5100_SPDCMD,
+ i5100_spdcmd_create(0xa, 1, ch * 4 + slot, addr,
+ 0, 0));
+
+ /* wait up to 100ms */
+ et = jiffies + HZ / 10;
+ udelay(100);
+ while (1) {
+ pci_read_config_word(priv->mc, I5100_SPDDATA, &w);
+ if (!i5100_spddata_busy(w))
+ break;
+ udelay(100);
+ }
+
+ if (!i5100_spddata_rdo(w) || i5100_spddata_sbe(w))
+ return -1;
+
+ *byte = i5100_spddata_data(w);
+
+ return 0;
+}
+
+/*
+ * fill dimm chip select map
+ *
+ * FIXME:
+ * o only valid for 4 ranks per controller
+ * o not the only way to may chip selects to dimm slots
+ * o investigate if there is some way to obtain this map from the bios
+ */
+static void __devinit i5100_init_dimm_csmap(struct mem_ctl_info *mci)
+{
+ struct i5100_priv *priv = mci->pvt_info;
+ int i;
+
+ WARN_ON(priv->ranksperctlr != 4);
+
+ for (i = 0; i < I5100_MAX_DIMM_SLOTS_PER_CTLR; i++) {
+ int j;
+
+ for (j = 0; j < I5100_MAX_RANKS_PER_DIMM; j++)
+ priv->dimm_csmap[i][j] = -1; /* default NC */
+ }
+
+ /* only 2 chip selects per slot... */
+ priv->dimm_csmap[0][0] = 0;
+ priv->dimm_csmap[0][1] = 3;
+ priv->dimm_csmap[1][0] = 1;
+ priv->dimm_csmap[1][1] = 2;
+ priv->dimm_csmap[2][0] = 2;
+ priv->dimm_csmap[3][0] = 3;
+}
+
+static void __devinit i5100_init_dimm_layout(struct pci_dev *pdev,
+ struct mem_ctl_info *mci)
+{
+ struct i5100_priv *priv = mci->pvt_info;
+ int i;
+
+ for (i = 0; i < I5100_MAX_CTLRS; i++) {
+ int j;
+
+ for (j = 0; j < I5100_MAX_DIMM_SLOTS_PER_CTLR; j++) {
+ u8 rank;
+
+ if (i5100_read_spd_byte(mci, i, j, 5, &rank) < 0)
+ priv->dimm_numrank[i][j] = 0;
+ else
+ priv->dimm_numrank[i][j] = (rank & 3) + 1;
+ }
+ }
+
+ i5100_init_dimm_csmap(mci);
+}
+
+static void __devinit i5100_init_interleaving(struct pci_dev *pdev,
+ struct mem_ctl_info *mci)
+{
+ u16 w;
+ u32 dw;
+ struct i5100_priv *priv = mci->pvt_info;
+ struct pci_dev *mms[2] = { priv->ch0mm, priv->ch1mm };
+ int i;
+
+ pci_read_config_word(pdev, I5100_TOLM, &w);
+ priv->tolm = (u64) i5100_tolm_tolm(w) * 256 * 1024 * 1024;
+
+ pci_read_config_word(pdev, I5100_MIR0, &w);
+ priv->mir[0].limit = (u64) i5100_mir_limit(w) << 28;
+ priv->mir[0].way[1] = i5100_mir_way1(w);
+ priv->mir[0].way[0] = i5100_mir_way0(w);
+
+ pci_read_config_word(pdev, I5100_MIR1, &w);
+ priv->mir[1].limit = (u64) i5100_mir_limit(w) << 28;
+ priv->mir[1].way[1] = i5100_mir_way1(w);
+ priv->mir[1].way[0] = i5100_mir_way0(w);
+
+ pci_read_config_word(pdev, I5100_AMIR_0, &w);
+ priv->amir[0] = w;
+ pci_read_config_word(pdev, I5100_AMIR_1, &w);
+ priv->amir[1] = w;
+
+ for (i = 0; i < I5100_MAX_CTLRS; i++) {
+ int j;
+
+ for (j = 0; j < 5; j++) {
+ int k;
+
+ pci_read_config_dword(mms[i], I5100_DMIR + j * 4, &dw);
+
+ priv->dmir[i][j].limit =
+ (u64) i5100_dmir_limit(dw) << 28;
+ for (k = 0; k < I5100_MAX_RANKS_PER_DIMM; k++)
+ priv->dmir[i][j].rank[k] =
+ i5100_dmir_rank(dw, k);
+ }
+ }
+
+ i5100_init_mtr(mci);
+}
+
+static void __devinit i5100_init_csrows(struct mem_ctl_info *mci)
+{
+ int i;
+ unsigned long total_pages = 0UL;
+ struct i5100_priv *priv = mci->pvt_info;
+
+ for (i = 0; i < mci->nr_csrows; i++) {
+ const unsigned long npages = i5100_npages(mci, i);
+ const unsigned cntlr = i5100_csrow_to_cntlr(mci, i);
+ const unsigned rank = i5100_csrow_to_rank(mci, i);
+
+ if (!npages)
+ continue;
+
+ /*
+ * FIXME: these two are totally bogus -- I don't see how to
+ * map them correctly to this structure...
+ */
+ mci->csrows[i].first_page = total_pages;
+ mci->csrows[i].last_page = total_pages + npages - 1;
+ mci->csrows[i].page_mask = 0UL;
+
+ mci->csrows[i].nr_pages = npages;
+ mci->csrows[i].grain = 32;
+ mci->csrows[i].csrow_idx = i;
+ mci->csrows[i].dtype =
+ (priv->mtr[cntlr][rank].width == 4) ? DEV_X4 : DEV_X8;
+ mci->csrows[i].ue_count = 0;
+ mci->csrows[i].ce_count = 0;
+ mci->csrows[i].mtype = MEM_RDDR2;
+ mci->csrows[i].edac_mode = EDAC_SECDED;
+ mci->csrows[i].mci = mci;
+ mci->csrows[i].nr_channels = 1;
+ mci->csrows[i].channels[0].chan_idx = 0;
+ mci->csrows[i].channels[0].ce_count = 0;
+ mci->csrows[i].channels[0].csrow = mci->csrows + i;
+ snprintf(mci->csrows[i].channels[0].label,
+ sizeof(mci->csrows[i].channels[0].label),
+ "DIMM%u", i5100_rank_to_slot(mci, cntlr, rank));
+
+ total_pages += npages;
+ }
+}
+
+static int __devinit i5100_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ int rc;
+ struct mem_ctl_info *mci;
+ struct i5100_priv *priv;
+ struct pci_dev *ch0mm, *ch1mm;
+ int ret = 0;
+ u32 dw;
+ int ranksperch;
+
+ if (PCI_FUNC(pdev->devfn) != 1)
+ return -ENODEV;
+
+ rc = pci_enable_device(pdev);
+ if (rc < 0) {
+ ret = rc;
+ goto bail;
+ }
+
+ /* ECC enabled? */
+ pci_read_config_dword(pdev, I5100_MC, &dw);
+ if (!i5100_mc_errdeten(dw)) {
+ printk(KERN_INFO "i5100_edac: ECC not enabled.\n");
+ ret = -ENODEV;
+ goto bail_pdev;
+ }
+
+ /* figure out how many ranks, from strapped state of 48GB_Mode input */
+ pci_read_config_dword(pdev, I5100_MS, &dw);
+ ranksperch = !!(dw & (1 << 8)) * 2 + 4;
+
+ if (ranksperch != 4) {
+ /* FIXME: get 6 ranks / controller to work - need hw... */
+ printk(KERN_INFO "i5100_edac: unsupported configuration.\n");
+ ret = -ENODEV;
+ goto bail_pdev;
+ }
+
+ /* enable error reporting... */
+ pci_read_config_dword(pdev, I5100_EMASK_MEM, &dw);
+ dw &= ~I5100_FERR_NF_MEM_ANY_MASK;
+ pci_write_config_dword(pdev, I5100_EMASK_MEM, dw);
+
+ /* device 21, func 0, Channel 0 Memory Map, Error Flag/Mask, etc... */
+ ch0mm = pci_get_device_func(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_5100_21, 0);
+ if (!ch0mm) {
+ ret = -ENODEV;
+ goto bail_pdev;
+ }
+
+ rc = pci_enable_device(ch0mm);
+ if (rc < 0) {
+ ret = rc;
+ goto bail_ch0;
+ }
+
+ /* device 22, func 0, Channel 1 Memory Map, Error Flag/Mask, etc... */
+ ch1mm = pci_get_device_func(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_5100_22, 0);
+ if (!ch1mm) {
+ ret = -ENODEV;
+ goto bail_disable_ch0;
+ }
+
+ rc = pci_enable_device(ch1mm);
+ if (rc < 0) {
+ ret = rc;
+ goto bail_ch1;
+ }
+
+ mci = edac_mc_alloc(sizeof(*priv), ranksperch * 2, 1, 0);
+ if (!mci) {
+ ret = -ENOMEM;
+ goto bail_disable_ch1;
+ }
+
+ mci->dev = &pdev->dev;
+
+ priv = mci->pvt_info;
+ priv->ranksperctlr = ranksperch;
+ priv->mc = pdev;
+ priv->ch0mm = ch0mm;
+ priv->ch1mm = ch1mm;
+
+ i5100_init_dimm_layout(pdev, mci);
+ i5100_init_interleaving(pdev, mci);
+
+ mci->mtype_cap = MEM_FLAG_FB_DDR2;
+ mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+ mci->edac_cap = EDAC_FLAG_SECDED;
+ mci->mod_name = "i5100_edac.c";
+ mci->mod_ver = "not versioned";
+ mci->ctl_name = "i5100";
+ mci->dev_name = pci_name(pdev);
+ mci->ctl_page_to_phys = NULL;
+
+ mci->edac_check = i5100_check_error;
+
+ i5100_init_csrows(mci);
+
+ /* this strange construction seems to be in every driver, dunno why */
+ switch (edac_op_state) {
+ case EDAC_OPSTATE_POLL:
+ case EDAC_OPSTATE_NMI:
+ break;
+ default:
+ edac_op_state = EDAC_OPSTATE_POLL;
+ break;
+ }
+
+ if (edac_mc_add_mc(mci)) {
+ ret = -ENODEV;
+ goto bail_mc;
+ }
+
+ return ret;
+
+bail_mc:
+ edac_mc_free(mci);
+
+bail_disable_ch1:
+ pci_disable_device(ch1mm);
+
+bail_ch1:
+ pci_dev_put(ch1mm);
+
+bail_disable_ch0:
+ pci_disable_device(ch0mm);
+
+bail_ch0:
+ pci_dev_put(ch0mm);
+
+bail_pdev:
+ pci_disable_device(pdev);
+
+bail:
+ return ret;
+}
+
+static void __devexit i5100_remove_one(struct pci_dev *pdev)
+{
+ struct mem_ctl_info *mci;
+ struct i5100_priv *priv;
+
+ mci = edac_mc_del_mc(&pdev->dev);
+
+ if (!mci)
+ return;
+
+ priv = mci->pvt_info;
+ pci_disable_device(pdev);
+ pci_disable_device(priv->ch0mm);
+ pci_disable_device(priv->ch1mm);
+ pci_dev_put(priv->ch0mm);
+ pci_dev_put(priv->ch1mm);
+
+ edac_mc_free(mci);
+}
+
+static const struct pci_device_id i5100_pci_tbl[] __devinitdata = {
+ /* Device 16, Function 0, Channel 0 Memory Map, Error Flag/Mask, ... */
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5100_16) },
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, i5100_pci_tbl);
+
+static struct pci_driver i5100_driver = {
+ .name = KBUILD_BASENAME,
+ .probe = i5100_init_one,
+ .remove = __devexit_p(i5100_remove_one),
+ .id_table = i5100_pci_tbl,
+};
+
+static int __init i5100_init(void)
+{
+ int pci_rc;
+
+ pci_rc = pci_register_driver(&i5100_driver);
+
+ return (pci_rc < 0) ? pci_rc : 0;
+}
+
+static void __exit i5100_exit(void)
+{
+ pci_unregister_driver(&i5100_driver);
+}
+
+module_init(i5100_init);
+module_exit(i5100_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR
+ ("Arthur Jones <ajones@riverbed.com>");
+MODULE_DESCRIPTION("MC Driver for Intel I5100 memory controllers");
return IRQ_HANDLED;
}
-static int __devinit mpc85xx_pci_err_probe(struct platform_device *pdev)
+static int __devinit mpc85xx_pci_err_probe(struct of_device *op,
+ const struct of_device_id *match)
{
struct edac_pci_ctl_info *pci;
struct mpc85xx_pci_pdata *pdata;
- struct resource *r;
+ struct resource r;
int res = 0;
- if (!devres_open_group(&pdev->dev, mpc85xx_pci_err_probe, GFP_KERNEL))
+ if (!devres_open_group(&op->dev, mpc85xx_pci_err_probe, GFP_KERNEL))
return -ENOMEM;
pci = edac_pci_alloc_ctl_info(sizeof(*pdata), "mpc85xx_pci_err");
pdata = pci->pvt_info;
pdata->name = "mpc85xx_pci_err";
pdata->irq = NO_IRQ;
- platform_set_drvdata(pdev, pci);
- pci->dev = &pdev->dev;
+ dev_set_drvdata(&op->dev, pci);
+ pci->dev = &op->dev;
pci->mod_name = EDAC_MOD_STR;
pci->ctl_name = pdata->name;
- pci->dev_name = pdev->dev.bus_id;
+ pci->dev_name = op->dev.bus_id;
if (edac_op_state == EDAC_OPSTATE_POLL)
pci->edac_check = mpc85xx_pci_check;
pdata->edac_idx = edac_pci_idx++;
- r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!r) {
+ res = of_address_to_resource(op->node, 0, &r);
+ if (res) {
printk(KERN_ERR "%s: Unable to get resource for "
"PCI err regs\n", __func__);
goto err;
}
- if (!devm_request_mem_region(&pdev->dev, r->start,
- r->end - r->start + 1, pdata->name)) {
+ /* we only need the error registers */
+ r.start += 0xe00;
+
+ if (!devm_request_mem_region(&op->dev, r.start,
+ r.end - r.start + 1, pdata->name)) {
printk(KERN_ERR "%s: Error while requesting mem region\n",
__func__);
res = -EBUSY;
goto err;
}
- pdata->pci_vbase = devm_ioremap(&pdev->dev, r->start,
- r->end - r->start + 1);
+ pdata->pci_vbase = devm_ioremap(&op->dev, r.start,
+ r.end - r.start + 1);
if (!pdata->pci_vbase) {
printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__);
res = -ENOMEM;
}
if (edac_op_state == EDAC_OPSTATE_INT) {
- pdata->irq = platform_get_irq(pdev, 0);
- res = devm_request_irq(&pdev->dev, pdata->irq,
+ pdata->irq = irq_of_parse_and_map(op->node, 0);
+ res = devm_request_irq(&op->dev, pdata->irq,
mpc85xx_pci_isr, IRQF_DISABLED,
"[EDAC] PCI err", pci);
if (res < 0) {
printk(KERN_ERR
"%s: Unable to requiest irq %d for "
"MPC85xx PCI err\n", __func__, pdata->irq);
+ irq_dispose_mapping(pdata->irq);
res = -ENODEV;
goto err2;
}
pdata->irq);
}
- devres_remove_group(&pdev->dev, mpc85xx_pci_err_probe);
+ devres_remove_group(&op->dev, mpc85xx_pci_err_probe);
debugf3("%s(): success\n", __func__);
printk(KERN_INFO EDAC_MOD_STR " PCI err registered\n");
return 0;
err2:
- edac_pci_del_device(&pdev->dev);
+ edac_pci_del_device(&op->dev);
err:
edac_pci_free_ctl_info(pci);
- devres_release_group(&pdev->dev, mpc85xx_pci_err_probe);
+ devres_release_group(&op->dev, mpc85xx_pci_err_probe);
return res;
}
-static int mpc85xx_pci_err_remove(struct platform_device *pdev)
+static int mpc85xx_pci_err_remove(struct of_device *op)
{
- struct edac_pci_ctl_info *pci = platform_get_drvdata(pdev);
+ struct edac_pci_ctl_info *pci = dev_get_drvdata(&op->dev);
struct mpc85xx_pci_pdata *pdata = pci->pvt_info;
debugf0("%s()\n", __func__);
return 0;
}
-static struct platform_driver mpc85xx_pci_err_driver = {
+static struct of_device_id mpc85xx_pci_err_of_match[] = {
+ {
+ .compatible = "fsl,mpc8540-pcix",
+ },
+ {
+ .compatible = "fsl,mpc8540-pci",
+ },
+ {},
+};
+
+static struct of_platform_driver mpc85xx_pci_err_driver = {
+ .owner = THIS_MODULE,
+ .name = "mpc85xx_pci_err",
+ .match_table = mpc85xx_pci_err_of_match,
.probe = mpc85xx_pci_err_probe,
.remove = __devexit_p(mpc85xx_pci_err_remove),
.driver = {
- .name = "mpc85xx_pci_err",
- }
+ .name = "mpc85xx_pci_err",
+ .owner = THIS_MODULE,
+ },
};
#endif /* CONFIG_PCI */
printk(KERN_WARNING EDAC_MOD_STR "L2 fails to register\n");
#ifdef CONFIG_PCI
- res = platform_driver_register(&mpc85xx_pci_err_driver);
+ res = of_register_platform_driver(&mpc85xx_pci_err_driver);
if (res)
printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n");
#endif
{
mtspr(SPRN_HID1, orig_hid1);
#ifdef CONFIG_PCI
- platform_driver_unregister(&mpc85xx_pci_err_driver);
+ of_unregister_platform_driver(&mpc85xx_pci_err_driver);
#endif
of_unregister_platform_driver(&mpc85xx_l2_err_driver);
of_unregister_platform_driver(&mpc85xx_mc_err_driver);
return IRQ_HANDLED;
}
+/*
+ * Bit 0 of MV64x60_PCIx_ERR_MASK does not exist on the 64360 and because of
+ * errata FEr-#11 and FEr-##16 for the 64460, it should be 0 on that chip as
+ * well. IOW, don't set bit 0.
+ */
+
+/* Erratum FEr PCI-#16: clear bit 0 of PCI SERRn Mask reg. */
+static int __init mv64x60_pci_fixup(struct platform_device *pdev)
+{
+ struct resource *r;
+ void __iomem *pci_serr;
+
+ r = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!r) {
+ printk(KERN_ERR "%s: Unable to get resource for "
+ "PCI err regs\n", __func__);
+ return -ENOENT;
+ }
+
+ pci_serr = ioremap(r->start, r->end - r->start + 1);
+ if (!pci_serr)
+ return -ENOMEM;
+
+ out_le32(pci_serr, in_le32(pci_serr) & ~0x1);
+ iounmap(pci_serr);
+
+ return 0;
+}
+
static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
{
struct edac_pci_ctl_info *pci;
goto err;
}
+ res = mv64x60_pci_fixup(pdev);
+ if (res < 0) {
+ printk(KERN_ERR "%s: PCI fixup failed\n", __func__);
+ goto err;
+ }
+
out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_CAUSE, 0);
out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK, 0);
out_le32(pdata->pci_vbase + MV64X60_PCI_ERROR_MASK,
if (!np)
return;
- reg = get_property(np, "reg", NULL);
+ reg = of_get_property(np, "reg", NULL);
pdata->total_mem = reg[1];
}
struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
- size_t max_read;
ssize_t ret;
mutex_lock(&smi_data_lock);
-
- if (pos >= smi_data_buf_size) {
- ret = 0;
- goto out;
- }
-
- max_read = smi_data_buf_size - pos;
- ret = min(max_read, count);
- memcpy(buf, smi_data_buf + pos, ret);
-out:
+ ret = memory_read_from_buffer(buf, count, &pos, smi_data_buf,
+ smi_data_buf_size);
mutex_unlock(&smi_data_lock);
return ret;
}
static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count)
{
- unsigned char *ptemp = NULL;
- size_t bytes_left = 0;
- size_t data_length = 0;
- ssize_t ret_count = 0;
-
/* check to see if we have something to return */
if ((rbu_data.image_update_buffer == NULL) ||
(rbu_data.bios_image_size == 0)) {
"bios_image_size %lu\n",
rbu_data.image_update_buffer,
rbu_data.bios_image_size);
- ret_count = -ENOMEM;
- goto read_rbu_data_exit;
- }
-
- if (pos > rbu_data.bios_image_size) {
- ret_count = 0;
- goto read_rbu_data_exit;
+ return -ENOMEM;
}
- bytes_left = rbu_data.bios_image_size - pos;
- data_length = min(bytes_left, count);
-
- ptemp = rbu_data.image_update_buffer;
- memcpy(buffer, (ptemp + pos), data_length);
-
- if ((pos + count) > rbu_data.bios_image_size)
- /* this was the last copy */
- ret_count = bytes_left;
- else
- ret_count = count;
- read_rbu_data_exit:
- return ret_count;
+ return memory_read_from_buffer(buffer, count, &pos,
+ rbu_data.image_update_buffer, rbu_data.bios_image_size);
}
static ssize_t read_rbu_data(struct kobject *kobj,
# GPIO infrastructure and expanders
#
-config HAVE_GPIO_LIB
+config ARCH_WANT_OPTIONAL_GPIOLIB
bool
+ help
+ Select this config option from the architecture Kconfig, if
+ it is possible to use gpiolib on the architecture, but let the
+ user decide whether to actually build it or not.
+ Select this instead of ARCH_REQUIRE_GPIOLIB, if your architecture does
+ not depend on GPIOs being available, but rather let the user
+ decide whether he needs it or not.
+
+config ARCH_REQUIRE_GPIOLIB
+ bool
+ select GPIOLIB
help
Platforms select gpiolib if they use this infrastructure
for all their GPIOs, usually starting with ones integrated
into SOC processors.
+ Selecting this from the architecture code will cause the gpiolib
+ code to always get built in.
+
+
+
+menuconfig GPIOLIB
+ bool "GPIO Support"
+ depends on ARCH_WANT_OPTIONAL_GPIOLIB || ARCH_REQUIRE_GPIOLIB
+ select GENERIC_GPIO
+ help
+ This enables GPIO support through the generic GPIO library.
+ You only need to enable this, if you also want to enable
+ one or more of the GPIO expansion card drivers below.
-menu "GPIO Support"
- depends on HAVE_GPIO_LIB
+ If unsure, say N.
+
+if GPIOLIB
config DEBUG_GPIO
bool "Debug GPIO calls"
slower. The diagnostics help catch the type of setup errors
that are most common when setting up new platforms or boards.
+config GPIO_SYSFS
+ bool "/sys/class/gpio/... (sysfs interface)"
+ depends on SYSFS && EXPERIMENTAL
+ help
+ Say Y here to add a sysfs interface for GPIOs.
+
+ This is mostly useful to work around omissions in a system's
+ kernel support. Those are common in custom and semicustom
+ hardware assembled using standard kernels with a minimum of
+ custom patches. In those cases, userspace code may import
+ a given GPIO from the kernel, if no kernel driver requested it.
+
+ Kernel drivers may also request that a particular GPIO be
+ exported to userspace; this can be useful when debugging.
+
# put expanders in the right section, in alphabetical order
comment "I2C GPIO expanders:"
+config GPIO_MAX732X
+ tristate "MAX7319, MAX7320-7327 I2C Port Expanders"
+ depends on I2C
+ help
+ Say yes here to support the MAX7319, MAX7320-7327 series of I2C
+ Port Expanders. Each IO port on these chips has a fixed role of
+ Input (designated by 'I'), Push-Pull Output ('O'), or Open-Drain
+ Input and Output (designed by 'P'). The combinations are listed
+ below:
+
+ 8 bits: max7319 (8I), max7320 (8O), max7321 (8P),
+ max7322 (4I4O), max7323 (4P4O)
+
+ 16 bits: max7324 (8I8O), max7325 (8P8O),
+ max7326 (4I12O), max7327 (4P12O)
+
+ Board setup code must specify the model to use, and the start
+ number for these GPIOs.
+
config GPIO_PCA953X
tristate "PCA953x, PCA955x, and MAX7310 I/O ports"
depends on I2C
This driver provides an in-kernel interface to those GPIOs using
platform-neutral GPIO calls.
+comment "PCI GPIO expanders:"
+
+config GPIO_BT8XX
+ tristate "BT8XX GPIO abuser"
+ depends on PCI && VIDEO_BT848=n
+ help
+ The BT8xx frame grabber chip has 24 GPIO pins than can be abused
+ as a cheap PCI GPIO card.
+
+ This chip can be found on Miro, Hauppauge and STB TV-cards.
+
+ The card needs to be physically altered for using it as a
+ GPIO card. For more information on how to build a GPIO card
+ from a BT8xx TV card, see the documentation file at
+ Documentation/bt8xxgpio.txt
+
+ If unsure, say N.
+
comment "SPI GPIO expanders:"
config GPIO_MAX7301
SPI driver for Microchip MCP23S08 I/O expander. This provides
a GPIO interface supporting inputs and outputs.
-endmenu
+endif
ccflags-$(CONFIG_DEBUG_GPIO) += -DDEBUG
-obj-$(CONFIG_HAVE_GPIO_LIB) += gpiolib.o
+obj-$(CONFIG_GPIOLIB) += gpiolib.o
obj-$(CONFIG_GPIO_MAX7301) += max7301.o
+obj-$(CONFIG_GPIO_MAX732X) += max732x.o
obj-$(CONFIG_GPIO_MCP23S08) += mcp23s08.o
obj-$(CONFIG_GPIO_PCA953X) += pca953x.o
obj-$(CONFIG_GPIO_PCF857X) += pcf857x.o
+obj-$(CONFIG_GPIO_BT8XX) += bt8xxgpio.o
--- /dev/null
+/*
+
+ bt8xx GPIO abuser
+
+ Copyright (C) 2008 Michael Buesch <mb@bu3sch.de>
+
+ Please do _only_ contact the people listed _above_ with issues related to this driver.
+ All the other people listed below are not related to this driver. Their names
+ are only here, because this driver is derived from the bt848 driver.
+
+
+ Derived from the bt848 driver:
+
+ Copyright (C) 1996,97,98 Ralph Metzler
+ & Marcus Metzler
+ (c) 1999-2002 Gerd Knorr
+
+ some v4l2 code lines are taken from Justin's bttv2 driver which is
+ (c) 2000 Justin Schoeman
+
+ V4L1 removal from:
+ (c) 2005-2006 Nickolay V. Shmyrev
+
+ Fixes to be fully V4L2 compliant by
+ (c) 2006 Mauro Carvalho Chehab
+
+ Cropping and overscan support
+ Copyright (C) 2005, 2006 Michael H. Schimek
+ Sponsored by OPQ Systems AB
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+
+#include <asm/gpio.h>
+
+/* Steal the hardware definitions from the bttv driver. */
+#include "../media/video/bt8xx/bt848.h"
+
+
+#define BT8XXGPIO_NR_GPIOS 24 /* We have 24 GPIO pins */
+
+
+struct bt8xxgpio {
+ spinlock_t lock;
+
+ void __iomem *mmio;
+ struct pci_dev *pdev;
+ struct gpio_chip gpio;
+
+#ifdef CONFIG_PM
+ u32 saved_outen;
+ u32 saved_data;
+#endif
+};
+
+#define bgwrite(dat, adr) writel((dat), bg->mmio+(adr))
+#define bgread(adr) readl(bg->mmio+(adr))
+
+
+static int modparam_gpiobase = -1/* dynamic */;
+module_param_named(gpiobase, modparam_gpiobase, int, 0444);
+MODULE_PARM_DESC(gpiobase, "The GPIO number base. -1 means dynamic, which is the default.");
+
+
+static int bt8xxgpio_gpio_direction_input(struct gpio_chip *gpio, unsigned nr)
+{
+ struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
+ unsigned long flags;
+ u32 outen, data;
+
+ spin_lock_irqsave(&bg->lock, flags);
+
+ data = bgread(BT848_GPIO_DATA);
+ data &= ~(1 << nr);
+ bgwrite(data, BT848_GPIO_DATA);
+
+ outen = bgread(BT848_GPIO_OUT_EN);
+ outen &= ~(1 << nr);
+ bgwrite(outen, BT848_GPIO_OUT_EN);
+
+ spin_unlock_irqrestore(&bg->lock, flags);
+
+ return 0;
+}
+
+static int bt8xxgpio_gpio_get(struct gpio_chip *gpio, unsigned nr)
+{
+ struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
+ unsigned long flags;
+ u32 val;
+
+ spin_lock_irqsave(&bg->lock, flags);
+ val = bgread(BT848_GPIO_DATA);
+ spin_unlock_irqrestore(&bg->lock, flags);
+
+ return !!(val & (1 << nr));
+}
+
+static int bt8xxgpio_gpio_direction_output(struct gpio_chip *gpio,
+ unsigned nr, int val)
+{
+ struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
+ unsigned long flags;
+ u32 outen, data;
+
+ spin_lock_irqsave(&bg->lock, flags);
+
+ outen = bgread(BT848_GPIO_OUT_EN);
+ outen |= (1 << nr);
+ bgwrite(outen, BT848_GPIO_OUT_EN);
+
+ data = bgread(BT848_GPIO_DATA);
+ if (val)
+ data |= (1 << nr);
+ else
+ data &= ~(1 << nr);
+ bgwrite(data, BT848_GPIO_DATA);
+
+ spin_unlock_irqrestore(&bg->lock, flags);
+
+ return 0;
+}
+
+static void bt8xxgpio_gpio_set(struct gpio_chip *gpio,
+ unsigned nr, int val)
+{
+ struct bt8xxgpio *bg = container_of(gpio, struct bt8xxgpio, gpio);
+ unsigned long flags;
+ u32 data;
+
+ spin_lock_irqsave(&bg->lock, flags);
+
+ data = bgread(BT848_GPIO_DATA);
+ if (val)
+ data |= (1 << nr);
+ else
+ data &= ~(1 << nr);
+ bgwrite(data, BT848_GPIO_DATA);
+
+ spin_unlock_irqrestore(&bg->lock, flags);
+}
+
+static void bt8xxgpio_gpio_setup(struct bt8xxgpio *bg)
+{
+ struct gpio_chip *c = &bg->gpio;
+
+ c->label = bg->pdev->dev.bus_id;
+ c->owner = THIS_MODULE;
+ c->direction_input = bt8xxgpio_gpio_direction_input;
+ c->get = bt8xxgpio_gpio_get;
+ c->direction_output = bt8xxgpio_gpio_direction_output;
+ c->set = bt8xxgpio_gpio_set;
+ c->dbg_show = NULL;
+ c->base = modparam_gpiobase;
+ c->ngpio = BT8XXGPIO_NR_GPIOS;
+ c->can_sleep = 0;
+}
+
+static int bt8xxgpio_probe(struct pci_dev *dev,
+ const struct pci_device_id *pci_id)
+{
+ struct bt8xxgpio *bg;
+ int err;
+
+ bg = kzalloc(sizeof(*bg), GFP_KERNEL);
+ if (!bg)
+ return -ENOMEM;
+
+ bg->pdev = dev;
+ spin_lock_init(&bg->lock);
+
+ err = pci_enable_device(dev);
+ if (err) {
+ printk(KERN_ERR "bt8xxgpio: Can't enable device.\n");
+ goto err_freebg;
+ }
+ if (!request_mem_region(pci_resource_start(dev, 0),
+ pci_resource_len(dev, 0),
+ "bt8xxgpio")) {
+ printk(KERN_WARNING "bt8xxgpio: Can't request iomem (0x%llx).\n",
+ (unsigned long long)pci_resource_start(dev, 0));
+ err = -EBUSY;
+ goto err_disable;
+ }
+ pci_set_master(dev);
+ pci_set_drvdata(dev, bg);
+
+ bg->mmio = ioremap(pci_resource_start(dev, 0), 0x1000);
+ if (!bg->mmio) {
+ printk(KERN_ERR "bt8xxgpio: ioremap() failed\n");
+ err = -EIO;
+ goto err_release_mem;
+ }
+
+ /* Disable interrupts */
+ bgwrite(0, BT848_INT_MASK);
+
+ /* gpio init */
+ bgwrite(0, BT848_GPIO_DMA_CTL);
+ bgwrite(0, BT848_GPIO_REG_INP);
+ bgwrite(0, BT848_GPIO_OUT_EN);
+
+ bt8xxgpio_gpio_setup(bg);
+ err = gpiochip_add(&bg->gpio);
+ if (err) {
+ printk(KERN_ERR "bt8xxgpio: Failed to register GPIOs\n");
+ goto err_release_mem;
+ }
+
+ printk(KERN_INFO "bt8xxgpio: Abusing BT8xx card for GPIOs %d to %d\n",
+ bg->gpio.base, bg->gpio.base + BT8XXGPIO_NR_GPIOS - 1);
+
+ return 0;
+
+err_release_mem:
+ release_mem_region(pci_resource_start(dev, 0),
+ pci_resource_len(dev, 0));
+ pci_set_drvdata(dev, NULL);
+err_disable:
+ pci_disable_device(dev);
+err_freebg:
+ kfree(bg);
+
+ return err;
+}
+
+static void bt8xxgpio_remove(struct pci_dev *pdev)
+{
+ struct bt8xxgpio *bg = pci_get_drvdata(pdev);
+
+ gpiochip_remove(&bg->gpio);
+
+ bgwrite(0, BT848_INT_MASK);
+ bgwrite(~0x0, BT848_INT_STAT);
+ bgwrite(0x0, BT848_GPIO_OUT_EN);
+
+ iounmap(bg->mmio);
+ release_mem_region(pci_resource_start(pdev, 0),
+ pci_resource_len(pdev, 0));
+ pci_disable_device(pdev);
+
+ pci_set_drvdata(pdev, NULL);
+ kfree(bg);
+}
+
+#ifdef CONFIG_PM
+static int bt8xxgpio_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ struct bt8xxgpio *bg = pci_get_drvdata(pdev);
+ unsigned long flags;
+
+ spin_lock_irqsave(&bg->lock, flags);
+
+ bg->saved_outen = bgread(BT848_GPIO_OUT_EN);
+ bg->saved_data = bgread(BT848_GPIO_DATA);
+
+ bgwrite(0, BT848_INT_MASK);
+ bgwrite(~0x0, BT848_INT_STAT);
+ bgwrite(0x0, BT848_GPIO_OUT_EN);
+
+ spin_unlock_irqrestore(&bg->lock, flags);
+
+ pci_save_state(pdev);
+ pci_disable_device(pdev);
+ pci_set_power_state(pdev, pci_choose_state(pdev, state));
+
+ return 0;
+}
+
+static int bt8xxgpio_resume(struct pci_dev *pdev)
+{
+ struct bt8xxgpio *bg = pci_get_drvdata(pdev);
+ unsigned long flags;
+ int err;
+
+ pci_set_power_state(pdev, 0);
+ err = pci_enable_device(pdev);
+ if (err)
+ return err;
+ pci_restore_state(pdev);
+
+ spin_lock_irqsave(&bg->lock, flags);
+
+ bgwrite(0, BT848_INT_MASK);
+ bgwrite(0, BT848_GPIO_DMA_CTL);
+ bgwrite(0, BT848_GPIO_REG_INP);
+ bgwrite(bg->saved_outen, BT848_GPIO_OUT_EN);
+ bgwrite(bg->saved_data & bg->saved_outen,
+ BT848_GPIO_DATA);
+
+ spin_unlock_irqrestore(&bg->lock, flags);
+
+ return 0;
+}
+#else
+#define bt8xxgpio_suspend NULL
+#define bt8xxgpio_resume NULL
+#endif /* CONFIG_PM */
+
+static struct pci_device_id bt8xxgpio_pci_tbl[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT848) },
+ { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT849) },
+ { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT878) },
+ { PCI_DEVICE(PCI_VENDOR_ID_BROOKTREE, PCI_DEVICE_ID_BT879) },
+ { 0, },
+};
+MODULE_DEVICE_TABLE(pci, bt8xxgpio_pci_tbl);
+
+static struct pci_driver bt8xxgpio_pci_driver = {
+ .name = "bt8xxgpio",
+ .id_table = bt8xxgpio_pci_tbl,
+ .probe = bt8xxgpio_probe,
+ .remove = bt8xxgpio_remove,
+ .suspend = bt8xxgpio_suspend,
+ .resume = bt8xxgpio_resume,
+};
+
+static int bt8xxgpio_init(void)
+{
+ return pci_register_driver(&bt8xxgpio_pci_driver);
+}
+module_init(bt8xxgpio_init)
+
+static void bt8xxgpio_exit(void)
+{
+ pci_unregister_driver(&bt8xxgpio_pci_driver);
+}
+module_exit(bt8xxgpio_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michael Buesch");
+MODULE_DESCRIPTION("Abuse a BT8xx framegrabber card as generic GPIO card");
#include <linux/module.h>
#include <linux/irq.h>
#include <linux/spinlock.h>
-
-#include <asm/gpio.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/gpio.h>
/* Optional implementation infrastructure for GPIO interfaces.
#define FLAG_REQUESTED 0
#define FLAG_IS_OUT 1
#define FLAG_RESERVED 2
+#define FLAG_EXPORT 3 /* protected by sysfs_lock */
+#define FLAG_SYSFS 4 /* exported via /sys/class/gpio/control */
#ifdef CONFIG_DEBUG_FS
const char *label;
return ret;
}
+#ifdef CONFIG_GPIO_SYSFS
+
+/* lock protects against unexport_gpio() being called while
+ * sysfs files are active.
+ */
+static DEFINE_MUTEX(sysfs_lock);
+
+/*
+ * /sys/class/gpio/gpioN... only for GPIOs that are exported
+ * /direction
+ * * MAY BE OMITTED if kernel won't allow direction changes
+ * * is read/write as "in" or "out"
+ * * may also be written as "high" or "low", initializing
+ * output value as specified ("out" implies "low")
+ * /value
+ * * always readable, subject to hardware behavior
+ * * may be writable, as zero/nonzero
+ *
+ * REVISIT there will likely be an attribute for configuring async
+ * notifications, e.g. to specify polling interval or IRQ trigger type
+ * that would for example trigger a poll() on the "value".
+ */
+
+static ssize_t gpio_direction_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ const struct gpio_desc *desc = dev_get_drvdata(dev);
+ ssize_t status;
+
+ mutex_lock(&sysfs_lock);
+
+ if (!test_bit(FLAG_EXPORT, &desc->flags))
+ status = -EIO;
+ else
+ status = sprintf(buf, "%s\n",
+ test_bit(FLAG_IS_OUT, &desc->flags)
+ ? "out" : "in");
+
+ mutex_unlock(&sysfs_lock);
+ return status;
+}
+
+static ssize_t gpio_direction_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ const struct gpio_desc *desc = dev_get_drvdata(dev);
+ unsigned gpio = desc - gpio_desc;
+ ssize_t status;
+
+ mutex_lock(&sysfs_lock);
+
+ if (!test_bit(FLAG_EXPORT, &desc->flags))
+ status = -EIO;
+ else if (sysfs_streq(buf, "high"))
+ status = gpio_direction_output(gpio, 1);
+ else if (sysfs_streq(buf, "out") || sysfs_streq(buf, "low"))
+ status = gpio_direction_output(gpio, 0);
+ else if (sysfs_streq(buf, "in"))
+ status = gpio_direction_input(gpio);
+ else
+ status = -EINVAL;
+
+ mutex_unlock(&sysfs_lock);
+ return status ? : size;
+}
+
+static const DEVICE_ATTR(direction, 0644,
+ gpio_direction_show, gpio_direction_store);
+
+static ssize_t gpio_value_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ const struct gpio_desc *desc = dev_get_drvdata(dev);
+ unsigned gpio = desc - gpio_desc;
+ ssize_t status;
+
+ mutex_lock(&sysfs_lock);
+
+ if (!test_bit(FLAG_EXPORT, &desc->flags))
+ status = -EIO;
+ else
+ status = sprintf(buf, "%d\n", gpio_get_value_cansleep(gpio));
+
+ mutex_unlock(&sysfs_lock);
+ return status;
+}
+
+static ssize_t gpio_value_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ const struct gpio_desc *desc = dev_get_drvdata(dev);
+ unsigned gpio = desc - gpio_desc;
+ ssize_t status;
+
+ mutex_lock(&sysfs_lock);
+
+ if (!test_bit(FLAG_EXPORT, &desc->flags))
+ status = -EIO;
+ else if (!test_bit(FLAG_IS_OUT, &desc->flags))
+ status = -EPERM;
+ else {
+ long value;
+
+ status = strict_strtol(buf, 0, &value);
+ if (status == 0) {
+ gpio_set_value_cansleep(gpio, value != 0);
+ status = size;
+ }
+ }
+
+ mutex_unlock(&sysfs_lock);
+ return status;
+}
+
+static /*const*/ DEVICE_ATTR(value, 0644,
+ gpio_value_show, gpio_value_store);
+
+static const struct attribute *gpio_attrs[] = {
+ &dev_attr_direction.attr,
+ &dev_attr_value.attr,
+ NULL,
+};
+
+static const struct attribute_group gpio_attr_group = {
+ .attrs = (struct attribute **) gpio_attrs,
+};
+
+/*
+ * /sys/class/gpio/gpiochipN/
+ * /base ... matching gpio_chip.base (N)
+ * /label ... matching gpio_chip.label
+ * /ngpio ... matching gpio_chip.ngpio
+ */
+
+static ssize_t chip_base_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ const struct gpio_chip *chip = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", chip->base);
+}
+static DEVICE_ATTR(base, 0444, chip_base_show, NULL);
+
+static ssize_t chip_label_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ const struct gpio_chip *chip = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%s\n", chip->label ? : "");
+}
+static DEVICE_ATTR(label, 0444, chip_label_show, NULL);
+
+static ssize_t chip_ngpio_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ const struct gpio_chip *chip = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%u\n", chip->ngpio);
+}
+static DEVICE_ATTR(ngpio, 0444, chip_ngpio_show, NULL);
+
+static const struct attribute *gpiochip_attrs[] = {
+ &dev_attr_base.attr,
+ &dev_attr_label.attr,
+ &dev_attr_ngpio.attr,
+ NULL,
+};
+
+static const struct attribute_group gpiochip_attr_group = {
+ .attrs = (struct attribute **) gpiochip_attrs,
+};
+
+/*
+ * /sys/class/gpio/export ... write-only
+ * integer N ... number of GPIO to export (full access)
+ * /sys/class/gpio/unexport ... write-only
+ * integer N ... number of GPIO to unexport
+ */
+static ssize_t export_store(struct class *class, const char *buf, size_t len)
+{
+ long gpio;
+ int status;
+
+ status = strict_strtol(buf, 0, &gpio);
+ if (status < 0)
+ goto done;
+
+ /* No extra locking here; FLAG_SYSFS just signifies that the
+ * request and export were done by on behalf of userspace, so
+ * they may be undone on its behalf too.
+ */
+
+ status = gpio_request(gpio, "sysfs");
+ if (status < 0)
+ goto done;
+
+ status = gpio_export(gpio, true);
+ if (status < 0)
+ gpio_free(gpio);
+ else
+ set_bit(FLAG_SYSFS, &gpio_desc[gpio].flags);
+
+done:
+ if (status)
+ pr_debug("%s: status %d\n", __func__, status);
+ return status ? : len;
+}
+
+static ssize_t unexport_store(struct class *class, const char *buf, size_t len)
+{
+ long gpio;
+ int status;
+
+ status = strict_strtol(buf, 0, &gpio);
+ if (status < 0)
+ goto done;
+
+ status = -EINVAL;
+
+ /* reject bogus commands (gpio_unexport ignores them) */
+ if (!gpio_is_valid(gpio))
+ goto done;
+
+ /* No extra locking here; FLAG_SYSFS just signifies that the
+ * request and export were done by on behalf of userspace, so
+ * they may be undone on its behalf too.
+ */
+ if (test_and_clear_bit(FLAG_SYSFS, &gpio_desc[gpio].flags)) {
+ status = 0;
+ gpio_free(gpio);
+ }
+done:
+ if (status)
+ pr_debug("%s: status %d\n", __func__, status);
+ return status ? : len;
+}
+
+static struct class_attribute gpio_class_attrs[] = {
+ __ATTR(export, 0200, NULL, export_store),
+ __ATTR(unexport, 0200, NULL, unexport_store),
+ __ATTR_NULL,
+};
+
+static struct class gpio_class = {
+ .name = "gpio",
+ .owner = THIS_MODULE,
+
+ .class_attrs = gpio_class_attrs,
+};
+
+
+/**
+ * gpio_export - export a GPIO through sysfs
+ * @gpio: gpio to make available, already requested
+ * @direction_may_change: true if userspace may change gpio direction
+ * Context: arch_initcall or later
+ *
+ * When drivers want to make a GPIO accessible to userspace after they
+ * have requested it -- perhaps while debugging, or as part of their
+ * public interface -- they may use this routine. If the GPIO can
+ * change direction (some can't) and the caller allows it, userspace
+ * will see "direction" sysfs attribute which may be used to change
+ * the gpio's direction. A "value" attribute will always be provided.
+ *
+ * Returns zero on success, else an error.
+ */
+int gpio_export(unsigned gpio, bool direction_may_change)
+{
+ unsigned long flags;
+ struct gpio_desc *desc;
+ int status = -EINVAL;
+
+ /* can't export until sysfs is available ... */
+ if (!gpio_class.p) {
+ pr_debug("%s: called too early!\n", __func__);
+ return -ENOENT;
+ }
+
+ if (!gpio_is_valid(gpio))
+ goto done;
+
+ mutex_lock(&sysfs_lock);
+
+ spin_lock_irqsave(&gpio_lock, flags);
+ desc = &gpio_desc[gpio];
+ if (test_bit(FLAG_REQUESTED, &desc->flags)
+ && !test_bit(FLAG_EXPORT, &desc->flags)) {
+ status = 0;
+ if (!desc->chip->direction_input
+ || !desc->chip->direction_output)
+ direction_may_change = false;
+ }
+ spin_unlock_irqrestore(&gpio_lock, flags);
+
+ if (status == 0) {
+ struct device *dev;
+
+ dev = device_create(&gpio_class, desc->chip->dev, MKDEV(0, 0),
+ desc, "gpio%d", gpio);
+ if (dev) {
+ if (direction_may_change)
+ status = sysfs_create_group(&dev->kobj,
+ &gpio_attr_group);
+ else
+ status = device_create_file(dev,
+ &dev_attr_value);
+ if (status != 0)
+ device_unregister(dev);
+ } else
+ status = -ENODEV;
+ if (status == 0)
+ set_bit(FLAG_EXPORT, &desc->flags);
+ }
+
+ mutex_unlock(&sysfs_lock);
+
+done:
+ if (status)
+ pr_debug("%s: gpio%d status %d\n", __func__, gpio, status);
+
+ return status;
+}
+EXPORT_SYMBOL_GPL(gpio_export);
+
+static int match_export(struct device *dev, void *data)
+{
+ return dev_get_drvdata(dev) == data;
+}
+
+/**
+ * gpio_unexport - reverse effect of gpio_export()
+ * @gpio: gpio to make unavailable
+ *
+ * This is implicit on gpio_free().
+ */
+void gpio_unexport(unsigned gpio)
+{
+ struct gpio_desc *desc;
+ int status = -EINVAL;
+
+ if (!gpio_is_valid(gpio))
+ goto done;
+
+ mutex_lock(&sysfs_lock);
+
+ desc = &gpio_desc[gpio];
+ if (test_bit(FLAG_EXPORT, &desc->flags)) {
+ struct device *dev = NULL;
+
+ dev = class_find_device(&gpio_class, NULL, desc, match_export);
+ if (dev) {
+ clear_bit(FLAG_EXPORT, &desc->flags);
+ put_device(dev);
+ device_unregister(dev);
+ status = 0;
+ } else
+ status = -ENODEV;
+ }
+
+ mutex_unlock(&sysfs_lock);
+done:
+ if (status)
+ pr_debug("%s: gpio%d status %d\n", __func__, gpio, status);
+}
+EXPORT_SYMBOL_GPL(gpio_unexport);
+
+static int gpiochip_export(struct gpio_chip *chip)
+{
+ int status;
+ struct device *dev;
+
+ /* Many systems register gpio chips for SOC support very early,
+ * before driver model support is available. In those cases we
+ * export this later, in gpiolib_sysfs_init() ... here we just
+ * verify that _some_ field of gpio_class got initialized.
+ */
+ if (!gpio_class.p)
+ return 0;
+
+ /* use chip->base for the ID; it's already known to be unique */
+ mutex_lock(&sysfs_lock);
+ dev = device_create(&gpio_class, chip->dev, MKDEV(0, 0), chip,
+ "gpiochip%d", chip->base);
+ if (dev) {
+ status = sysfs_create_group(&dev->kobj,
+ &gpiochip_attr_group);
+ } else
+ status = -ENODEV;
+ chip->exported = (status == 0);
+ mutex_unlock(&sysfs_lock);
+
+ if (status) {
+ unsigned long flags;
+ unsigned gpio;
+
+ spin_lock_irqsave(&gpio_lock, flags);
+ gpio = chip->base;
+ while (gpio_desc[gpio].chip == chip)
+ gpio_desc[gpio++].chip = NULL;
+ spin_unlock_irqrestore(&gpio_lock, flags);
+
+ pr_debug("%s: chip %s status %d\n", __func__,
+ chip->label, status);
+ }
+
+ return status;
+}
+
+static void gpiochip_unexport(struct gpio_chip *chip)
+{
+ int status;
+ struct device *dev;
+
+ mutex_lock(&sysfs_lock);
+ dev = class_find_device(&gpio_class, NULL, chip, match_export);
+ if (dev) {
+ put_device(dev);
+ device_unregister(dev);
+ chip->exported = 0;
+ status = 0;
+ } else
+ status = -ENODEV;
+ mutex_unlock(&sysfs_lock);
+
+ if (status)
+ pr_debug("%s: chip %s status %d\n", __func__,
+ chip->label, status);
+}
+
+static int __init gpiolib_sysfs_init(void)
+{
+ int status;
+ unsigned long flags;
+ unsigned gpio;
+
+ status = class_register(&gpio_class);
+ if (status < 0)
+ return status;
+
+ /* Scan and register the gpio_chips which registered very
+ * early (e.g. before the class_register above was called).
+ *
+ * We run before arch_initcall() so chip->dev nodes can have
+ * registered, and so arch_initcall() can always gpio_export().
+ */
+ spin_lock_irqsave(&gpio_lock, flags);
+ for (gpio = 0; gpio < ARCH_NR_GPIOS; gpio++) {
+ struct gpio_chip *chip;
+
+ chip = gpio_desc[gpio].chip;
+ if (!chip || chip->exported)
+ continue;
+
+ spin_unlock_irqrestore(&gpio_lock, flags);
+ status = gpiochip_export(chip);
+ spin_lock_irqsave(&gpio_lock, flags);
+ }
+ spin_unlock_irqrestore(&gpio_lock, flags);
+
+
+ return status;
+}
+postcore_initcall(gpiolib_sysfs_init);
+
+#else
+static inline int gpiochip_export(struct gpio_chip *chip)
+{
+ return 0;
+}
+
+static inline void gpiochip_unexport(struct gpio_chip *chip)
+{
+}
+
+#endif /* CONFIG_GPIO_SYSFS */
+
/**
* gpiochip_add() - register a gpio_chip
* @chip: the chip to register, with chip->base initialized
* because the chip->base is invalid or already associated with a
* different chip. Otherwise it returns zero as a success code.
*
+ * When gpiochip_add() is called very early during boot, so that GPIOs
+ * can be freely used, the chip->dev device must be registered before
+ * the gpio framework's arch_initcall(). Otherwise sysfs initialization
+ * for GPIOs will fail rudely.
+ *
* If chip->base is negative, this requests dynamic assignment of
* a range of valid GPIOs.
*/
base = gpiochip_find_base(chip->ngpio);
if (base < 0) {
status = base;
- goto fail_unlock;
+ goto unlock;
}
chip->base = base;
}
if (status == 0) {
for (id = base; id < base + chip->ngpio; id++) {
gpio_desc[id].chip = chip;
- gpio_desc[id].flags = 0;
+
+ /* REVISIT: most hardware initializes GPIOs as
+ * inputs (often with pullups enabled) so power
+ * usage is minimized. Linux code should set the
+ * gpio direction first thing; but until it does,
+ * we may expose the wrong direction in sysfs.
+ */
+ gpio_desc[id].flags = !chip->direction_input
+ ? (1 << FLAG_IS_OUT)
+ : 0;
}
}
-fail_unlock:
+unlock:
spin_unlock_irqrestore(&gpio_lock, flags);
+ if (status == 0)
+ status = gpiochip_export(chip);
fail:
/* failures here can mean systems won't boot... */
if (status)
}
spin_unlock_irqrestore(&gpio_lock, flags);
+
+ if (status == 0)
+ gpiochip_unexport(chip);
+
return status;
}
EXPORT_SYMBOL_GPL(gpiochip_remove);
return;
}
+ gpio_unexport(gpio);
+
spin_lock_irqsave(&gpio_lock, flags);
desc = &gpio_desc[gpio];
#ifdef CONFIG_DEBUG_FS
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-
static void gpiolib_dbg_show(struct seq_file *s, struct gpio_chip *chip)
{
unsigned i;
/* REVISIT this isn't locked against gpio_chip removal ... */
for (gpio = 0; gpio_is_valid(gpio); gpio++) {
+ struct device *dev;
+
if (chip == gpio_desc[gpio].chip)
continue;
chip = gpio_desc[gpio].chip;
if (!chip)
continue;
- seq_printf(s, "%sGPIOs %d-%d, %s%s:\n",
+ seq_printf(s, "%sGPIOs %d-%d",
started ? "\n" : "",
- chip->base, chip->base + chip->ngpio - 1,
- chip->label ? : "generic",
- chip->can_sleep ? ", can sleep" : "");
+ chip->base, chip->base + chip->ngpio - 1);
+ dev = chip->dev;
+ if (dev)
+ seq_printf(s, ", %s/%s",
+ dev->bus ? dev->bus->name : "no-bus",
+ dev->bus_id);
+ if (chip->label)
+ seq_printf(s, ", %s", chip->label);
+ if (chip->can_sleep)
+ seq_printf(s, ", can sleep");
+ seq_printf(s, ":\n");
+
started = 1;
if (chip->dbg_show)
chip->dbg_show(s, chip);
--- /dev/null
+/*
+ * max732x.c - I2C Port Expander with 8/16 I/O
+ *
+ * Copyright (C) 2007 Marvell International Ltd.
+ * Copyright (C) 2008 Jack Ren <jack.ren@marvell.com>
+ * Copyright (C) 2008 Eric Miao <eric.miao@marvell.com>
+ *
+ * Derived from drivers/gpio/pca953x.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/gpio.h>
+
+#include <linux/i2c.h>
+#include <linux/i2c/max732x.h>
+
+
+/*
+ * Each port of MAX732x (including MAX7319) falls into one of the
+ * following three types:
+ *
+ * - Push Pull Output
+ * - Input
+ * - Open Drain I/O
+ *
+ * designated by 'O', 'I' and 'P' individually according to MAXIM's
+ * datasheets.
+ *
+ * There are two groups of I/O ports, each group usually includes
+ * up to 8 I/O ports, and is accessed by a specific I2C address:
+ *
+ * - Group A : by I2C address 0b'110xxxx
+ * - Group B : by I2C address 0b'101xxxx
+ *
+ * where 'xxxx' is decided by the connections of pin AD2/AD0. The
+ * address used also affects the initial state of output signals.
+ *
+ * Within each group of ports, there are five known combinations of
+ * I/O ports: 4I4O, 4P4O, 8I, 8P, 8O, see the definitions below for
+ * the detailed organization of these ports.
+ *
+ * GPIO numbers start from 'gpio_base + 0' to 'gpio_base + 8/16',
+ * and GPIOs from GROUP_A are numbered before those from GROUP_B
+ * (if there are two groups).
+ *
+ * NOTE: MAX7328/MAX7329 are drop-in replacements for PCF8574/a, so
+ * they are not supported by this driver.
+ */
+
+#define PORT_NONE 0x0 /* '/' No Port */
+#define PORT_OUTPUT 0x1 /* 'O' Push-Pull, Output Only */
+#define PORT_INPUT 0x2 /* 'I' Input Only */
+#define PORT_OPENDRAIN 0x3 /* 'P' Open-Drain, I/O */
+
+#define IO_4I4O 0x5AA5 /* O7 O6 I5 I4 I3 I2 O1 O0 */
+#define IO_4P4O 0x5FF5 /* O7 O6 P5 P4 P3 P2 O1 O0 */
+#define IO_8I 0xAAAA /* I7 I6 I5 I4 I3 I2 I1 I0 */
+#define IO_8P 0xFFFF /* P7 P6 P5 P4 P3 P2 P1 P0 */
+#define IO_8O 0x5555 /* O7 O6 O5 O4 O3 O2 O1 O0 */
+
+#define GROUP_A(x) ((x) & 0xffff) /* I2C Addr: 0b'110xxxx */
+#define GROUP_B(x) ((x) << 16) /* I2C Addr: 0b'101xxxx */
+
+static const struct i2c_device_id max732x_id[] = {
+ { "max7319", GROUP_A(IO_8I) },
+ { "max7320", GROUP_B(IO_8O) },
+ { "max7321", GROUP_A(IO_8P) },
+ { "max7322", GROUP_A(IO_4I4O) },
+ { "max7323", GROUP_A(IO_4P4O) },
+ { "max7324", GROUP_A(IO_8I) | GROUP_B(IO_8O) },
+ { "max7325", GROUP_A(IO_8P) | GROUP_B(IO_8O) },
+ { "max7326", GROUP_A(IO_4I4O) | GROUP_B(IO_8O) },
+ { "max7327", GROUP_A(IO_4P4O) | GROUP_B(IO_8O) },
+ { },
+};
+MODULE_DEVICE_TABLE(i2c, max732x_id);
+
+struct max732x_chip {
+ struct gpio_chip gpio_chip;
+
+ struct i2c_client *client; /* "main" client */
+ struct i2c_client *client_dummy;
+ struct i2c_client *client_group_a;
+ struct i2c_client *client_group_b;
+
+ unsigned int mask_group_a;
+ unsigned int dir_input;
+ unsigned int dir_output;
+
+ struct mutex lock;
+ uint8_t reg_out[2];
+};
+
+static int max732x_write(struct max732x_chip *chip, int group_a, uint8_t val)
+{
+ struct i2c_client *client;
+ int ret;
+
+ client = group_a ? chip->client_group_a : chip->client_group_b;
+ ret = i2c_smbus_write_byte(client, val);
+ if (ret < 0) {
+ dev_err(&client->dev, "failed writing\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int max732x_read(struct max732x_chip *chip, int group_a, uint8_t *val)
+{
+ struct i2c_client *client;
+ int ret;
+
+ client = group_a ? chip->client_group_a : chip->client_group_b;
+ ret = i2c_smbus_read_byte(client);
+ if (ret < 0) {
+ dev_err(&client->dev, "failed reading\n");
+ return ret;
+ }
+
+ *val = (uint8_t)ret;
+ return 0;
+}
+
+static inline int is_group_a(struct max732x_chip *chip, unsigned off)
+{
+ return (1u << off) & chip->mask_group_a;
+}
+
+static int max732x_gpio_get_value(struct gpio_chip *gc, unsigned off)
+{
+ struct max732x_chip *chip;
+ uint8_t reg_val;
+ int ret;
+
+ chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+ ret = max732x_read(chip, is_group_a(chip, off), ®_val);
+ if (ret < 0)
+ return 0;
+
+ return reg_val & (1u << (off & 0x7));
+}
+
+static void max732x_gpio_set_value(struct gpio_chip *gc, unsigned off, int val)
+{
+ struct max732x_chip *chip;
+ uint8_t reg_out, mask = 1u << (off & 0x7);
+ int ret;
+
+ chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+ mutex_lock(&chip->lock);
+
+ reg_out = (off > 7) ? chip->reg_out[1] : chip->reg_out[0];
+ reg_out = (val) ? reg_out | mask : reg_out & ~mask;
+
+ ret = max732x_write(chip, is_group_a(chip, off), reg_out);
+ if (ret < 0)
+ goto out;
+
+ /* update the shadow register then */
+ if (off > 7)
+ chip->reg_out[1] = reg_out;
+ else
+ chip->reg_out[0] = reg_out;
+out:
+ mutex_unlock(&chip->lock);
+}
+
+static int max732x_gpio_direction_input(struct gpio_chip *gc, unsigned off)
+{
+ struct max732x_chip *chip;
+ unsigned int mask = 1u << off;
+
+ chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+ if ((mask & chip->dir_input) == 0) {
+ dev_dbg(&chip->client->dev, "%s port %d is output only\n",
+ chip->client->name, off);
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+static int max732x_gpio_direction_output(struct gpio_chip *gc,
+ unsigned off, int val)
+{
+ struct max732x_chip *chip;
+ unsigned int mask = 1u << off;
+
+ chip = container_of(gc, struct max732x_chip, gpio_chip);
+
+ if ((mask & chip->dir_output) == 0) {
+ dev_dbg(&chip->client->dev, "%s port %d is input only\n",
+ chip->client->name, off);
+ return -EACCES;
+ }
+
+ max732x_gpio_set_value(gc, off, val);
+ return 0;
+}
+
+static int __devinit max732x_setup_gpio(struct max732x_chip *chip,
+ const struct i2c_device_id *id,
+ unsigned gpio_start)
+{
+ struct gpio_chip *gc = &chip->gpio_chip;
+ uint32_t id_data = id->driver_data;
+ int i, port = 0;
+
+ for (i = 0; i < 16; i++, id_data >>= 2) {
+ unsigned int mask = 1 << port;
+
+ switch (id_data & 0x3) {
+ case PORT_OUTPUT:
+ chip->dir_output |= mask;
+ break;
+ case PORT_INPUT:
+ chip->dir_input |= mask;
+ break;
+ case PORT_OPENDRAIN:
+ chip->dir_output |= mask;
+ chip->dir_input |= mask;
+ break;
+ default:
+ continue;
+ }
+
+ if (i < 8)
+ chip->mask_group_a |= mask;
+ port++;
+ }
+
+ if (chip->dir_input)
+ gc->direction_input = max732x_gpio_direction_input;
+ if (chip->dir_output) {
+ gc->direction_output = max732x_gpio_direction_output;
+ gc->set = max732x_gpio_set_value;
+ }
+ gc->get = max732x_gpio_get_value;
+ gc->can_sleep = 1;
+
+ gc->base = gpio_start;
+ gc->ngpio = port;
+ gc->label = chip->client->name;
+ gc->owner = THIS_MODULE;
+
+ return port;
+}
+
+static int __devinit max732x_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct max732x_platform_data *pdata;
+ struct max732x_chip *chip;
+ struct i2c_client *c;
+ uint16_t addr_a, addr_b;
+ int ret, nr_port;
+
+ pdata = client->dev.platform_data;
+ if (pdata == NULL)
+ return -ENODEV;
+
+ chip = kzalloc(sizeof(struct max732x_chip), GFP_KERNEL);
+ if (chip == NULL)
+ return -ENOMEM;
+ chip->client = client;
+
+ nr_port = max732x_setup_gpio(chip, id, pdata->gpio_base);
+
+ addr_a = (client->addr & 0x0f) | 0x60;
+ addr_b = (client->addr & 0x0f) | 0x50;
+
+ switch (client->addr & 0x70) {
+ case 0x60:
+ chip->client_group_a = client;
+ if (nr_port > 7) {
+ c = i2c_new_dummy(client->adapter, addr_b);
+ chip->client_group_b = chip->client_dummy = c;
+ }
+ break;
+ case 0x50:
+ chip->client_group_b = client;
+ if (nr_port > 7) {
+ c = i2c_new_dummy(client->adapter, addr_a);
+ chip->client_group_a = chip->client_dummy = c;
+ }
+ break;
+ default:
+ dev_err(&client->dev, "invalid I2C address specified %02x\n",
+ client->addr);
+ ret = -EINVAL;
+ goto out_failed;
+ }
+
+ mutex_init(&chip->lock);
+
+ max732x_read(chip, is_group_a(chip, 0), &chip->reg_out[0]);
+ if (nr_port > 7)
+ max732x_read(chip, is_group_a(chip, 8), &chip->reg_out[1]);
+
+ ret = gpiochip_add(&chip->gpio_chip);
+ if (ret)
+ goto out_failed;
+
+ if (pdata->setup) {
+ ret = pdata->setup(client, chip->gpio_chip.base,
+ chip->gpio_chip.ngpio, pdata->context);
+ if (ret < 0)
+ dev_warn(&client->dev, "setup failed, %d\n", ret);
+ }
+
+ i2c_set_clientdata(client, chip);
+ return 0;
+
+out_failed:
+ kfree(chip);
+ return ret;
+}
+
+static int __devexit max732x_remove(struct i2c_client *client)
+{
+ struct max732x_platform_data *pdata = client->dev.platform_data;
+ struct max732x_chip *chip = i2c_get_clientdata(client);
+ int ret;
+
+ if (pdata->teardown) {
+ ret = pdata->teardown(client, chip->gpio_chip.base,
+ chip->gpio_chip.ngpio, pdata->context);
+ if (ret < 0) {
+ dev_err(&client->dev, "%s failed, %d\n",
+ "teardown", ret);
+ return ret;
+ }
+ }
+
+ ret = gpiochip_remove(&chip->gpio_chip);
+ if (ret) {
+ dev_err(&client->dev, "%s failed, %d\n",
+ "gpiochip_remove()", ret);
+ return ret;
+ }
+
+ /* unregister any dummy i2c_client */
+ if (chip->client_dummy)
+ i2c_unregister_device(chip->client_dummy);
+
+ kfree(chip);
+ return 0;
+}
+
+static struct i2c_driver max732x_driver = {
+ .driver = {
+ .name = "max732x",
+ .owner = THIS_MODULE,
+ },
+ .probe = max732x_probe,
+ .remove = __devexit_p(max732x_remove),
+ .id_table = max732x_id,
+};
+
+static int __init max732x_init(void)
+{
+ return i2c_add_driver(&max732x_driver);
+}
+module_init(max732x_init);
+
+static void __exit max732x_exit(void)
+{
+ i2c_del_driver(&max732x_driver);
+}
+module_exit(max732x_exit);
+
+MODULE_AUTHOR("Eric Miao <eric.miao@marvell.com>");
+MODULE_DESCRIPTION("GPIO expander driver for MAX732X");
+MODULE_LICENSE("GPL");
struct spi_device *spi;
u8 addr;
+ u8 cache[11];
/* lock protects the cached values */
struct mutex lock;
- u8 cache[11];
struct gpio_chip chip;
struct work_struct work;
};
+/* A given spi_device can represent up to four mcp23s08 chips
+ * sharing the same chipselect but using different addresses
+ * (e.g. chips #0 and #3 might be populated, but not #1 or $2).
+ * Driver data holds all the per-chip data.
+ */
+struct mcp23s08_driver_data {
+ unsigned ngpio;
+ struct mcp23s08 *mcp[4];
+ struct mcp23s08 chip[];
+};
+
static int mcp23s08_read(struct mcp23s08 *mcp, unsigned reg)
{
u8 tx[2], rx[1];
/*----------------------------------------------------------------------*/
-static int mcp23s08_probe(struct spi_device *spi)
+static int mcp23s08_probe_one(struct spi_device *spi, unsigned addr,
+ unsigned base, unsigned pullups)
{
- struct mcp23s08 *mcp;
- struct mcp23s08_platform_data *pdata;
+ struct mcp23s08_driver_data *data = spi_get_drvdata(spi);
+ struct mcp23s08 *mcp = data->mcp[addr];
int status;
int do_update = 0;
- pdata = spi->dev.platform_data;
- if (!pdata || pdata->slave > 3 || !pdata->base)
- return -ENODEV;
-
- mcp = kzalloc(sizeof *mcp, GFP_KERNEL);
- if (!mcp)
- return -ENOMEM;
-
mutex_init(&mcp->lock);
mcp->spi = spi;
- mcp->addr = 0x40 | (pdata->slave << 1);
+ mcp->addr = 0x40 | (addr << 1);
mcp->chip.label = "mcp23s08",
mcp->chip.set = mcp23s08_set;
mcp->chip.dbg_show = mcp23s08_dbg_show;
- mcp->chip.base = pdata->base;
+ mcp->chip.base = base;
mcp->chip.ngpio = 8;
mcp->chip.can_sleep = 1;
+ mcp->chip.dev = &spi->dev;
mcp->chip.owner = THIS_MODULE;
- spi_set_drvdata(spi, mcp);
-
- /* verify MCP_IOCON.SEQOP = 0, so sequential reads work */
+ /* verify MCP_IOCON.SEQOP = 0, so sequential reads work,
+ * and MCP_IOCON.HAEN = 1, so we work with all chips.
+ */
status = mcp23s08_read(mcp, MCP_IOCON);
if (status < 0)
goto fail;
- if (status & IOCON_SEQOP) {
+ if ((status & IOCON_SEQOP) || !(status & IOCON_HAEN)) {
status &= ~IOCON_SEQOP;
+ status |= IOCON_HAEN;
status = mcp23s08_write(mcp, MCP_IOCON, (u8) status);
if (status < 0)
goto fail;
}
/* configure ~100K pullups */
- status = mcp23s08_write(mcp, MCP_GPPU, pdata->pullups);
+ status = mcp23s08_write(mcp, MCP_GPPU, pullups);
if (status < 0)
goto fail;
tx[1] = MCP_IPOL;
memcpy(&tx[2], &mcp->cache[MCP_IPOL], sizeof(tx) - 2);
status = spi_write_then_read(mcp->spi, tx, sizeof tx, NULL, 0);
-
- /* FIXME check status... */
+ if (status < 0)
+ goto fail;
}
status = gpiochip_add(&mcp->chip);
+fail:
+ if (status < 0)
+ dev_dbg(&spi->dev, "can't setup chip %d, --> %d\n",
+ addr, status);
+ return status;
+}
+
+static int mcp23s08_probe(struct spi_device *spi)
+{
+ struct mcp23s08_platform_data *pdata;
+ unsigned addr;
+ unsigned chips = 0;
+ struct mcp23s08_driver_data *data;
+ int status;
+ unsigned base;
+
+ pdata = spi->dev.platform_data;
+ if (!pdata || !gpio_is_valid(pdata->base))
+ return -ENODEV;
+
+ for (addr = 0; addr < 4; addr++) {
+ if (!pdata->chip[addr].is_present)
+ continue;
+ chips++;
+ }
+ if (!chips)
+ return -ENODEV;
+
+ data = kzalloc(sizeof *data + chips * sizeof(struct mcp23s08),
+ GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ spi_set_drvdata(spi, data);
+
+ base = pdata->base;
+ for (addr = 0; addr < 4; addr++) {
+ if (!pdata->chip[addr].is_present)
+ continue;
+ chips--;
+ data->mcp[addr] = &data->chip[chips];
+ status = mcp23s08_probe_one(spi, addr, base,
+ pdata->chip[addr].pullups);
+ if (status < 0)
+ goto fail;
+ base += 8;
+ }
+ data->ngpio = base - pdata->base;
/* NOTE: these chips have a relatively sane IRQ framework, with
* per-signal masking and level/edge triggering. It's not yet
*/
if (pdata->setup) {
- status = pdata->setup(spi, mcp->chip.base,
- mcp->chip.ngpio, pdata->context);
+ status = pdata->setup(spi,
+ pdata->base, data->ngpio,
+ pdata->context);
if (status < 0)
dev_dbg(&spi->dev, "setup --> %d\n", status);
}
return 0;
fail:
- kfree(mcp);
+ for (addr = 0; addr < 4; addr++) {
+ int tmp;
+
+ if (!data->mcp[addr])
+ continue;
+ tmp = gpiochip_remove(&data->mcp[addr]->chip);
+ if (tmp < 0)
+ dev_err(&spi->dev, "%s --> %d\n", "remove", tmp);
+ }
+ kfree(data);
return status;
}
static int mcp23s08_remove(struct spi_device *spi)
{
- struct mcp23s08 *mcp = spi_get_drvdata(spi);
+ struct mcp23s08_driver_data *data = spi_get_drvdata(spi);
struct mcp23s08_platform_data *pdata = spi->dev.platform_data;
+ unsigned addr;
int status = 0;
if (pdata->teardown) {
status = pdata->teardown(spi,
- mcp->chip.base, mcp->chip.ngpio,
+ pdata->base, data->ngpio,
pdata->context);
if (status < 0) {
dev_err(&spi->dev, "%s --> %d\n", "teardown", status);
}
}
- status = gpiochip_remove(&mcp->chip);
+ for (addr = 0; addr < 4; addr++) {
+ int tmp;
+
+ if (!data->mcp[addr])
+ continue;
+
+ tmp = gpiochip_remove(&data->mcp[addr]->chip);
+ if (tmp < 0) {
+ dev_err(&spi->dev, "%s --> %d\n", "remove", tmp);
+ status = tmp;
+ }
+ }
if (status == 0)
- kfree(mcp);
- else
- dev_err(&spi->dev, "%s --> %d\n", "remove", status);
+ kfree(data);
return status;
}
module_exit(mcp23s08_exit);
MODULE_LICENSE("GPL");
-
gc->base = chip->gpio_start;
gc->ngpio = gpios;
gc->label = chip->client->name;
+ gc->dev = &chip->client->dev;
gc->owner = THIS_MODULE;
}
gpio->chip.base = pdata->gpio_base;
gpio->chip.can_sleep = 1;
+ gpio->chip.dev = &client->dev;
gpio->chip.owner = THIS_MODULE;
/* NOTE: the OnSemi jlc1562b is also largely compatible with
config TPS65010
tristate "TPS6501x Power Management chips"
- depends on HAVE_GPIO_LIB
+ depends on GPIOLIB
default y if MACH_OMAP_H2 || MACH_OMAP_H3 || MACH_OMAP_OSK
help
If you say yes here you get support for the TPS6501x series of
tps->outmask = board->outmask;
tps->chip.label = client->name;
+ tps->chip.dev = &client->dev;
+ tps->chip.owner = THIS_MODULE;
tps->chip.set = tps65010_gpio_set;
tps->chip.direction_output = tps65010_output;
#include <linux/semaphore.h>
#include <linux/slab.h>
#include <linux/hil.h>
-#include <linux/semaphore.h>
#include <asm/io.h>
#include <asm/system.h>
#define ERR(format, arg...) \
printk(KERN_ERR "%s:%s: " format "\n" , __FILE__, __func__ , ## arg)
-#define WARN(format, arg...) \
+#define WARNING(format, arg...) \
printk(KERN_WARNING "%s:%s: " format "\n" , __FILE__, __func__ , ## arg)
#define INFO(format, arg...) \
({ \
int status; \
if ((status = usb_submit_urb(urb, mem_flags)) < 0) { \
- WARN("usb_submit_urb failed,status=%d", status); \
+ WARNING("usb_submit_urb failed,status=%d", status); \
} \
status; \
})
DBG(4,"urb killed status %d", urb->status);
return; // Give up
default:
- WARN("urb status %d",urb->status);
+ WARNING("urb status %d",urb->status);
if (b_out->busy == 0) {
st5481_usb_pipe_reset(adapter, (bcs->channel+1)*2 | USB_DIR_OUT, NULL, NULL);
}
B_L1L2(bcs, PH_DEACTIVATE | INDICATION, NULL);
break;
default:
- WARN("pr %#x\n", pr);
+ WARNING("pr %#x\n", pr);
}
}
DBG(1,"urb killed status %d", urb->status);
break;
default:
- WARN("urb status %d",urb->status);
+ WARNING("urb status %d",urb->status);
if (d_out->busy == 0) {
st5481_usb_pipe_reset(adapter, EP_D_OUT | USB_DIR_OUT, fifo_reseted, adapter);
}
isdnhdlc_out_init(&d_out->hdlc_state, 1, 0);
if (test_and_set_bit(buf_nr, &d_out->busy)) {
- WARN("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy);
+ WARNING("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy);
return;
}
urb = d_out->urb[buf_nr];
FsmEvent(&adapter->d_out.fsm, EV_DOUT_START_XMIT, NULL);
break;
default:
- WARN("pr %#x\n", pr);
+ WARNING("pr %#x\n", pr);
break;
}
}
struct ctrl_msg *ctrl_msg;
if ((w_index = fifo_add(&ctrl->msg_fifo.f)) < 0) {
- WARN("control msg FIFO full");
+ WARNING("control msg FIFO full");
return;
}
ctrl_msg = &ctrl->msg_fifo.data[w_index];
DBG(1,"urb killed status %d", urb->status);
return; // Give up
default:
- WARN("urb status %d",urb->status);
+ WARNING("urb status %d",urb->status);
break;
}
}
DBG(2, "urb shutting down with status: %d", urb->status);
return;
default:
- WARN("nonzero urb status received: %d", urb->status);
+ WARNING("nonzero urb status received: %d", urb->status);
goto exit;
}
exit:
status = usb_submit_urb (urb, GFP_ATOMIC);
if (status)
- WARN("usb_submit_urb failed with result %d", status);
+ WARNING("usb_submit_urb failed with result %d", status);
}
/* ======================================================================
DBG(2,"");
if ((status = usb_reset_configuration (dev)) < 0) {
- WARN("reset_configuration failed,status=%d",status);
+ WARNING("reset_configuration failed,status=%d",status);
return status;
}
// Check if the config is sane
if ( altsetting->desc.bNumEndpoints != 7 ) {
- WARN("expecting 7 got %d endpoints!", altsetting->desc.bNumEndpoints);
+ WARNING("expecting 7 got %d endpoints!", altsetting->desc.bNumEndpoints);
return -EINVAL;
}
// Use alternative setting 3 on interface 0 to have 2B+D
if ((status = usb_set_interface (dev, 0, 3)) < 0) {
- WARN("usb_set_interface failed,status=%d",status);
+ WARNING("usb_set_interface failed,status=%d",status);
return status;
}
DBG(1,"urb killed status %d", urb->status);
return; // Give up
default:
- WARN("urb status %d",urb->status);
+ WARNING("urb status %d",urb->status);
break;
}
}
DBG(4,"count=%d",status);
DBG_PACKET(0x400, in->rcvbuf, status);
if (!(skb = dev_alloc_skb(status))) {
- WARN("receive out of memory\n");
+ WARNING("receive out of memory\n");
break;
}
memcpy(skb_put(skb, status), in->rcvbuf, status);
return features;
}
-static void lg_set_features(struct virtio_device *vdev, u32 features)
+static void lg_finalize_features(struct virtio_device *vdev)
{
- unsigned int i;
+ unsigned int i, bits;
struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
/* Second half of bitmap is features we accept. */
u8 *out_features = lg_features(desc) + desc->feature_len;
+ /* Give virtio_ring a chance to accept features. */
+ vring_transport_features(vdev);
+
memset(out_features, 0, desc->feature_len);
- for (i = 0; i < min(desc->feature_len * 8, 32); i++) {
- if (features & (1 << i))
+ bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
+ for (i = 0; i < bits; i++) {
+ if (test_bit(i, vdev->features))
out_features[i / 8] |= (1 << (i % 8));
}
}
/* The ops structure which hooks everything together. */
static struct virtio_config_ops lguest_config_ops = {
.get_features = lg_get_features,
- .set_features = lg_set_features,
+ .finalize_features = lg_finalize_features,
.get = lg_get,
.set = lg_set,
.get_status = lg_get_status,
interface. The device may be connected by PCI or local bus with
varying functions enabled.
+config MFD_SM501_GPIO
+ bool "Export GPIO via GPIO layer"
+ depends on MFD_SM501 && HAVE_GPIO_LIB
+ ---help---
+ This option uses the gpio library layer to export the 64 GPIO
+ lines on the SM501. The platform data is used to supply the
+ base number for the first GPIO line to register.
+
config MFD_ASIC3
bool "Support for Compaq ASIC3"
depends on GENERIC_HARDIRQS && HAVE_GPIO_LIB && ARM
config HTC_EGPIO
bool "HTC EGPIO support"
- depends on GENERIC_HARDIRQS && HAVE_GPIO_LIB && ARM
+ depends on GENERIC_HARDIRQS && GPIOLIB && ARM
help
This driver supports the CPLD egpio chip present on
several HTC phones. It provides basic support for input
config MFD_TC6393XB
bool "Support Toshiba TC6393XB"
- depends on HAVE_GPIO_LIB
+ depends on GPIOLIB
select MFD_CORE
help
Support for Toshiba Mobile IO Controller TC6393XB
ei->chip[i].dev = &(pdev->dev);
chip = &(ei->chip[i].chip);
chip->label = "htc-egpio";
+ chip->dev = &pdev->dev;
+ chip->owner = THIS_MODULE;
chip->get = egpio_get;
chip->set = egpio_set;
chip->direction_input = egpio_direction_input;
return 0;
}
+MODULE_ALIAS("platform:pasic3");
+
static struct platform_driver pasic3_driver = {
.driver = {
.name = "pasic3",
/*
* The driver for the SA11x0 MCP port.
*/
+MODULE_ALIAS("platform:sa11x0-mcp");
+
static struct platform_driver mcp_sa11x0_driver = {
.probe = mcp_sa11x0_probe,
.remove = mcp_sa11x0_remove,
if (ret)
goto fail_device;
- memzero(res, sizeof(res));
+ memset(res, 0, sizeof(res));
for (r = 0; r < cell->num_resources; r++) {
res[r].name = cell->resources[r].name;
res[r].flags = cell->resources[r].flags;
#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/pci.h>
+#include <linux/i2c-gpio.h>
#include <linux/sm501.h>
#include <linux/sm501-regs.h>
struct platform_device pdev;
};
+struct sm501_gpio;
+
+#ifdef CONFIG_MFD_SM501_GPIO
+#include <linux/gpio.h>
+
+struct sm501_gpio_chip {
+ struct gpio_chip gpio;
+ struct sm501_gpio *ourgpio; /* to get back to parent. */
+ void __iomem *regbase;
+};
+
+struct sm501_gpio {
+ struct sm501_gpio_chip low;
+ struct sm501_gpio_chip high;
+ spinlock_t lock;
+
+ unsigned int registered : 1;
+ void __iomem *regs;
+ struct resource *regs_res;
+};
+#else
+struct sm501_gpio {
+ /* no gpio support, empty definition for sm501_devdata. */
+};
+#endif
+
struct sm501_devdata {
spinlock_t reg_lock;
struct mutex clock_lock;
struct list_head devices;
+ struct sm501_gpio gpio;
struct device *dev;
struct resource *io_res;
struct resource *regs_claim;
struct sm501_platdata *platdata;
+
unsigned int in_suspend;
unsigned long pm_misc;
unsigned int rev;
};
+
#define MHZ (1000 * 1000)
#ifdef DEBUG
EXPORT_SYMBOL_GPL(sm501_modify_reg);
-unsigned long sm501_gpio_get(struct device *dev,
- unsigned long gpio)
-{
- struct sm501_devdata *sm = dev_get_drvdata(dev);
- unsigned long result;
- unsigned long reg;
-
- reg = (gpio > 32) ? SM501_GPIO_DATA_HIGH : SM501_GPIO_DATA_LOW;
- result = readl(sm->regs + reg);
-
- result >>= (gpio & 31);
- return result & 1UL;
-}
-
-EXPORT_SYMBOL_GPL(sm501_gpio_get);
-
-void sm501_gpio_set(struct device *dev,
- unsigned long gpio,
- unsigned int to,
- unsigned int dir)
-{
- struct sm501_devdata *sm = dev_get_drvdata(dev);
-
- unsigned long bit = 1 << (gpio & 31);
- unsigned long base;
- unsigned long save;
- unsigned long val;
-
- base = (gpio > 32) ? SM501_GPIO_DATA_HIGH : SM501_GPIO_DATA_LOW;
- base += SM501_GPIO;
-
- spin_lock_irqsave(&sm->reg_lock, save);
-
- val = readl(sm->regs + base) & ~bit;
- if (to)
- val |= bit;
- writel(val, sm->regs + base);
-
- val = readl(sm->regs + SM501_GPIO_DDR_LOW) & ~bit;
- if (dir)
- val |= bit;
-
- writel(val, sm->regs + SM501_GPIO_DDR_LOW);
- sm501_sync_regs(sm);
-
- spin_unlock_irqrestore(&sm->reg_lock, save);
-
-}
-
-EXPORT_SYMBOL_GPL(sm501_gpio_set);
-
-
/* sm501_unit_power
*
* alters the power active gate to set specific units on or off
return sm501_register_device(sm, pdev);
}
+#ifdef CONFIG_MFD_SM501_GPIO
+
+static inline struct sm501_gpio_chip *to_sm501_gpio(struct gpio_chip *gc)
+{
+ return container_of(gc, struct sm501_gpio_chip, gpio);
+}
+
+static inline struct sm501_devdata *sm501_gpio_to_dev(struct sm501_gpio *gpio)
+{
+ return container_of(gpio, struct sm501_devdata, gpio);
+}
+
+static int sm501_gpio_get(struct gpio_chip *chip, unsigned offset)
+
+{
+ struct sm501_gpio_chip *smgpio = to_sm501_gpio(chip);
+ unsigned long result;
+
+ result = readl(smgpio->regbase + SM501_GPIO_DATA_LOW);
+ result >>= offset;
+
+ return result & 1UL;
+}
+
+static void sm501_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+
+{
+ struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+ struct sm501_gpio *smgpio = smchip->ourgpio;
+ unsigned long bit = 1 << offset;
+ void __iomem *regs = smchip->regbase;
+ unsigned long save;
+ unsigned long val;
+
+ dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d)\n",
+ __func__, chip, offset);
+
+ spin_lock_irqsave(&smgpio->lock, save);
+
+ val = readl(regs + SM501_GPIO_DATA_LOW) & ~bit;
+ if (value)
+ val |= bit;
+ writel(val, regs);
+
+ sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+ spin_unlock_irqrestore(&smgpio->lock, save);
+}
+
+static int sm501_gpio_input(struct gpio_chip *chip, unsigned offset)
+{
+ struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+ struct sm501_gpio *smgpio = smchip->ourgpio;
+ void __iomem *regs = smchip->regbase;
+ unsigned long bit = 1 << offset;
+ unsigned long save;
+ unsigned long ddr;
+
+ dev_info(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d)\n",
+ __func__, chip, offset);
+
+ spin_lock_irqsave(&smgpio->lock, save);
+
+ ddr = readl(regs + SM501_GPIO_DDR_LOW);
+ writel(ddr & ~bit, regs + SM501_GPIO_DDR_LOW);
+
+ sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+ spin_unlock_irqrestore(&smgpio->lock, save);
+
+ return 0;
+}
+
+static int sm501_gpio_output(struct gpio_chip *chip,
+ unsigned offset, int value)
+{
+ struct sm501_gpio_chip *smchip = to_sm501_gpio(chip);
+ struct sm501_gpio *smgpio = smchip->ourgpio;
+ unsigned long bit = 1 << offset;
+ void __iomem *regs = smchip->regbase;
+ unsigned long save;
+ unsigned long val;
+ unsigned long ddr;
+
+ dev_dbg(sm501_gpio_to_dev(smgpio)->dev, "%s(%p,%d,%d)\n",
+ __func__, chip, offset, value);
+
+ spin_lock_irqsave(&smgpio->lock, save);
+
+ val = readl(regs + SM501_GPIO_DATA_LOW);
+ if (value)
+ val |= bit;
+ else
+ val &= ~bit;
+ writel(val, regs);
+
+ ddr = readl(regs + SM501_GPIO_DDR_LOW);
+ writel(ddr | bit, regs + SM501_GPIO_DDR_LOW);
+
+ sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+ writel(val, regs + SM501_GPIO_DATA_LOW);
+
+ sm501_sync_regs(sm501_gpio_to_dev(smgpio));
+ spin_unlock_irqrestore(&smgpio->lock, save);
+
+ return 0;
+}
+
+static struct gpio_chip gpio_chip_template = {
+ .ngpio = 32,
+ .direction_input = sm501_gpio_input,
+ .direction_output = sm501_gpio_output,
+ .set = sm501_gpio_set,
+ .get = sm501_gpio_get,
+};
+
+static int __devinit sm501_gpio_register_chip(struct sm501_devdata *sm,
+ struct sm501_gpio *gpio,
+ struct sm501_gpio_chip *chip)
+{
+ struct sm501_platdata *pdata = sm->platdata;
+ struct gpio_chip *gchip = &chip->gpio;
+ int base = pdata->gpio_base;
+
+ chip->gpio = gpio_chip_template;
+
+ if (chip == &gpio->high) {
+ if (base > 0)
+ base += 32;
+ chip->regbase = gpio->regs + SM501_GPIO_DATA_HIGH;
+ gchip->label = "SM501-HIGH";
+ } else {
+ chip->regbase = gpio->regs + SM501_GPIO_DATA_LOW;
+ gchip->label = "SM501-LOW";
+ }
+
+ gchip->base = base;
+ chip->ourgpio = gpio;
+
+ return gpiochip_add(gchip);
+}
+
+static int sm501_register_gpio(struct sm501_devdata *sm)
+{
+ struct sm501_gpio *gpio = &sm->gpio;
+ resource_size_t iobase = sm->io_res->start + SM501_GPIO;
+ int ret;
+ int tmp;
+
+ dev_dbg(sm->dev, "registering gpio block %08llx\n",
+ (unsigned long long)iobase);
+
+ spin_lock_init(&gpio->lock);
+
+ gpio->regs_res = request_mem_region(iobase, 0x20, "sm501-gpio");
+ if (gpio->regs_res == NULL) {
+ dev_err(sm->dev, "gpio: failed to request region\n");
+ return -ENXIO;
+ }
+
+ gpio->regs = ioremap(iobase, 0x20);
+ if (gpio->regs == NULL) {
+ dev_err(sm->dev, "gpio: failed to remap registers\n");
+ ret = -ENXIO;
+ goto err_claimed;
+ }
+
+ /* Register both our chips. */
+
+ ret = sm501_gpio_register_chip(sm, gpio, &gpio->low);
+ if (ret) {
+ dev_err(sm->dev, "failed to add low chip\n");
+ goto err_mapped;
+ }
+
+ ret = sm501_gpio_register_chip(sm, gpio, &gpio->high);
+ if (ret) {
+ dev_err(sm->dev, "failed to add high chip\n");
+ goto err_low_chip;
+ }
+
+ gpio->registered = 1;
+
+ return 0;
+
+ err_low_chip:
+ tmp = gpiochip_remove(&gpio->low.gpio);
+ if (tmp) {
+ dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n");
+ return ret;
+ }
+
+ err_mapped:
+ iounmap(gpio->regs);
+
+ err_claimed:
+ release_resource(gpio->regs_res);
+ kfree(gpio->regs_res);
+
+ return ret;
+}
+
+static void sm501_gpio_remove(struct sm501_devdata *sm)
+{
+ struct sm501_gpio *gpio = &sm->gpio;
+ int ret;
+
+ if (!sm->gpio.registered)
+ return;
+
+ ret = gpiochip_remove(&gpio->low.gpio);
+ if (ret)
+ dev_err(sm->dev, "cannot remove low chip, cannot tidy up\n");
+
+ ret = gpiochip_remove(&gpio->high.gpio);
+ if (ret)
+ dev_err(sm->dev, "cannot remove high chip, cannot tidy up\n");
+
+ iounmap(gpio->regs);
+ release_resource(gpio->regs_res);
+ kfree(gpio->regs_res);
+}
+
+static inline int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
+{
+ struct sm501_gpio *gpio = &sm->gpio;
+ int base = (pin < 32) ? gpio->low.gpio.base : gpio->high.gpio.base;
+
+ return (pin % 32) + base;
+}
+
+static inline int sm501_gpio_isregistered(struct sm501_devdata *sm)
+{
+ return sm->gpio.registered;
+}
+#else
+static inline int sm501_register_gpio(struct sm501_devdata *sm)
+{
+ return 0;
+}
+
+static inline void sm501_gpio_remove(struct sm501_devdata *sm)
+{
+}
+
+static inline int sm501_gpio_pin2nr(struct sm501_devdata *sm, unsigned int pin)
+{
+ return -1;
+}
+
+static inline int sm501_gpio_isregistered(struct sm501_devdata *sm)
+{
+ return 0;
+}
+#endif
+
+static int sm501_register_gpio_i2c_instance(struct sm501_devdata *sm,
+ struct sm501_platdata_gpio_i2c *iic)
+{
+ struct i2c_gpio_platform_data *icd;
+ struct platform_device *pdev;
+
+ pdev = sm501_create_subdev(sm, "i2c-gpio", 0,
+ sizeof(struct i2c_gpio_platform_data));
+ if (!pdev)
+ return -ENOMEM;
+
+ icd = pdev->dev.platform_data;
+
+ /* We keep the pin_sda and pin_scl fields relative in case the
+ * same platform data is passed to >1 SM501.
+ */
+
+ icd->sda_pin = sm501_gpio_pin2nr(sm, iic->pin_sda);
+ icd->scl_pin = sm501_gpio_pin2nr(sm, iic->pin_scl);
+ icd->timeout = iic->timeout;
+ icd->udelay = iic->udelay;
+
+ /* note, we can't use either of the pin numbers, as the i2c-gpio
+ * driver uses the platform.id field to generate the bus number
+ * to register with the i2c core; The i2c core doesn't have enough
+ * entries to deal with anything we currently use.
+ */
+
+ pdev->id = iic->bus_num;
+
+ dev_info(sm->dev, "registering i2c-%d: sda=%d (%d), scl=%d (%d)\n",
+ iic->bus_num,
+ icd->sda_pin, iic->pin_sda, icd->scl_pin, iic->pin_scl);
+
+ return sm501_register_device(sm, pdev);
+}
+
+static int sm501_register_gpio_i2c(struct sm501_devdata *sm,
+ struct sm501_platdata *pdata)
+{
+ struct sm501_platdata_gpio_i2c *iic = pdata->gpio_i2c;
+ int index;
+ int ret;
+
+ for (index = 0; index < pdata->gpio_i2c_nr; index++, iic++) {
+ ret = sm501_register_gpio_i2c_instance(sm, iic);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
/* sm501_dbg_regs
*
* Debug attribute to attach to parent device to show core registers
static int sm501_init_dev(struct sm501_devdata *sm)
{
struct sm501_initdata *idata;
+ struct sm501_platdata *pdata;
resource_size_t mem_avail;
unsigned long dramctrl;
unsigned long devid;
/* check to see if we have some device initialisation */
- idata = sm->platdata ? sm->platdata->init : NULL;
+ pdata = sm->platdata;
+ idata = pdata ? pdata->init : NULL;
+
if (idata) {
sm501_init_regs(sm, idata);
sm501_register_usbhost(sm, &mem_avail);
if (idata->devices & (SM501_USE_UART0 | SM501_USE_UART1))
sm501_register_uart(sm, idata->devices);
+ if (idata->devices & SM501_USE_GPIO)
+ sm501_register_gpio(sm);
+ }
+
+ if (pdata->gpio_i2c != NULL && pdata->gpio_i2c_nr > 0) {
+ if (!sm501_gpio_isregistered(sm))
+ dev_err(sm->dev, "no gpio available for i2c gpio.\n");
+ else
+ sm501_register_gpio_i2c(sm, pdata);
}
ret = sm501_check_clocks(sm);
}
#ifdef CONFIG_PM
+
/* power management support */
+static void sm501_set_power(struct sm501_devdata *sm, int on)
+{
+ struct sm501_platdata *pd = sm->platdata;
+
+ if (pd == NULL)
+ return;
+
+ if (pd->get_power) {
+ if (pd->get_power(sm->dev) == on) {
+ dev_dbg(sm->dev, "is already %d\n", on);
+ return;
+ }
+ }
+
+ if (pd->set_power) {
+ dev_dbg(sm->dev, "setting power to %d\n", on);
+
+ pd->set_power(sm->dev, on);
+ sm501_mdelay(sm, 10);
+ }
+}
+
static int sm501_plat_suspend(struct platform_device *pdev, pm_message_t state)
{
struct sm501_devdata *sm = platform_get_drvdata(pdev);
sm->pm_misc = readl(sm->regs + SM501_MISC_CONTROL);
sm501_dump_regs(sm);
+
+ if (sm->platdata) {
+ if (sm->platdata->flags & SM501_FLAG_SUSPEND_OFF)
+ sm501_set_power(sm, 0);
+ }
+
return 0;
}
{
struct sm501_devdata *sm = platform_get_drvdata(pdev);
+ sm501_set_power(sm, 1);
+
sm501_dump_regs(sm);
sm501_dump_gate(sm);
sm501_dump_clk(sm);
static struct sm501_platdata sm501_pci_platdata = {
.init = &sm501_pci_initdata,
.fb = &sm501_fb_pdata,
+ .gpio_base = -1,
};
static int sm501_pci_probe(struct pci_dev *dev,
sm501_remove_sub(sm, smdev);
device_remove_file(sm->dev, &dev_attr_dbg_regs);
+
+ sm501_gpio_remove(sm);
}
static void sm501_pci_remove(struct pci_dev *dev)
.remove = sm501_pci_remove,
};
+MODULE_ALIAS("platform:sm501");
+
static struct platform_driver sm501_plat_drv = {
.driver = {
.name = "sm501",
for your IBM server.
config PHANTOM
- tristate "Sensable PHANToM"
+ tristate "Sensable PHANToM (PCI)"
depends on PCI
help
Say Y here if you want to build a driver for Sensable PHANToM device.
+ This driver is only for PCI PHANToMs.
+
If you choose to build module, its name will be phantom. If unsure,
say N here.
This is a driver for the WMI extensions (wireless and bluetooth power
control) of the HP Compaq TC1100 tablet.
+config HP_WMI
+ tristate "HP WMI extras"
+ depends on ACPI_WMI
+ depends on INPUT
+ depends on RFKILL
+ help
+ Say Y here if you want to support WMI-based hotkeys on HP laptops and
+ to read data from WMI such as docking or ambient light sensor state.
+
+ To compile this driver as a module, choose M here: the module will
+ be called hp-wmi.
+
config MSI_LAPTOP
tristate "MSI Laptop Extras"
depends on X86
config HP_ILO
tristate "Channel interface driver for HP iLO/iLO2 processor"
+ depends on PCI
default n
help
The channel interface driver allows applications to communicate
obj-$(CONFIG_ATMEL_PWM) += atmel_pwm.o
obj-$(CONFIG_ATMEL_SSC) += atmel-ssc.o
obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o
+obj-$(CONFIG_HP_WMI) += hp-wmi.o
obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o
obj-$(CONFIG_LKDTM) += lkdtm.o
obj-$(CONFIG_TIFM_CORE) += tifm_core.o
--- /dev/null
+/*
+ * HP WMI hotkeys
+ *
+ * Copyright (C) 2008 Red Hat <mjg@redhat.com>
+ *
+ * Portions based on wistron_btns.c:
+ * Copyright (C) 2005 Miloslav Trmac <mitr@volny.cz>
+ * Copyright (C) 2005 Bernhard Rosenkraenzer <bero@arklinux.org>
+ * Copyright (C) 2005 Dmitry Torokhov <dtor@mail.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/input.h>
+#include <acpi/acpi_drivers.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+#include <linux/rfkill.h>
+#include <linux/string.h>
+
+MODULE_AUTHOR("Matthew Garrett <mjg59@srcf.ucam.org>");
+MODULE_DESCRIPTION("HP laptop WMI hotkeys driver");
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS("wmi:95F24279-4D7B-4334-9387-ACCDC67EF61C");
+MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4");
+
+#define HPWMI_EVENT_GUID "95F24279-4D7B-4334-9387-ACCDC67EF61C"
+#define HPWMI_BIOS_GUID "5FB7F034-2C63-45e9-BE91-3D44E2C707E4"
+
+#define HPWMI_DISPLAY_QUERY 0x1
+#define HPWMI_HDDTEMP_QUERY 0x2
+#define HPWMI_ALS_QUERY 0x3
+#define HPWMI_DOCK_QUERY 0x4
+#define HPWMI_WIRELESS_QUERY 0x5
+
+static int __init hp_wmi_bios_setup(struct platform_device *device);
+static int __exit hp_wmi_bios_remove(struct platform_device *device);
+
+struct bios_args {
+ u32 signature;
+ u32 command;
+ u32 commandtype;
+ u32 datasize;
+ u32 data;
+};
+
+struct bios_return {
+ u32 sigpass;
+ u32 return_code;
+ u32 value;
+};
+
+struct key_entry {
+ char type; /* See KE_* below */
+ u8 code;
+ u16 keycode;
+};
+
+enum { KE_KEY, KE_SW, KE_END };
+
+static struct key_entry hp_wmi_keymap[] = {
+ {KE_SW, 0x01, SW_DOCK},
+ {KE_KEY, 0x02, KEY_BRIGHTNESSUP},
+ {KE_KEY, 0x03, KEY_BRIGHTNESSDOWN},
+ {KE_KEY, 0x04, KEY_HELP},
+ {KE_END, 0}
+};
+
+static struct input_dev *hp_wmi_input_dev;
+static struct platform_device *hp_wmi_platform_dev;
+
+static struct rfkill *wifi_rfkill;
+static struct rfkill *bluetooth_rfkill;
+static struct rfkill *wwan_rfkill;
+
+static struct platform_driver hp_wmi_driver = {
+ .driver = {
+ .name = "hp-wmi",
+ .owner = THIS_MODULE,
+ },
+ .probe = hp_wmi_bios_setup,
+ .remove = hp_wmi_bios_remove,
+};
+
+static int hp_wmi_perform_query(int query, int write, int value)
+{
+ struct bios_return bios_return;
+ acpi_status status;
+ union acpi_object *obj;
+ struct bios_args args = {
+ .signature = 0x55434553,
+ .command = write ? 0x2 : 0x1,
+ .commandtype = query,
+ .datasize = write ? 0x4 : 0,
+ .data = value,
+ };
+ struct acpi_buffer input = { sizeof(struct bios_args), &args };
+ struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+
+ status = wmi_evaluate_method(HPWMI_BIOS_GUID, 0, 0x3, &input, &output);
+
+ obj = output.pointer;
+
+ if (!obj || obj->type != ACPI_TYPE_BUFFER)
+ return -EINVAL;
+
+ bios_return = *((struct bios_return *)obj->buffer.pointer);
+ if (bios_return.return_code > 0)
+ return bios_return.return_code * -1;
+ else
+ return bios_return.value;
+}
+
+static int hp_wmi_display_state(void)
+{
+ return hp_wmi_perform_query(HPWMI_DISPLAY_QUERY, 0, 0);
+}
+
+static int hp_wmi_hddtemp_state(void)
+{
+ return hp_wmi_perform_query(HPWMI_HDDTEMP_QUERY, 0, 0);
+}
+
+static int hp_wmi_als_state(void)
+{
+ return hp_wmi_perform_query(HPWMI_ALS_QUERY, 0, 0);
+}
+
+static int hp_wmi_dock_state(void)
+{
+ return hp_wmi_perform_query(HPWMI_DOCK_QUERY, 0, 0);
+}
+
+static int hp_wmi_wifi_set(void *data, enum rfkill_state state)
+{
+ if (state)
+ return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x101);
+ else
+ return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x100);
+}
+
+static int hp_wmi_bluetooth_set(void *data, enum rfkill_state state)
+{
+ if (state)
+ return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x202);
+ else
+ return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x200);
+}
+
+static int hp_wmi_wwan_set(void *data, enum rfkill_state state)
+{
+ if (state)
+ return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x404);
+ else
+ return hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 1, 0x400);
+}
+
+static int hp_wmi_wifi_state(void)
+{
+ int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
+
+ if (wireless & 0x100)
+ return 1;
+ else
+ return 0;
+}
+
+static int hp_wmi_bluetooth_state(void)
+{
+ int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
+
+ if (wireless & 0x10000)
+ return 1;
+ else
+ return 0;
+}
+
+static int hp_wmi_wwan_state(void)
+{
+ int wireless = hp_wmi_perform_query(HPWMI_WIRELESS_QUERY, 0, 0);
+
+ if (wireless & 0x1000000)
+ return 1;
+ else
+ return 0;
+}
+
+static ssize_t show_display(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ int value = hp_wmi_display_state();
+ if (value < 0)
+ return -EINVAL;
+ return sprintf(buf, "%d\n", value);
+}
+
+static ssize_t show_hddtemp(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ int value = hp_wmi_hddtemp_state();
+ if (value < 0)
+ return -EINVAL;
+ return sprintf(buf, "%d\n", value);
+}
+
+static ssize_t show_als(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ int value = hp_wmi_als_state();
+ if (value < 0)
+ return -EINVAL;
+ return sprintf(buf, "%d\n", value);
+}
+
+static ssize_t show_dock(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ int value = hp_wmi_dock_state();
+ if (value < 0)
+ return -EINVAL;
+ return sprintf(buf, "%d\n", value);
+}
+
+static ssize_t set_als(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ u32 tmp = simple_strtoul(buf, NULL, 10);
+ hp_wmi_perform_query(HPWMI_ALS_QUERY, 1, tmp);
+ return count;
+}
+
+static DEVICE_ATTR(display, S_IRUGO, show_display, NULL);
+static DEVICE_ATTR(hddtemp, S_IRUGO, show_hddtemp, NULL);
+static DEVICE_ATTR(als, S_IRUGO | S_IWUSR, show_als, set_als);
+static DEVICE_ATTR(dock, S_IRUGO, show_dock, NULL);
+
+static struct key_entry *hp_wmi_get_entry_by_scancode(int code)
+{
+ struct key_entry *key;
+
+ for (key = hp_wmi_keymap; key->type != KE_END; key++)
+ if (code == key->code)
+ return key;
+
+ return NULL;
+}
+
+static struct key_entry *hp_wmi_get_entry_by_keycode(int keycode)
+{
+ struct key_entry *key;
+
+ for (key = hp_wmi_keymap; key->type != KE_END; key++)
+ if (key->type == KE_KEY && keycode == key->keycode)
+ return key;
+
+ return NULL;
+}
+
+static int hp_wmi_getkeycode(struct input_dev *dev, int scancode, int *keycode)
+{
+ struct key_entry *key = hp_wmi_get_entry_by_scancode(scancode);
+
+ if (key && key->type == KE_KEY) {
+ *keycode = key->keycode;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int hp_wmi_setkeycode(struct input_dev *dev, int scancode, int keycode)
+{
+ struct key_entry *key;
+ int old_keycode;
+
+ if (keycode < 0 || keycode > KEY_MAX)
+ return -EINVAL;
+
+ key = hp_wmi_get_entry_by_scancode(scancode);
+ if (key && key->type == KE_KEY) {
+ old_keycode = key->keycode;
+ key->keycode = keycode;
+ set_bit(keycode, dev->keybit);
+ if (!hp_wmi_get_entry_by_keycode(old_keycode))
+ clear_bit(old_keycode, dev->keybit);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+void hp_wmi_notify(u32 value, void *context)
+{
+ struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL };
+ static struct key_entry *key;
+ union acpi_object *obj;
+
+ wmi_get_event_data(value, &response);
+
+ obj = (union acpi_object *)response.pointer;
+
+ if (obj && obj->type == ACPI_TYPE_BUFFER && obj->buffer.length == 8) {
+ int eventcode = *((u8 *) obj->buffer.pointer);
+ key = hp_wmi_get_entry_by_scancode(eventcode);
+ if (key) {
+ switch (key->type) {
+ case KE_KEY:
+ input_report_key(hp_wmi_input_dev,
+ key->keycode, 1);
+ input_sync(hp_wmi_input_dev);
+ input_report_key(hp_wmi_input_dev,
+ key->keycode, 0);
+ input_sync(hp_wmi_input_dev);
+ break;
+ case KE_SW:
+ input_report_switch(hp_wmi_input_dev,
+ key->keycode,
+ hp_wmi_dock_state());
+ input_sync(hp_wmi_input_dev);
+ break;
+ }
+ } else if (eventcode == 0x5) {
+ if (wifi_rfkill)
+ wifi_rfkill->state = hp_wmi_wifi_state();
+ if (bluetooth_rfkill)
+ bluetooth_rfkill->state =
+ hp_wmi_bluetooth_state();
+ if (wwan_rfkill)
+ wwan_rfkill->state = hp_wmi_wwan_state();
+ } else
+ printk(KERN_INFO "HP WMI: Unknown key pressed - %x\n",
+ eventcode);
+ } else
+ printk(KERN_INFO "HP WMI: Unknown response received\n");
+}
+
+static int __init hp_wmi_input_setup(void)
+{
+ struct key_entry *key;
+ int err;
+
+ hp_wmi_input_dev = input_allocate_device();
+
+ hp_wmi_input_dev->name = "HP WMI hotkeys";
+ hp_wmi_input_dev->phys = "wmi/input0";
+ hp_wmi_input_dev->id.bustype = BUS_HOST;
+ hp_wmi_input_dev->getkeycode = hp_wmi_getkeycode;
+ hp_wmi_input_dev->setkeycode = hp_wmi_setkeycode;
+
+ for (key = hp_wmi_keymap; key->type != KE_END; key++) {
+ switch (key->type) {
+ case KE_KEY:
+ set_bit(EV_KEY, hp_wmi_input_dev->evbit);
+ set_bit(key->keycode, hp_wmi_input_dev->keybit);
+ break;
+ case KE_SW:
+ set_bit(EV_SW, hp_wmi_input_dev->evbit);
+ set_bit(key->keycode, hp_wmi_input_dev->swbit);
+ break;
+ }
+ }
+
+ err = input_register_device(hp_wmi_input_dev);
+
+ if (err) {
+ input_free_device(hp_wmi_input_dev);
+ return err;
+ }
+
+ return 0;
+}
+
+static void cleanup_sysfs(struct platform_device *device)
+{
+ device_remove_file(&device->dev, &dev_attr_display);
+ device_remove_file(&device->dev, &dev_attr_hddtemp);
+ device_remove_file(&device->dev, &dev_attr_als);
+ device_remove_file(&device->dev, &dev_attr_dock);
+}
+
+static int __init hp_wmi_bios_setup(struct platform_device *device)
+{
+ int err;
+
+ err = device_create_file(&device->dev, &dev_attr_display);
+ if (err)
+ goto add_sysfs_error;
+ err = device_create_file(&device->dev, &dev_attr_hddtemp);
+ if (err)
+ goto add_sysfs_error;
+ err = device_create_file(&device->dev, &dev_attr_als);
+ if (err)
+ goto add_sysfs_error;
+ err = device_create_file(&device->dev, &dev_attr_dock);
+ if (err)
+ goto add_sysfs_error;
+
+ wifi_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WLAN);
+ wifi_rfkill->name = "hp-wifi";
+ wifi_rfkill->state = hp_wmi_wifi_state();
+ wifi_rfkill->toggle_radio = hp_wmi_wifi_set;
+ wifi_rfkill->user_claim_unsupported = 1;
+
+ bluetooth_rfkill = rfkill_allocate(&device->dev,
+ RFKILL_TYPE_BLUETOOTH);
+ bluetooth_rfkill->name = "hp-bluetooth";
+ bluetooth_rfkill->state = hp_wmi_bluetooth_state();
+ bluetooth_rfkill->toggle_radio = hp_wmi_bluetooth_set;
+ bluetooth_rfkill->user_claim_unsupported = 1;
+
+ wwan_rfkill = rfkill_allocate(&device->dev, RFKILL_TYPE_WIMAX);
+ wwan_rfkill->name = "hp-wwan";
+ wwan_rfkill->state = hp_wmi_wwan_state();
+ wwan_rfkill->toggle_radio = hp_wmi_wwan_set;
+ wwan_rfkill->user_claim_unsupported = 1;
+
+ rfkill_register(wifi_rfkill);
+ rfkill_register(bluetooth_rfkill);
+ rfkill_register(wwan_rfkill);
+
+ return 0;
+add_sysfs_error:
+ cleanup_sysfs(device);
+ return err;
+}
+
+static int __exit hp_wmi_bios_remove(struct platform_device *device)
+{
+ cleanup_sysfs(device);
+
+ rfkill_unregister(wifi_rfkill);
+ rfkill_unregister(bluetooth_rfkill);
+ rfkill_unregister(wwan_rfkill);
+
+ return 0;
+}
+
+static int __init hp_wmi_init(void)
+{
+ int err;
+
+ if (wmi_has_guid(HPWMI_EVENT_GUID)) {
+ err = wmi_install_notify_handler(HPWMI_EVENT_GUID,
+ hp_wmi_notify, NULL);
+ if (!err)
+ hp_wmi_input_setup();
+ }
+
+ if (wmi_has_guid(HPWMI_BIOS_GUID)) {
+ err = platform_driver_register(&hp_wmi_driver);
+ if (err)
+ return 0;
+ hp_wmi_platform_dev = platform_device_alloc("hp-wmi", -1);
+ if (!hp_wmi_platform_dev) {
+ platform_driver_unregister(&hp_wmi_driver);
+ return 0;
+ }
+ platform_device_add(hp_wmi_platform_dev);
+ }
+
+ return 0;
+}
+
+static void __exit hp_wmi_exit(void)
+{
+ if (wmi_has_guid(HPWMI_EVENT_GUID)) {
+ wmi_remove_notify_handler(HPWMI_EVENT_GUID);
+ input_unregister_device(hp_wmi_input_dev);
+ }
+ if (hp_wmi_platform_dev) {
+ platform_device_del(hp_wmi_platform_dev);
+ platform_driver_unregister(&hp_wmi_driver);
+ }
+}
+
+module_init(hp_wmi_init);
+module_exit(hp_wmi_exit);
module_exit(phantom_exit);
MODULE_AUTHOR("Jiri Slaby <jirislaby@gmail.com>");
-MODULE_DESCRIPTION("Sensable Phantom driver");
+MODULE_DESCRIPTION("Sensable Phantom driver (PCI devices)");
MODULE_LICENSE("GPL");
MODULE_VERSION(PHANTOM_VERSION);
* @name: MTD device name or number string
* @vid_hdr_offs: VID header offset
*/
-struct mtd_dev_param
-{
+struct mtd_dev_param {
char name[MTD_PARAM_LEN_MAX];
int vid_hdr_offs;
};
/* Numbers of elements set in the @mtd_dev_param array */
-static int mtd_devs = 0;
+static int mtd_devs;
/* MTD devices specification parameters */
static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES];
}
/**
- * ubi_get_by_major - get UBI device description object by character device
- * major number.
+ * ubi_get_by_major - get UBI device by character device major number.
* @major: major number
*
* This function is similar to 'ubi_get_device()', but it searches the device
ubi_free_volume(ubi, ubi->volumes[i]);
}
+/**
+ * free_user_volumes - free all user volumes.
+ * @ubi: UBI device description object
+ *
+ * Normally the volumes are freed at the release function of the volume device
+ * objects. However, on error paths the volumes have to be freed before the
+ * device objects have been initialized.
+ */
+static void free_user_volumes(struct ubi_device *ubi)
+{
+ int i;
+
+ for (i = 0; i < ubi->vtbl_slots; i++)
+ if (ubi->volumes[i]) {
+ kfree(ubi->volumes[i]->eba_tbl);
+ kfree(ubi->volumes[i]);
+ }
+}
+
/**
* uif_init - initialize user interfaces for an UBI device.
* @ubi: UBI device description object
*
* This function returns zero in case of success and a negative error code in
- * case of failure.
+ * case of failure. Note, this function destroys all volumes if it failes.
*/
static int uif_init(struct ubi_device *ubi)
{
- int i, err;
+ int i, err, do_free = 0;
dev_t dev;
sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num);
ubi_assert(MINOR(dev) == 0);
cdev_init(&ubi->cdev, &ubi_cdev_operations);
- dbg_msg("%s major is %u", ubi->ubi_name, MAJOR(dev));
+ dbg_gen("%s major is %u", ubi->ubi_name, MAJOR(dev));
ubi->cdev.owner = THIS_MODULE;
err = cdev_add(&ubi->cdev, dev, 1);
out_volumes:
kill_volumes(ubi);
+ do_free = 0;
out_sysfs:
ubi_sysfs_close(ubi);
cdev_del(&ubi->cdev);
out_unreg:
+ if (do_free)
+ free_user_volumes(ubi);
unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1);
ubi_err("cannot initialize UBI %s, error %d", ubi->ubi_name, err);
return err;
/**
* uif_close - close user interfaces for an UBI device.
* @ubi: UBI device description object
+ *
+ * Note, since this function un-registers UBI volume device objects (@vol->dev),
+ * the memory allocated voe the volumes is freed as well (in the release
+ * function).
*/
static void uif_close(struct ubi_device *ubi)
{
unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1);
}
+/**
+ * free_internal_volumes - free internal volumes.
+ * @ubi: UBI device description object
+ */
+static void free_internal_volumes(struct ubi_device *ubi)
+{
+ int i;
+
+ for (i = ubi->vtbl_slots;
+ i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
+ kfree(ubi->volumes[i]->eba_tbl);
+ kfree(ubi->volumes[i]);
+ }
+}
+
/**
* attach_by_scanning - attach an MTD device using scanning method.
* @ubi: UBI device descriptor
out_wl:
ubi_wl_close(ubi);
out_vtbl:
+ free_internal_volumes(ubi);
vfree(ubi->vtbl);
out_si:
ubi_scan_destroy_si(si);
}
/**
- * io_init - initialize I/O unit for a given UBI device.
+ * io_init - initialize I/O sub-system for a given UBI device.
* @ubi: UBI device description object
*
* If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are
ubi->min_io_size = ubi->mtd->writesize;
ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft;
- /* Make sure minimal I/O unit is power of 2 */
+ /*
+ * Make sure minimal I/O unit is power of 2. Note, there is no
+ * fundamental reason for this assumption. It is just an optimization
+ * which allows us to avoid costly division operations.
+ */
if (!is_power_of_2(ubi->min_io_size)) {
ubi_err("min. I/O unit (%d) is not power of 2",
ubi->min_io_size);
if (ubi->vid_hdr_offset < UBI_EC_HDR_SIZE ||
ubi->leb_start < ubi->vid_hdr_offset + UBI_VID_HDR_SIZE ||
ubi->leb_start > ubi->peb_size - UBI_VID_HDR_SIZE ||
- ubi->leb_start % ubi->min_io_size) {
+ ubi->leb_start & (ubi->min_io_size - 1)) {
ubi_err("bad VID header (%d) or data offsets (%d)",
ubi->vid_hdr_offset, ubi->leb_start);
return -EINVAL;
/*
* Clear the auto-resize flag in the volume in-memory copy of the
- * volume table, and 'ubi_resize_volume()' will propogate this change
+ * volume table, and 'ubi_resize_volume()' will propagate this change
* to the flash.
*/
ubi->vtbl[vol_id].flags &= ~UBI_VTBL_AUTORESIZE_FLG;
struct ubi_vtbl_record vtbl_rec;
/*
- * No avalilable PEBs to re-size the volume, clear the flag on
+ * No available PEBs to re-size the volume, clear the flag on
* flash and exit.
*/
memcpy(&vtbl_rec, &ubi->vtbl[vol_id],
/**
* ubi_attach_mtd_dev - attach an MTD device.
- * @mtd_dev: MTD device description object
+ * @mtd: MTD device description object
* @ubi_num: number to assign to the new UBI device
* @vid_hdr_offset: VID header offset
*
* This function attaches MTD device @mtd_dev to UBI and assign @ubi_num number
* to the newly created UBI device, unless @ubi_num is %UBI_DEV_NUM_AUTO, in
- * which case this function finds a vacant device nubert and assings it
+ * which case this function finds a vacant device number and assigns it
* automatically. Returns the new UBI device number in case of success and a
* negative error code in case of failure.
*
int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
{
struct ubi_device *ubi;
- int i, err;
+ int i, err, do_free = 1;
/*
* Check if we already have the same MTD device attached.
if (!ubi_devices[ubi_num])
break;
if (ubi_num == UBI_MAX_DEVICES) {
- dbg_err("only %d UBI devices may be created", UBI_MAX_DEVICES);
+ dbg_err("only %d UBI devices may be created",
+ UBI_MAX_DEVICES);
return -ENFILE;
}
} else {
mutex_init(&ubi->buf_mutex);
mutex_init(&ubi->ckvol_mutex);
+ mutex_init(&ubi->mult_mutex);
mutex_init(&ubi->volumes_mutex);
spin_lock_init(&ubi->volumes_lock);
err = uif_init(ubi);
if (err)
- goto out_detach;
+ goto out_nofree;
ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name);
if (IS_ERR(ubi->bgt_thread)) {
ubi->beb_rsvd_pebs);
ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec);
- /* Enable the background thread */
- if (!DBG_DISABLE_BGT) {
+ if (!DBG_DISABLE_BGT)
ubi->thread_enabled = 1;
- wake_up_process(ubi->bgt_thread);
- }
+ wake_up_process(ubi->bgt_thread);
ubi_devices[ubi_num] = ubi;
return ubi_num;
out_uif:
uif_close(ubi);
+out_nofree:
+ do_free = 0;
out_detach:
- ubi_eba_close(ubi);
ubi_wl_close(ubi);
+ if (do_free)
+ free_user_volumes(ubi);
+ free_internal_volumes(ubi);
vfree(ubi->vtbl);
out_free:
vfree(ubi->peb_buf1);
kthread_stop(ubi->bgt_thread);
uif_close(ubi);
- ubi_eba_close(ubi);
ubi_wl_close(ubi);
+ free_internal_volumes(ubi);
vfree(ubi->vtbl);
put_mtd_device(ubi->mtd);
vfree(ubi->peb_buf1);
module_exit(ubi_exit);
/**
- * bytes_str_to_int - convert a string representing number of bytes to an
- * integer.
+ * bytes_str_to_int - convert a number of bytes string into an integer.
* @str: the string to convert
*
* This function returns positive resulting integer in case of success and a
#include <linux/stat.h>
#include <linux/ioctl.h>
#include <linux/capability.h>
+#include <linux/uaccess.h>
#include <linux/smp_lock.h>
#include <mtd/ubi-user.h>
-#include <asm/uaccess.h>
#include <asm/div64.h>
#include "ubi.h"
else
mode = UBI_READONLY;
- dbg_msg("open volume %d, mode %d", vol_id, mode);
+ dbg_gen("open volume %d, mode %d", vol_id, mode);
desc = ubi_open_volume(ubi_num, vol_id, mode);
unlock_kernel();
struct ubi_volume_desc *desc = file->private_data;
struct ubi_volume *vol = desc->vol;
- dbg_msg("release volume %d, mode %d", vol->vol_id, desc->mode);
+ dbg_gen("release volume %d, mode %d", vol->vol_id, desc->mode);
if (vol->updating) {
ubi_warn("update of volume %d not finished, volume is damaged",
vol->updating = 0;
vfree(vol->upd_buf);
} else if (vol->changing_leb) {
- dbg_msg("only %lld of %lld bytes received for atomic LEB change"
+ dbg_gen("only %lld of %lld bytes received for atomic LEB change"
" for volume %d:%d, cancel", vol->upd_received,
vol->upd_bytes, vol->ubi->ubi_num, vol->vol_id);
vol->changing_leb = 0;
return -EINVAL;
}
- dbg_msg("seek volume %d, offset %lld, origin %d, new offset %lld",
+ dbg_gen("seek volume %d, offset %lld, origin %d, new offset %lld",
vol->vol_id, offset, origin, new_offset);
file->f_pos = new_offset;
void *tbuf;
uint64_t tmp;
- dbg_msg("read %zd bytes from offset %lld of volume %d",
+ dbg_gen("read %zd bytes from offset %lld of volume %d",
count, *offp, vol->vol_id);
if (vol->updating) {
return 0;
if (vol->corrupted)
- dbg_msg("read from corrupted volume %d", vol->vol_id);
+ dbg_gen("read from corrupted volume %d", vol->vol_id);
if (*offp + count > vol->used_bytes)
count_save = count = vol->used_bytes - *offp;
char *tbuf;
uint64_t tmp;
- dbg_msg("requested: write %zd bytes to offset %lld of volume %u",
+ dbg_gen("requested: write %zd bytes to offset %lld of volume %u",
count, *offp, vol->vol_id);
if (vol->vol_type == UBI_STATIC_VOLUME)
off = do_div(tmp, vol->usable_leb_size);
lnum = tmp;
- if (off % ubi->min_io_size) {
+ if (off & (ubi->min_io_size - 1)) {
dbg_err("unaligned position");
return -EINVAL;
}
count_save = count = vol->used_bytes - *offp;
/* We can write only in fractions of the minimum I/O unit */
- if (count % ubi->min_io_size) {
+ if (count & (ubi->min_io_size - 1)) {
dbg_err("unaligned write length");
return -EINVAL;
}
}
#else
-#define vol_cdev_direct_write(file, buf, count, offp) -EPERM
+#define vol_cdev_direct_write(file, buf, count, offp) (-EPERM)
#endif /* CONFIG_MTD_UBI_DEBUG_USERSPACE_IO */
static ssize_t vol_cdev_write(struct file *file, const char __user *buf,
break;
}
- rsvd_bytes = vol->reserved_pebs * (ubi->leb_size-vol->data_pad);
+ rsvd_bytes = (long long)vol->reserved_pebs *
+ ubi->leb_size-vol->data_pad;
if (bytes < 0 || bytes > rsvd_bytes) {
err = -EINVAL;
break;
break;
}
- dbg_msg("erase LEB %d:%d", vol->vol_id, lnum);
+ dbg_gen("erase LEB %d:%d", vol->vol_id, lnum);
err = ubi_eba_unmap_leb(ubi, vol, lnum);
if (err)
break;
if (req->alignment > ubi->leb_size)
goto bad;
- n = req->alignment % ubi->min_io_size;
+ n = req->alignment & (ubi->min_io_size - 1);
if (req->alignment != 1 && n)
goto bad;
goto bad;
}
+ n = strnlen(req->name, req->name_len + 1);
+ if (n != req->name_len)
+ goto bad;
+
return 0;
bad:
return 0;
}
+/**
+ * rename_volumes - rename UBI volumes.
+ * @ubi: UBI device description object
+ * @req: volumes re-name request
+ *
+ * This is a helper function for the volume re-name IOCTL which validates the
+ * the request, opens the volume and calls corresponding volumes management
+ * function. Returns zero in case of success and a negative error code in case
+ * of failure.
+ */
+static int rename_volumes(struct ubi_device *ubi,
+ struct ubi_rnvol_req *req)
+{
+ int i, n, err;
+ struct list_head rename_list;
+ struct ubi_rename_entry *re, *re1;
+
+ if (req->count < 0 || req->count > UBI_MAX_RNVOL)
+ return -EINVAL;
+
+ if (req->count == 0)
+ return 0;
+
+ /* Validate volume IDs and names in the request */
+ for (i = 0; i < req->count; i++) {
+ if (req->ents[i].vol_id < 0 ||
+ req->ents[i].vol_id >= ubi->vtbl_slots)
+ return -EINVAL;
+ if (req->ents[i].name_len < 0)
+ return -EINVAL;
+ if (req->ents[i].name_len > UBI_VOL_NAME_MAX)
+ return -ENAMETOOLONG;
+ req->ents[i].name[req->ents[i].name_len] = '\0';
+ n = strlen(req->ents[i].name);
+ if (n != req->ents[i].name_len)
+ err = -EINVAL;
+ }
+
+ /* Make sure volume IDs and names are unique */
+ for (i = 0; i < req->count - 1; i++) {
+ for (n = i + 1; n < req->count; n++) {
+ if (req->ents[i].vol_id == req->ents[n].vol_id) {
+ dbg_err("duplicated volume id %d",
+ req->ents[i].vol_id);
+ return -EINVAL;
+ }
+ if (!strcmp(req->ents[i].name, req->ents[n].name)) {
+ dbg_err("duplicated volume name \"%s\"",
+ req->ents[i].name);
+ return -EINVAL;
+ }
+ }
+ }
+
+ /* Create the re-name list */
+ INIT_LIST_HEAD(&rename_list);
+ for (i = 0; i < req->count; i++) {
+ int vol_id = req->ents[i].vol_id;
+ int name_len = req->ents[i].name_len;
+ const char *name = req->ents[i].name;
+
+ re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL);
+ if (!re) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ re->desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_EXCLUSIVE);
+ if (IS_ERR(re->desc)) {
+ err = PTR_ERR(re->desc);
+ dbg_err("cannot open volume %d, error %d", vol_id, err);
+ kfree(re);
+ goto out_free;
+ }
+
+ /* Skip this re-naming if the name does not really change */
+ if (re->desc->vol->name_len == name_len &&
+ !memcmp(re->desc->vol->name, name, name_len)) {
+ ubi_close_volume(re->desc);
+ kfree(re);
+ continue;
+ }
+
+ re->new_name_len = name_len;
+ memcpy(re->new_name, name, name_len);
+ list_add_tail(&re->list, &rename_list);
+ dbg_msg("will rename volume %d from \"%s\" to \"%s\"",
+ vol_id, re->desc->vol->name, name);
+ }
+
+ if (list_empty(&rename_list))
+ return 0;
+
+ /* Find out the volumes which have to be removed */
+ list_for_each_entry(re, &rename_list, list) {
+ struct ubi_volume_desc *desc;
+ int no_remove_needed = 0;
+
+ /*
+ * Volume @re->vol_id is going to be re-named to
+ * @re->new_name, while its current name is @name. If a volume
+ * with name @re->new_name currently exists, it has to be
+ * removed, unless it is also re-named in the request (@req).
+ */
+ list_for_each_entry(re1, &rename_list, list) {
+ if (re->new_name_len == re1->desc->vol->name_len &&
+ !memcmp(re->new_name, re1->desc->vol->name,
+ re1->desc->vol->name_len)) {
+ no_remove_needed = 1;
+ break;
+ }
+ }
+
+ if (no_remove_needed)
+ continue;
+
+ /*
+ * It seems we need to remove volume with name @re->new_name,
+ * if it exists.
+ */
+ desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name, UBI_EXCLUSIVE);
+ if (IS_ERR(desc)) {
+ err = PTR_ERR(desc);
+ if (err == -ENODEV)
+ /* Re-naming into a non-existing volume name */
+ continue;
+
+ /* The volume exists but busy, or an error occurred */
+ dbg_err("cannot open volume \"%s\", error %d",
+ re->new_name, err);
+ goto out_free;
+ }
+
+ re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL);
+ if (!re) {
+ err = -ENOMEM;
+ ubi_close_volume(desc);
+ goto out_free;
+ }
+
+ re->remove = 1;
+ re->desc = desc;
+ list_add(&re->list, &rename_list);
+ dbg_msg("will remove volume %d, name \"%s\"",
+ re->desc->vol->vol_id, re->desc->vol->name);
+ }
+
+ mutex_lock(&ubi->volumes_mutex);
+ err = ubi_rename_volumes(ubi, &rename_list);
+ mutex_unlock(&ubi->volumes_mutex);
+
+out_free:
+ list_for_each_entry_safe(re, re1, &rename_list, list) {
+ ubi_close_volume(re->desc);
+ list_del(&re->list);
+ kfree(re);
+ }
+ return err;
+}
+
static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg)
{
{
struct ubi_mkvol_req req;
- dbg_msg("create volume");
+ dbg_gen("create volume");
err = copy_from_user(&req, argp, sizeof(struct ubi_mkvol_req));
if (err) {
err = -EFAULT;
break;
}
+ req.name[req.name_len] = '\0';
err = verify_mkvol_req(ubi, &req);
if (err)
break;
- req.name[req.name_len] = '\0';
-
mutex_lock(&ubi->volumes_mutex);
err = ubi_create_volume(ubi, &req);
mutex_unlock(&ubi->volumes_mutex);
{
int vol_id;
- dbg_msg("remove volume");
+ dbg_gen("remove volume");
err = get_user(vol_id, (__user int32_t *)argp);
if (err) {
err = -EFAULT;
}
mutex_lock(&ubi->volumes_mutex);
- err = ubi_remove_volume(desc);
+ err = ubi_remove_volume(desc, 0);
mutex_unlock(&ubi->volumes_mutex);
/*
uint64_t tmp;
struct ubi_rsvol_req req;
- dbg_msg("re-size volume");
+ dbg_gen("re-size volume");
err = copy_from_user(&req, argp, sizeof(struct ubi_rsvol_req));
if (err) {
err = -EFAULT;
break;
}
+ /* Re-name volumes command */
+ case UBI_IOCRNVOL:
+ {
+ struct ubi_rnvol_req *req;
+
+ dbg_msg("re-name volumes");
+ req = kmalloc(sizeof(struct ubi_rnvol_req), GFP_KERNEL);
+ if (!req) {
+ err = -ENOMEM;
+ break;
+ };
+
+ err = copy_from_user(req, argp, sizeof(struct ubi_rnvol_req));
+ if (err) {
+ err = -EFAULT;
+ kfree(req);
+ break;
+ }
+
+ mutex_lock(&ubi->mult_mutex);
+ err = rename_volumes(ubi, req);
+ mutex_unlock(&ubi->mult_mutex);
+ kfree(req);
+ break;
+ }
+
default:
err = -ENOTTY;
break;
struct ubi_attach_req req;
struct mtd_info *mtd;
- dbg_msg("attach MTD device");
+ dbg_gen("attach MTD device");
err = copy_from_user(&req, argp, sizeof(struct ubi_attach_req));
if (err) {
err = -EFAULT;
{
int ubi_num;
- dbg_msg("dettach MTD device");
+ dbg_gen("dettach MTD device");
err = get_user(ubi_num, (__user int32_t *)argp);
if (err) {
err = -EFAULT;
* changes.
*/
-#ifdef CONFIG_MTD_UBI_DEBUG_MSG
+#ifdef CONFIG_MTD_UBI_DEBUG
#include "ubi.h"
*/
void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr)
{
- dbg_msg("erase counter header dump:");
- dbg_msg("magic %#08x", be32_to_cpu(ec_hdr->magic));
- dbg_msg("version %d", (int)ec_hdr->version);
- dbg_msg("ec %llu", (long long)be64_to_cpu(ec_hdr->ec));
- dbg_msg("vid_hdr_offset %d", be32_to_cpu(ec_hdr->vid_hdr_offset));
- dbg_msg("data_offset %d", be32_to_cpu(ec_hdr->data_offset));
- dbg_msg("hdr_crc %#08x", be32_to_cpu(ec_hdr->hdr_crc));
- dbg_msg("erase counter header hexdump:");
+ printk(KERN_DEBUG "Erase counter header dump:\n");
+ printk(KERN_DEBUG "\tmagic %#08x\n",
+ be32_to_cpu(ec_hdr->magic));
+ printk(KERN_DEBUG "\tversion %d\n", (int)ec_hdr->version);
+ printk(KERN_DEBUG "\tec %llu\n",
+ (long long)be64_to_cpu(ec_hdr->ec));
+ printk(KERN_DEBUG "\tvid_hdr_offset %d\n",
+ be32_to_cpu(ec_hdr->vid_hdr_offset));
+ printk(KERN_DEBUG "\tdata_offset %d\n",
+ be32_to_cpu(ec_hdr->data_offset));
+ printk(KERN_DEBUG "\thdr_crc %#08x\n",
+ be32_to_cpu(ec_hdr->hdr_crc));
+ printk(KERN_DEBUG "erase counter header hexdump:\n");
print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
ec_hdr, UBI_EC_HDR_SIZE, 1);
}
*/
void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr)
{
- dbg_msg("volume identifier header dump:");
- dbg_msg("magic %08x", be32_to_cpu(vid_hdr->magic));
- dbg_msg("version %d", (int)vid_hdr->version);
- dbg_msg("vol_type %d", (int)vid_hdr->vol_type);
- dbg_msg("copy_flag %d", (int)vid_hdr->copy_flag);
- dbg_msg("compat %d", (int)vid_hdr->compat);
- dbg_msg("vol_id %d", be32_to_cpu(vid_hdr->vol_id));
- dbg_msg("lnum %d", be32_to_cpu(vid_hdr->lnum));
- dbg_msg("leb_ver %u", be32_to_cpu(vid_hdr->leb_ver));
- dbg_msg("data_size %d", be32_to_cpu(vid_hdr->data_size));
- dbg_msg("used_ebs %d", be32_to_cpu(vid_hdr->used_ebs));
- dbg_msg("data_pad %d", be32_to_cpu(vid_hdr->data_pad));
- dbg_msg("sqnum %llu",
+ printk(KERN_DEBUG "Volume identifier header dump:\n");
+ printk(KERN_DEBUG "\tmagic %08x\n", be32_to_cpu(vid_hdr->magic));
+ printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version);
+ printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type);
+ printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag);
+ printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat);
+ printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id));
+ printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum));
+ printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size));
+ printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs));
+ printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad));
+ printk(KERN_DEBUG "\tsqnum %llu\n",
(unsigned long long)be64_to_cpu(vid_hdr->sqnum));
- dbg_msg("hdr_crc %08x", be32_to_cpu(vid_hdr->hdr_crc));
- dbg_msg("volume identifier header hexdump:");
+ printk(KERN_DEBUG "\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc));
+ printk(KERN_DEBUG "Volume identifier header hexdump:\n");
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
+ vid_hdr, UBI_VID_HDR_SIZE, 1);
}
/**
*/
void ubi_dbg_dump_vol_info(const struct ubi_volume *vol)
{
- dbg_msg("volume information dump:");
- dbg_msg("vol_id %d", vol->vol_id);
- dbg_msg("reserved_pebs %d", vol->reserved_pebs);
- dbg_msg("alignment %d", vol->alignment);
- dbg_msg("data_pad %d", vol->data_pad);
- dbg_msg("vol_type %d", vol->vol_type);
- dbg_msg("name_len %d", vol->name_len);
- dbg_msg("usable_leb_size %d", vol->usable_leb_size);
- dbg_msg("used_ebs %d", vol->used_ebs);
- dbg_msg("used_bytes %lld", vol->used_bytes);
- dbg_msg("last_eb_bytes %d", vol->last_eb_bytes);
- dbg_msg("corrupted %d", vol->corrupted);
- dbg_msg("upd_marker %d", vol->upd_marker);
+ printk(KERN_DEBUG "Volume information dump:\n");
+ printk(KERN_DEBUG "\tvol_id %d\n", vol->vol_id);
+ printk(KERN_DEBUG "\treserved_pebs %d\n", vol->reserved_pebs);
+ printk(KERN_DEBUG "\talignment %d\n", vol->alignment);
+ printk(KERN_DEBUG "\tdata_pad %d\n", vol->data_pad);
+ printk(KERN_DEBUG "\tvol_type %d\n", vol->vol_type);
+ printk(KERN_DEBUG "\tname_len %d\n", vol->name_len);
+ printk(KERN_DEBUG "\tusable_leb_size %d\n", vol->usable_leb_size);
+ printk(KERN_DEBUG "\tused_ebs %d\n", vol->used_ebs);
+ printk(KERN_DEBUG "\tused_bytes %lld\n", vol->used_bytes);
+ printk(KERN_DEBUG "\tlast_eb_bytes %d\n", vol->last_eb_bytes);
+ printk(KERN_DEBUG "\tcorrupted %d\n", vol->corrupted);
+ printk(KERN_DEBUG "\tupd_marker %d\n", vol->upd_marker);
if (vol->name_len <= UBI_VOL_NAME_MAX &&
strnlen(vol->name, vol->name_len + 1) == vol->name_len) {
- dbg_msg("name %s", vol->name);
+ printk(KERN_DEBUG "\tname %s\n", vol->name);
} else {
- dbg_msg("the 1st 5 characters of the name: %c%c%c%c%c",
- vol->name[0], vol->name[1], vol->name[2],
- vol->name[3], vol->name[4]);
+ printk(KERN_DEBUG "\t1st 5 characters of name: %c%c%c%c%c\n",
+ vol->name[0], vol->name[1], vol->name[2],
+ vol->name[3], vol->name[4]);
}
}
{
int name_len = be16_to_cpu(r->name_len);
- dbg_msg("volume table record %d dump:", idx);
- dbg_msg("reserved_pebs %d", be32_to_cpu(r->reserved_pebs));
- dbg_msg("alignment %d", be32_to_cpu(r->alignment));
- dbg_msg("data_pad %d", be32_to_cpu(r->data_pad));
- dbg_msg("vol_type %d", (int)r->vol_type);
- dbg_msg("upd_marker %d", (int)r->upd_marker);
- dbg_msg("name_len %d", name_len);
+ printk(KERN_DEBUG "Volume table record %d dump:\n", idx);
+ printk(KERN_DEBUG "\treserved_pebs %d\n",
+ be32_to_cpu(r->reserved_pebs));
+ printk(KERN_DEBUG "\talignment %d\n", be32_to_cpu(r->alignment));
+ printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(r->data_pad));
+ printk(KERN_DEBUG "\tvol_type %d\n", (int)r->vol_type);
+ printk(KERN_DEBUG "\tupd_marker %d\n", (int)r->upd_marker);
+ printk(KERN_DEBUG "\tname_len %d\n", name_len);
if (r->name[0] == '\0') {
- dbg_msg("name NULL");
+ printk(KERN_DEBUG "\tname NULL\n");
return;
}
if (name_len <= UBI_VOL_NAME_MAX &&
strnlen(&r->name[0], name_len + 1) == name_len) {
- dbg_msg("name %s", &r->name[0]);
+ printk(KERN_DEBUG "\tname %s\n", &r->name[0]);
} else {
- dbg_msg("1st 5 characters of the name: %c%c%c%c%c",
+ printk(KERN_DEBUG "\t1st 5 characters of name: %c%c%c%c%c\n",
r->name[0], r->name[1], r->name[2], r->name[3],
r->name[4]);
}
- dbg_msg("crc %#08x", be32_to_cpu(r->crc));
+ printk(KERN_DEBUG "\tcrc %#08x\n", be32_to_cpu(r->crc));
}
/**
*/
void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv)
{
- dbg_msg("volume scanning information dump:");
- dbg_msg("vol_id %d", sv->vol_id);
- dbg_msg("highest_lnum %d", sv->highest_lnum);
- dbg_msg("leb_count %d", sv->leb_count);
- dbg_msg("compat %d", sv->compat);
- dbg_msg("vol_type %d", sv->vol_type);
- dbg_msg("used_ebs %d", sv->used_ebs);
- dbg_msg("last_data_size %d", sv->last_data_size);
- dbg_msg("data_pad %d", sv->data_pad);
+ printk(KERN_DEBUG "Volume scanning information dump:\n");
+ printk(KERN_DEBUG "\tvol_id %d\n", sv->vol_id);
+ printk(KERN_DEBUG "\thighest_lnum %d\n", sv->highest_lnum);
+ printk(KERN_DEBUG "\tleb_count %d\n", sv->leb_count);
+ printk(KERN_DEBUG "\tcompat %d\n", sv->compat);
+ printk(KERN_DEBUG "\tvol_type %d\n", sv->vol_type);
+ printk(KERN_DEBUG "\tused_ebs %d\n", sv->used_ebs);
+ printk(KERN_DEBUG "\tlast_data_size %d\n", sv->last_data_size);
+ printk(KERN_DEBUG "\tdata_pad %d\n", sv->data_pad);
}
/**
*/
void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type)
{
- dbg_msg("eraseblock scanning information dump:");
- dbg_msg("ec %d", seb->ec);
- dbg_msg("pnum %d", seb->pnum);
+ printk(KERN_DEBUG "eraseblock scanning information dump:\n");
+ printk(KERN_DEBUG "\tec %d\n", seb->ec);
+ printk(KERN_DEBUG "\tpnum %d\n", seb->pnum);
if (type == 0) {
- dbg_msg("lnum %d", seb->lnum);
- dbg_msg("scrub %d", seb->scrub);
- dbg_msg("sqnum %llu", seb->sqnum);
- dbg_msg("leb_ver %u", seb->leb_ver);
+ printk(KERN_DEBUG "\tlnum %d\n", seb->lnum);
+ printk(KERN_DEBUG "\tscrub %d\n", seb->scrub);
+ printk(KERN_DEBUG "\tsqnum %llu\n", seb->sqnum);
}
}
{
char nm[17];
- dbg_msg("volume creation request dump:");
- dbg_msg("vol_id %d", req->vol_id);
- dbg_msg("alignment %d", req->alignment);
- dbg_msg("bytes %lld", (long long)req->bytes);
- dbg_msg("vol_type %d", req->vol_type);
- dbg_msg("name_len %d", req->name_len);
+ printk(KERN_DEBUG "Volume creation request dump:\n");
+ printk(KERN_DEBUG "\tvol_id %d\n", req->vol_id);
+ printk(KERN_DEBUG "\talignment %d\n", req->alignment);
+ printk(KERN_DEBUG "\tbytes %lld\n", (long long)req->bytes);
+ printk(KERN_DEBUG "\tvol_type %d\n", req->vol_type);
+ printk(KERN_DEBUG "\tname_len %d\n", req->name_len);
memcpy(nm, req->name, 16);
nm[16] = 0;
- dbg_msg("the 1st 16 characters of the name: %s", nm);
+ printk(KERN_DEBUG "\t1st 16 characters of name: %s\n", nm);
}
-#endif /* CONFIG_MTD_UBI_DEBUG_MSG */
+#endif /* CONFIG_MTD_UBI_DEBUG */
#ifdef CONFIG_MTD_UBI_DEBUG
#include <linux/random.h>
-#define ubi_assert(expr) BUG_ON(!(expr))
#define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__)
-#else
-#define ubi_assert(expr) ({})
-#define dbg_err(fmt, ...) ({})
-#endif
-#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT
-#define DBG_DISABLE_BGT 1
-#else
-#define DBG_DISABLE_BGT 0
-#endif
+#define ubi_assert(expr) do { \
+ if (unlikely(!(expr))) { \
+ printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \
+ __func__, __LINE__, current->pid); \
+ ubi_dbg_dump_stack(); \
+ } \
+} while (0)
-#ifdef CONFIG_MTD_UBI_DEBUG_MSG
-/* Generic debugging message */
#define dbg_msg(fmt, ...) \
printk(KERN_DEBUG "UBI DBG (pid %d): %s: " fmt "\n", \
current->pid, __func__, ##__VA_ARGS__)
void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type);
void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req);
+#ifdef CONFIG_MTD_UBI_DEBUG_MSG
+/* General debugging messages */
+#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#else
-
-#define dbg_msg(fmt, ...) ({})
-#define ubi_dbg_dump_stack() ({})
-#define ubi_dbg_dump_ec_hdr(ec_hdr) ({})
-#define ubi_dbg_dump_vid_hdr(vid_hdr) ({})
-#define ubi_dbg_dump_vol_info(vol) ({})
-#define ubi_dbg_dump_vtbl_record(r, idx) ({})
-#define ubi_dbg_dump_sv(sv) ({})
-#define ubi_dbg_dump_seb(seb, type) ({})
-#define ubi_dbg_dump_mkvol_req(req) ({})
-
-#endif /* CONFIG_MTD_UBI_DEBUG_MSG */
+#define dbg_gen(fmt, ...) ({})
+#endif
#ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA
-/* Messages from the eraseblock association unit */
+/* Messages from the eraseblock association sub-system */
#define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#else
#define dbg_eba(fmt, ...) ({})
#endif
#ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL
-/* Messages from the wear-leveling unit */
+/* Messages from the wear-leveling sub-system */
#define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#else
#define dbg_wl(fmt, ...) ({})
#endif
#ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO
-/* Messages from the input/output unit */
+/* Messages from the input/output sub-system */
#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
#else
#define dbg_io(fmt, ...) ({})
#define UBI_IO_DEBUG 0
#endif
+#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT
+#define DBG_DISABLE_BGT 1
+#else
+#define DBG_DISABLE_BGT 0
+#endif
+
#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS
/**
* ubi_dbg_is_bitflip - if it is time to emulate a bit-flip.
#define ubi_dbg_is_erase_failure() 0
#endif
+#else
+
+#define ubi_assert(expr) ({})
+#define dbg_err(fmt, ...) ({})
+#define dbg_msg(fmt, ...) ({})
+#define dbg_gen(fmt, ...) ({})
+#define dbg_eba(fmt, ...) ({})
+#define dbg_wl(fmt, ...) ({})
+#define dbg_io(fmt, ...) ({})
+#define dbg_bld(fmt, ...) ({})
+#define ubi_dbg_dump_stack() ({})
+#define ubi_dbg_dump_ec_hdr(ec_hdr) ({})
+#define ubi_dbg_dump_vid_hdr(vid_hdr) ({})
+#define ubi_dbg_dump_vol_info(vol) ({})
+#define ubi_dbg_dump_vtbl_record(r, idx) ({})
+#define ubi_dbg_dump_sv(sv) ({})
+#define ubi_dbg_dump_seb(seb, type) ({})
+#define ubi_dbg_dump_mkvol_req(req) ({})
+
+#define UBI_IO_DEBUG 0
+#define DBG_DISABLE_BGT 0
+#define ubi_dbg_is_bitflip() 0
+#define ubi_dbg_is_write_failure() 0
+#define ubi_dbg_is_erase_failure() 0
+
+#endif /* !CONFIG_MTD_UBI_DEBUG */
#endif /* !__UBI_DEBUG_H__ */
*/
/*
- * The UBI Eraseblock Association (EBA) unit.
+ * The UBI Eraseblock Association (EBA) sub-system.
*
- * This unit is responsible for I/O to/from logical eraseblock.
+ * This sub-system is responsible for I/O to/from logical eraseblock.
*
* Although in this implementation the EBA table is fully kept and managed in
* RAM, which assumes poor scalability, it might be (partially) maintained on
* flash in future implementations.
*
- * The EBA unit implements per-logical eraseblock locking. Before accessing a
- * logical eraseblock it is locked for reading or writing. The per-logical
- * eraseblock locking is implemented by means of the lock tree. The lock tree
- * is an RB-tree which refers all the currently locked logical eraseblocks. The
- * lock tree elements are &struct ubi_ltree_entry objects. They are indexed by
- * (@vol_id, @lnum) pairs.
+ * The EBA sub-system implements per-logical eraseblock locking. Before
+ * accessing a logical eraseblock it is locked for reading or writing. The
+ * per-logical eraseblock locking is implemented by means of the lock tree. The
+ * lock tree is an RB-tree which refers all the currently locked logical
+ * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects.
+ * They are indexed by (@vol_id, @lnum) pairs.
*
* EBA also maintains the global sequence counter which is incremented each
* time a logical eraseblock is mapped to a physical eraseblock and it is
le->users += 1;
spin_unlock(&ubi->ltree_lock);
- if (le_free)
- kfree(le_free);
-
+ kfree(le_free);
return le;
}
*/
static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum)
{
- int free = 0;
struct ubi_ltree_entry *le;
spin_lock(&ubi->ltree_lock);
le = ltree_lookup(ubi, vol_id, lnum);
le->users -= 1;
ubi_assert(le->users >= 0);
+ up_read(&le->mutex);
if (le->users == 0) {
rb_erase(&le->rb, &ubi->ltree);
- free = 1;
+ kfree(le);
}
spin_unlock(&ubi->ltree_lock);
-
- up_read(&le->mutex);
- if (free)
- kfree(le);
}
/**
*/
static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum)
{
- int free;
struct ubi_ltree_entry *le;
le = ltree_add_entry(ubi, vol_id, lnum);
ubi_assert(le->users >= 0);
if (le->users == 0) {
rb_erase(&le->rb, &ubi->ltree);
- free = 1;
- } else
- free = 0;
- spin_unlock(&ubi->ltree_lock);
- if (free)
kfree(le);
+ }
+ spin_unlock(&ubi->ltree_lock);
return 1;
}
*/
static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum)
{
- int free;
struct ubi_ltree_entry *le;
spin_lock(&ubi->ltree_lock);
le = ltree_lookup(ubi, vol_id, lnum);
le->users -= 1;
ubi_assert(le->users >= 0);
+ up_write(&le->mutex);
if (le->users == 0) {
rb_erase(&le->rb, &ubi->ltree);
- free = 1;
- } else
- free = 0;
- spin_unlock(&ubi->ltree_lock);
-
- up_write(&le->mutex);
- if (free)
kfree(le);
+ }
+ spin_unlock(&ubi->ltree_lock);
}
/**
struct ubi_vid_hdr *vid_hdr;
vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
- if (!vid_hdr) {
+ if (!vid_hdr)
return -ENOMEM;
- }
mutex_lock(&ubi->buf_mutex);
/* If this is the last LEB @len may be unaligned */
len = ALIGN(data_size, ubi->min_io_size);
else
- ubi_assert(len % ubi->min_io_size == 0);
+ ubi_assert(!(len & (ubi->min_io_size - 1)));
vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
if (!vid_hdr)
}
if (vol->eba_tbl[lnum] >= 0) {
- err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1);
+ err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 0);
if (err)
goto out_leb_unlock;
}
}
/**
- * ubi_eba_init_scan - initialize the EBA unit using scanning information.
+ * ubi_eba_init_scan - initialize the EBA sub-system using scanning information.
* @ubi: UBI device description object
* @si: scanning information
*
struct ubi_scan_leb *seb;
struct rb_node *rb;
- dbg_eba("initialize EBA unit");
+ dbg_eba("initialize EBA sub-system");
spin_lock_init(&ubi->ltree_lock);
mutex_init(&ubi->alc_mutex);
ubi->rsvd_pebs += ubi->beb_rsvd_pebs;
}
- dbg_eba("EBA unit is initialized");
+ dbg_eba("EBA sub-system is initialized");
return 0;
out_free:
}
return err;
}
-
-/**
- * ubi_eba_close - close EBA unit.
- * @ubi: UBI device description object
- */
-void ubi_eba_close(const struct ubi_device *ubi)
-{
- int i, num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT;
-
- dbg_eba("close EBA unit");
-
- for (i = 0; i < num_volumes; i++) {
- if (!ubi->volumes[i])
- continue;
- kfree(ubi->volumes[i]->eba_tbl);
- }
-}
struct ubi_device *ubi;
uint64_t tmp = from;
- dbg_msg("read %zd bytes from offset %lld", len, from);
+ dbg_gen("read %zd bytes from offset %lld", len, from);
if (len < 0 || from < 0 || from + len > mtd->size)
return -EINVAL;
struct ubi_device *ubi;
uint64_t tmp = to;
- dbg_msg("write %zd bytes to offset %lld", len, to);
+ dbg_gen("write %zd bytes to offset %lld", len, to);
if (len < 0 || to < 0 || len + to > mtd->size)
return -EINVAL;
struct ubi_volume *vol;
struct ubi_device *ubi;
- dbg_msg("erase %u bytes at offset %u", instr->len, instr->addr);
+ dbg_gen("erase %u bytes at offset %u", instr->len, instr->addr);
if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize)
return -EINVAL;
if (err)
goto out_err;
- instr->state = MTD_ERASE_DONE;
- mtd_erase_callback(instr);
+ instr->state = MTD_ERASE_DONE;
+ mtd_erase_callback(instr);
return 0;
out_err:
mtd->size = vol->used_bytes;
if (add_mtd_device(mtd)) {
- ubi_err("cannot not add MTD device\n");
+ ubi_err("cannot not add MTD device");
kfree(mtd->name);
return -ENFILE;
}
- dbg_msg("added mtd%d (\"%s\"), size %u, EB size %u",
+ dbg_gen("added mtd%d (\"%s\"), size %u, EB size %u",
mtd->index, mtd->name, mtd->size, mtd->erasesize);
return 0;
}
int err;
struct mtd_info *mtd = &vol->gluebi_mtd;
- dbg_msg("remove mtd%d", mtd->index);
+ dbg_gen("remove mtd%d", mtd->index);
err = del_mtd_device(mtd);
if (err)
return err;
*/
/*
- * UBI input/output unit.
+ * UBI input/output sub-system.
*
- * This unit provides a uniform way to work with all kinds of the underlying
- * MTD devices. It also implements handy functions for reading and writing UBI
- * headers.
+ * This sub-system provides a uniform way to work with all kinds of the
+ * underlying MTD devices. It also implements handy functions for reading and
+ * writing UBI headers.
*
* We are trying to have a paranoid mindset and not to trust to what we read
- * from the flash media in order to be more secure and robust. So this unit
- * validates every single header it reads from the flash media.
+ * from the flash media in order to be more secure and robust. So this
+ * sub-system validates every single header it reads from the flash media.
*
* Some words about how the eraseblock headers are stored.
*
* 512-byte chunks, we have to allocate one more buffer and copy our VID header
* to offset 448 of this buffer.
*
- * The I/O unit does the following trick in order to avoid this extra copy.
- * It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID header
- * and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. When the
- * VID header is being written out, it shifts the VID header pointer back and
- * writes the whole sub-page.
+ * The I/O sub-system does the following trick in order to avoid this extra
+ * copy. It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID
+ * header and returns a pointer to offset @ubi->vid_hdr_shift of this buffer.
+ * When the VID header is being written out, it shifts the VID header pointer
+ * back and writes the whole sub-page.
*/
#include <linux/crc32.h>
/*
* -EUCLEAN is reported if there was a bit-flip which
* was corrected, so this is harmless.
+ *
+ * We do not report about it here unless debugging is
+ * enabled. A corresponding message will be printed
+ * later, when it is has been scrubbed.
*/
- ubi_msg("fixable bit-flip detected at PEB %d", pnum);
+ dbg_msg("fixable bit-flip detected at PEB %d", pnum);
ubi_assert(len == read);
return UBI_IO_BITFLIPS;
}
if (read != len && retries++ < UBI_IO_RETRIES) {
- dbg_io("error %d while reading %d bytes from PEB %d:%d, "
- "read only %zd bytes, retry",
+ dbg_io("error %d while reading %d bytes from PEB %d:%d,"
+ " read only %zd bytes, retry",
err, len, pnum, offset, read);
yield();
goto retry;
ubi_assert(len == read);
if (ubi_dbg_is_bitflip()) {
- dbg_msg("bit-flip (emulated)");
+ dbg_gen("bit-flip (emulated)");
err = UBI_IO_BITFLIPS;
}
}
{
int err, i, patt_count;
+ ubi_msg("run torture test for PEB %d", pnum);
patt_count = ARRAY_SIZE(patterns);
ubi_assert(patt_count > 0);
}
err = patt_count;
+ ubi_msg("PEB %d passed torture test, do not mark it a bad", pnum);
out:
mutex_unlock(&ubi->buf_mutex);
if (hdr_crc != crc) {
if (verbose) {
- ubi_warn("bad EC header CRC at PEB %d, calculated %#08x,"
- " read %#08x", pnum, crc, hdr_crc);
+ ubi_warn("bad EC header CRC at PEB %d, calculated "
+ "%#08x, read %#08x", pnum, crc, hdr_crc);
ubi_dbg_dump_ec_hdr(ec_hdr);
}
return UBI_IO_BAD_EC_HDR;
}
/**
- * paranoid_check_peb_ec_hdr - check that the erase counter header of a
- * physical eraseblock is in-place and is all right.
+ * paranoid_check_peb_ec_hdr - check erase counter header.
* @ubi: UBI device description object
* @pnum: the physical eraseblock number to check
*
}
/**
- * paranoid_check_peb_vid_hdr - check that the volume identifier header of a
- * physical eraseblock is in-place and is all right.
+ * paranoid_check_peb_vid_hdr - check volume identifier header.
* @ubi: UBI device description object
* @pnum: the physical eraseblock number to check
*
fail:
ubi_err("paranoid check failed for PEB %d", pnum);
- dbg_msg("hex dump of the %d-%d region", offset, offset + len);
+ ubi_msg("hex dump of the %d-%d region", offset, offset + len);
print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
ubi->dbg_peb_buf, len, 1);
err = 1;
struct ubi_device *ubi;
struct ubi_volume *vol;
- dbg_msg("open device %d volume %d, mode %d", ubi_num, vol_id, mode);
+ dbg_gen("open device %d volume %d, mode %d", ubi_num, vol_id, mode);
if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES)
return ERR_PTR(-EINVAL);
struct ubi_device *ubi;
struct ubi_volume_desc *ret;
- dbg_msg("open volume %s, mode %d", name, mode);
+ dbg_gen("open volume %s, mode %d", name, mode);
if (!name)
return ERR_PTR(-EINVAL);
struct ubi_volume *vol = desc->vol;
struct ubi_device *ubi = vol->ubi;
- dbg_msg("close volume %d, mode %d", vol->vol_id, desc->mode);
+ dbg_gen("close volume %d, mode %d", vol->vol_id, desc->mode);
spin_lock(&ubi->volumes_lock);
switch (desc->mode) {
struct ubi_device *ubi = vol->ubi;
int err, vol_id = vol->vol_id;
- dbg_msg("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset);
+ dbg_gen("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset);
if (vol_id < 0 || vol_id >= ubi->vtbl_slots || lnum < 0 ||
lnum >= vol->used_ebs || offset < 0 || len < 0 ||
struct ubi_device *ubi = vol->ubi;
int vol_id = vol->vol_id;
- dbg_msg("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset);
+ dbg_gen("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset);
if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
return -EINVAL;
return -EROFS;
if (lnum < 0 || lnum >= vol->reserved_pebs || offset < 0 || len < 0 ||
- offset + len > vol->usable_leb_size || offset % ubi->min_io_size ||
- len % ubi->min_io_size)
+ offset + len > vol->usable_leb_size ||
+ offset & (ubi->min_io_size - 1) || len & (ubi->min_io_size - 1))
return -EINVAL;
if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM &&
struct ubi_device *ubi = vol->ubi;
int vol_id = vol->vol_id;
- dbg_msg("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum);
+ dbg_gen("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum);
if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
return -EINVAL;
return -EROFS;
if (lnum < 0 || lnum >= vol->reserved_pebs || len < 0 ||
- len > vol->usable_leb_size || len % ubi->min_io_size)
+ len > vol->usable_leb_size || len & (ubi->min_io_size - 1))
return -EINVAL;
if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM &&
struct ubi_device *ubi = vol->ubi;
int err;
- dbg_msg("erase LEB %d:%d", vol->vol_id, lnum);
+ dbg_gen("erase LEB %d:%d", vol->vol_id, lnum);
if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
return -EROFS;
struct ubi_volume *vol = desc->vol;
struct ubi_device *ubi = vol->ubi;
- dbg_msg("unmap LEB %d:%d", vol->vol_id, lnum);
+ dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum);
if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
return -EROFS;
struct ubi_volume *vol = desc->vol;
struct ubi_device *ubi = vol->ubi;
- dbg_msg("unmap LEB %d:%d", vol->vol_id, lnum);
+ dbg_gen("unmap LEB %d:%d", vol->vol_id, lnum);
if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
return -EROFS;
{
struct ubi_volume *vol = desc->vol;
- dbg_msg("test LEB %d:%d", vol->vol_id, lnum);
+ dbg_gen("test LEB %d:%d", vol->vol_id, lnum);
if (lnum < 0 || lnum >= vol->reserved_pebs)
return -EINVAL;
return vol->eba_tbl[lnum] >= 0;
}
EXPORT_SYMBOL_GPL(ubi_is_mapped);
+
+/**
+ * ubi_sync - synchronize UBI device buffers.
+ * @ubi_num: UBI device to synchronize
+ *
+ * The underlying MTD device may cache data in hardware or in software. This
+ * function ensures the caches are flushed. Returns zero in case of success and
+ * a negative error code in case of failure.
+ */
+int ubi_sync(int ubi_num)
+{
+ struct ubi_device *ubi;
+
+ ubi = ubi_get_device(ubi_num);
+ if (!ubi)
+ return -ENODEV;
+
+ if (ubi->mtd->sync)
+ ubi->mtd->sync(ubi->mtd);
+
+ ubi_put_device(ubi);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ubi_sync);
{
int i;
- ubi_assert(length % ubi->min_io_size == 0);
+ ubi_assert(!(length & (ubi->min_io_size - 1)));
for (i = length - 1; i >= 0; i--)
if (((const uint8_t *)buf)[i] != 0xFF)
*/
/*
- * UBI scanning unit.
+ * UBI scanning sub-system.
*
- * This unit is responsible for scanning the flash media, checking UBI
+ * This sub-system is responsible for scanning the flash media, checking UBI
* headers and providing complete information about the UBI flash image.
*
* The scanning information is represented by a &struct ubi_scan_info' object.
}
/**
- * validate_vid_hdr - check that volume identifier header is correct and
- * consistent.
+ * validate_vid_hdr - check volume identifier header.
* @vid_hdr: the volume identifier header to check
* @sv: information about the volume this logical eraseblock belongs to
* @pnum: physical eraseblock number the VID header came from
* non-zero if an inconsistency was found and zero if not.
*
* Note, UBI does sanity check of everything it reads from the flash media.
- * Most of the checks are done in the I/O unit. Here we check that the
+ * Most of the checks are done in the I/O sub-system. Here we check that the
* information in the VID header is consistent to the information in other VID
* headers of the same volume.
*/
struct ubi_vid_hdr *vh = NULL;
unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum);
- if (seb->sqnum == 0 && sqnum2 == 0) {
- long long abs, v1 = seb->leb_ver, v2 = be32_to_cpu(vid_hdr->leb_ver);
-
+ if (sqnum2 == seb->sqnum) {
/*
- * UBI constantly increases the logical eraseblock version
- * number and it can overflow. Thus, we have to bear in mind
- * that versions that are close to %0xFFFFFFFF are less then
- * versions that are close to %0.
- *
- * The UBI WL unit guarantees that the number of pending tasks
- * is not greater then %0x7FFFFFFF. So, if the difference
- * between any two versions is greater or equivalent to
- * %0x7FFFFFFF, there was an overflow and the logical
- * eraseblock with lower version is actually newer then the one
- * with higher version.
- *
- * FIXME: but this is anyway obsolete and will be removed at
- * some point.
+ * This must be a really ancient UBI image which has been
+ * created before sequence numbers support has been added. At
+ * that times we used 32-bit LEB versions stored in logical
+ * eraseblocks. That was before UBI got into mainline. We do not
+ * support these images anymore. Well, those images will work
+ * still work, but only if no unclean reboots happened.
*/
- dbg_bld("using old crappy leb_ver stuff");
-
- if (v1 == v2) {
- ubi_err("PEB %d and PEB %d have the same version %lld",
- seb->pnum, pnum, v1);
- return -EINVAL;
- }
-
- abs = v1 - v2;
- if (abs < 0)
- abs = -abs;
+ ubi_err("unsupported on-flash UBI format\n");
+ return -EINVAL;
+ }
- if (abs < 0x7FFFFFFF)
- /* Non-overflow situation */
- second_is_newer = (v2 > v1);
- else
- second_is_newer = (v2 < v1);
- } else
- /* Obviously the LEB with lower sequence counter is older */
- second_is_newer = sqnum2 > seb->sqnum;
+ /* Obviously the LEB with lower sequence counter is older */
+ second_is_newer = !!(sqnum2 > seb->sqnum);
/*
* Now we know which copy is newer. If the copy flag of the PEB with
* check data CRC. For the second PEB we already have the VID header,
* for the first one - we'll need to re-read it from flash.
*
- * FIXME: this may be optimized so that we wouldn't read twice.
+ * Note: this may be optimized so that we wouldn't read twice.
*/
if (second_is_newer) {
}
/**
- * ubi_scan_add_used - add information about a physical eraseblock to the
- * scanning information.
+ * ubi_scan_add_used - add physical eraseblock to the scanning information.
* @ubi: UBI device description object
* @si: scanning information
* @pnum: the physical eraseblock number
int bitflips)
{
int err, vol_id, lnum;
- uint32_t leb_ver;
unsigned long long sqnum;
struct ubi_scan_volume *sv;
struct ubi_scan_leb *seb;
vol_id = be32_to_cpu(vid_hdr->vol_id);
lnum = be32_to_cpu(vid_hdr->lnum);
sqnum = be64_to_cpu(vid_hdr->sqnum);
- leb_ver = be32_to_cpu(vid_hdr->leb_ver);
- dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d",
- pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips);
+ dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, bitflips %d",
+ pnum, vol_id, lnum, ec, sqnum, bitflips);
sv = add_volume(si, vol_id, pnum, vid_hdr);
if (IS_ERR(sv) < 0)
*/
dbg_bld("this LEB already exists: PEB %d, sqnum %llu, "
- "LEB ver %u, EC %d", seb->pnum, seb->sqnum,
- seb->leb_ver, seb->ec);
-
- /*
- * Make sure that the logical eraseblocks have different
- * versions. Otherwise the image is bad.
- */
- if (seb->leb_ver == leb_ver && leb_ver != 0) {
- ubi_err("two LEBs with same version %u", leb_ver);
- ubi_dbg_dump_seb(seb, 0);
- ubi_dbg_dump_vid_hdr(vid_hdr);
- return -EINVAL;
- }
+ "EC %d", seb->pnum, seb->sqnum, seb->ec);
/*
* Make sure that the logical eraseblocks have different
* sequence numbers. Otherwise the image is bad.
*
- * FIXME: remove 'sqnum != 0' check when leb_ver is removed.
+ * However, if the sequence number is zero, we assume it must
+ * be an ancient UBI image from the era when UBI did not have
+ * sequence numbers. We still can attach these images, unless
+ * there is a need to distinguish between old and new
+ * eraseblocks, in which case we'll refuse the image in
+ * 'compare_lebs()'. In other words, we attach old clean
+ * images, but refuse attaching old images with duplicated
+ * logical eraseblocks because there was an unclean reboot.
*/
if (seb->sqnum == sqnum && sqnum != 0) {
ubi_err("two LEBs with same sequence number %llu",
seb->pnum = pnum;
seb->scrub = ((cmp_res & 2) || bitflips);
seb->sqnum = sqnum;
- seb->leb_ver = leb_ver;
if (sv->highest_lnum == lnum)
sv->last_data_size =
seb->lnum = lnum;
seb->sqnum = sqnum;
seb->scrub = bitflips;
- seb->leb_ver = leb_ver;
if (sv->highest_lnum <= lnum) {
sv->highest_lnum = lnum;
}
/**
- * ubi_scan_find_sv - find information about a particular volume in the
- * scanning information.
+ * ubi_scan_find_sv - find volume in the scanning information.
* @si: scanning information
* @vol_id: the requested volume ID
*
}
/**
- * ubi_scan_find_seb - find information about a particular logical
- * eraseblock in the volume scanning information.
+ * ubi_scan_find_seb - find LEB in the volume scanning information.
* @sv: a pointer to the volume scanning information
* @lnum: the requested logical eraseblock
*
*
* This function erases physical eraseblock 'pnum', and writes the erase
* counter header to it. This function should only be used on UBI device
- * initialization stages, when the EBA unit had not been yet initialized. This
- * function returns zero in case of success and a negative error code in case
- * of failure.
+ * initialization stages, when the EBA sub-system had not been yet initialized.
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
*/
int ubi_scan_erase_peb(struct ubi_device *ubi, const struct ubi_scan_info *si,
int pnum, int ec)
* @si: scanning information
*
* This function returns a free physical eraseblock. It is supposed to be
- * called on the UBI initialization stages when the wear-leveling unit is not
- * initialized yet. This function picks a physical eraseblocks from one of the
- * lists, writes the EC header if it is needed, and removes it from the list.
+ * called on the UBI initialization stages when the wear-leveling sub-system is
+ * not initialized yet. This function picks a physical eraseblocks from one of
+ * the lists, writes the EC header if it is needed, and removes it from the
+ * list.
*
* This function returns scanning physical eraseblock information in case of
* success and an error code in case of failure.
}
/**
- * process_eb - read UBI headers, check them and add corresponding data
- * to the scanning information.
+ * process_eb - read, check UBI headers, and add them to scanning information.
* @ubi: UBI device description object
* @si: scanning information
* @pnum: the physical eraseblock number
* This function returns a zero if the physical eraseblock was successfully
* handled and a negative error code in case of failure.
*/
-static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum)
+static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si,
+ int pnum)
{
long long uninitialized_var(ec);
int err, bitflips = 0, vol_id, ec_corr = 0;
return err;
else if (err) {
/*
- * FIXME: this is actually duty of the I/O unit to initialize
- * this, but MTD does not provide enough information.
+ * FIXME: this is actually duty of the I/O sub-system to
+ * initialize this, but MTD does not provide enough
+ * information.
*/
si->bad_peb_count += 1;
return 0;
for (pnum = 0; pnum < ubi->peb_count; pnum++) {
cond_resched();
- dbg_msg("process PEB %d", pnum);
+ dbg_gen("process PEB %d", pnum);
err = process_eb(ubi, si, pnum);
if (err < 0)
goto out_vidh;
#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
/**
- * paranoid_check_si - check if the scanning information is correct and
- * consistent.
+ * paranoid_check_si - check the scanning information.
* @ubi: UBI device description object
* @si: scanning information
*
ubi_err("bad data_pad %d", sv->data_pad);
goto bad_vid_hdr;
}
-
- if (seb->leb_ver != be32_to_cpu(vidh->leb_ver)) {
- ubi_err("bad leb_ver %u", seb->leb_ver);
- goto bad_vid_hdr;
- }
}
if (!last_seb)
if (err < 0) {
kfree(buf);
return err;
- }
- else if (err)
+ } else if (err)
buf[pnum] = 1;
}
* @u: unions RB-tree or @list links
* @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects
* @u.list: link in one of the eraseblock lists
- * @leb_ver: logical eraseblock version (obsolete)
*
* One object of this type is allocated for each physical eraseblock during
* scanning.
struct rb_node rb;
struct list_head list;
} u;
- uint32_t leb_ver;
};
/**
* @leb_count: number of logical eraseblocks in this volume
* @vol_type: volume type
* @used_ebs: number of used logical eraseblocks in this volume (only for
- * static volumes)
+ * static volumes)
* @last_data_size: amount of data in the last logical eraseblock of this
- * volume (always equivalent to the usable logical eraseblock size in case of
- * dynamic volumes)
+ * volume (always equivalent to the usable logical eraseblock
+ * size in case of dynamic volumes)
* @data_pad: how many bytes at the end of logical eraseblocks of this volume
- * are not used (due to volume alignment)
+ * are not used (due to volume alignment)
* @compat: compatibility flags of this volume
* @rb: link in the volume RB-tree
* @root: root of the RB-tree containing all the eraseblock belonging to this
- * volume (&struct ubi_scan_leb objects)
+ * volume (&struct ubi_scan_leb objects)
*
* One object of this type is allocated for each volume during scanning.
*/
* @free: list of free physical eraseblocks
* @erase: list of physical eraseblocks which have to be erased
* @alien: list of physical eraseblocks which should not be used by UBI (e.g.,
+ * those belonging to "preserve"-compatible internal volumes)
* @bad_peb_count: count of bad physical eraseblocks
- * those belonging to "preserve"-compatible internal volumes)
* @vols_found: number of volumes found during scanning
* @highest_vol_id: highest volume ID
* @alien_peb_count: count of physical eraseblocks in the @alien list
* @ec_count: a temporary variable used when calculating @mean_ec
*
* This data structure contains the result of scanning and may be used by other
- * UBI units to build final UBI data structures, further error-recovery and so
- * on.
+ * UBI sub-systems to build final UBI data structures, further error-recovery
+ * and so on.
*/
struct ubi_scan_info {
struct rb_root volumes;
struct ubi_vid_hdr;
/*
- * ubi_scan_move_to_list - move a physical eraseblock from the volume tree to a
- * list.
+ * ubi_scan_move_to_list - move a PEB from the volume tree to a list.
*
* @sv: volume scanning information
* @seb: scanning eraseblock infprmation
* Compatibility constants used by internal volumes.
*
* @UBI_COMPAT_DELETE: delete this internal volume before anything is written
- * to the flash
+ * to the flash
* @UBI_COMPAT_RO: attach this device in read-only mode
* @UBI_COMPAT_PRESERVE: preserve this internal volume - do not touch its
- * physical eraseblocks, don't allow the wear-leveling unit to move them
+ * physical eraseblocks, don't allow the wear-leveling
+ * sub-system to move them
* @UBI_COMPAT_REJECT: reject this UBI image
*/
enum {
* struct ubi_ec_hdr - UBI erase counter header.
* @magic: erase counter header magic number (%UBI_EC_HDR_MAGIC)
* @version: version of UBI implementation which is supposed to accept this
- * UBI image
+ * UBI image
* @padding1: reserved for future, zeroes
* @ec: the erase counter
* @vid_hdr_offset: where the VID header starts
* struct ubi_vid_hdr - on-flash UBI volume identifier header.
* @magic: volume identifier header magic number (%UBI_VID_HDR_MAGIC)
* @version: UBI implementation version which is supposed to accept this UBI
- * image (%UBI_VERSION)
+ * image (%UBI_VERSION)
* @vol_type: volume type (%UBI_VID_DYNAMIC or %UBI_VID_STATIC)
* @copy_flag: if this logical eraseblock was copied from another physical
- * eraseblock (for wear-leveling reasons)
+ * eraseblock (for wear-leveling reasons)
* @compat: compatibility of this volume (%0, %UBI_COMPAT_DELETE,
- * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT)
+ * %UBI_COMPAT_IGNORE, %UBI_COMPAT_PRESERVE, or %UBI_COMPAT_REJECT)
* @vol_id: ID of this volume
* @lnum: logical eraseblock number
- * @leb_ver: version of this logical eraseblock (IMPORTANT: obsolete, to be
- * removed, kept only for not breaking older UBI users)
+ * @padding1: reserved for future, zeroes
* @data_size: how many bytes of data this logical eraseblock contains
* @used_ebs: total number of used logical eraseblocks in this volume
* @data_pad: how many bytes at the end of this physical eraseblock are not
- * used
+ * used
* @data_crc: CRC checksum of the data stored in this logical eraseblock
- * @padding1: reserved for future, zeroes
- * @sqnum: sequence number
* @padding2: reserved for future, zeroes
+ * @sqnum: sequence number
+ * @padding3: reserved for future, zeroes
* @hdr_crc: volume identifier header CRC checksum
*
* The @sqnum is the value of the global sequence counter at the time when this
* checksum is correct, this physical eraseblock is selected (P1). Otherwise
* the older one (P) is selected.
*
- * Note, there is an obsolete @leb_ver field which was used instead of @sqnum
- * in the past. But it is not used anymore and we keep it in order to be able
- * to deal with old UBI images. It will be removed at some point.
- *
* There are 2 sorts of volumes in UBI: user volumes and internal volumes.
* Internal volumes are not seen from outside and are used for various internal
* UBI purposes. In this implementation there is only one internal volume - the
* The @data_crc field contains the CRC checksum of the contents of the logical
* eraseblock if this is a static volume. In case of dynamic volumes, it does
* not contain the CRC checksum as a rule. The only exception is when the
- * data of the physical eraseblock was moved by the wear-leveling unit, then
- * the wear-leveling unit calculates the data CRC and stores it in the
- * @data_crc field. And of course, the @copy_flag is %in this case.
+ * data of the physical eraseblock was moved by the wear-leveling sub-system,
+ * then the wear-leveling sub-system calculates the data CRC and stores it in
+ * the @data_crc field. And of course, the @copy_flag is %in this case.
*
* The @data_size field is used only for static volumes because UBI has to know
* how many bytes of data are stored in this eraseblock. For dynamic volumes,
__u8 compat;
__be32 vol_id;
__be32 lnum;
- __be32 leb_ver; /* obsolete, to be removed, don't use */
+ __u8 padding1[4];
__be32 data_size;
__be32 used_ebs;
__be32 data_pad;
__be32 data_crc;
- __u8 padding1[4];
+ __u8 padding2[4];
__be64 sqnum;
- __u8 padding2[12];
+ __u8 padding3[12];
__be32 hdr_crc;
} __attribute__ ((packed));
#define UBI_IO_RETRIES 3
/*
- * Error codes returned by the I/O unit.
+ * Error codes returned by the I/O sub-system.
*
* UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only
- * 0xFF bytes
+ * %0xFF bytes
* UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a
- * valid erase counter header, and the rest are %0xFF bytes
+ * valid erase counter header, and the rest are %0xFF bytes
* UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC)
* UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or
- * CRC)
+ * CRC)
* UBI_IO_BITFLIPS: bit-flips were detected and corrected
*/
enum {
* @ec: erase counter
* @pnum: physical eraseblock number
*
- * This data structure is used in the WL unit. Each physical eraseblock has a
- * corresponding &struct wl_entry object which may be kept in different
- * RB-trees. See WL unit for details.
+ * This data structure is used in the WL sub-system. Each physical eraseblock
+ * has a corresponding &struct wl_entry object which may be kept in different
+ * RB-trees. See WL sub-system for details.
*/
struct ubi_wl_entry {
struct rb_node rb;
* @mutex: read/write mutex to implement read/write access serialization to
* the (@vol_id, @lnum) logical eraseblock
*
- * This data structure is used in the EBA unit to implement per-LEB locking.
- * When a logical eraseblock is being locked - corresponding
+ * This data structure is used in the EBA sub-system to implement per-LEB
+ * locking. When a logical eraseblock is being locked - corresponding
* &struct ubi_ltree_entry object is inserted to the lock tree (@ubi->ltree).
- * See EBA unit for details.
+ * See EBA sub-system for details.
*/
struct ubi_ltree_entry {
struct rb_node rb;
struct rw_semaphore mutex;
};
+/**
+ * struct ubi_rename_entry - volume re-name description data structure.
+ * @new_name_len: new volume name length
+ * @new_name: new volume name
+ * @remove: if not zero, this volume should be removed, not re-named
+ * @desc: descriptor of the volume
+ * @list: links re-name entries into a list
+ *
+ * This data structure is utilized in the multiple volume re-name code. Namely,
+ * UBI first creates a list of &struct ubi_rename_entry objects from the
+ * &struct ubi_rnvol_req request object, and then utilizes this list to do all
+ * the job.
+ */
+struct ubi_rename_entry {
+ int new_name_len;
+ char new_name[UBI_VOL_NAME_MAX + 1];
+ int remove;
+ struct ubi_volume_desc *desc;
+ struct list_head list;
+};
+
struct ubi_volume_desc;
/**
int alignment;
int data_pad;
int name_len;
- char name[UBI_VOL_NAME_MAX+1];
+ char name[UBI_VOL_NAME_MAX + 1];
int upd_ebs;
int ch_lnum;
#ifdef CONFIG_MTD_UBI_GLUEBI
/*
* Gluebi-related stuff may be compiled out.
- * TODO: this should not be built into UBI but should be a separate
+ * Note: this should not be built into UBI but should be a separate
* ubimtd driver which works on top of UBI and emulates MTD devices.
*/
struct ubi_volume_desc *gluebi_desc;
};
/**
- * struct ubi_volume_desc - descriptor of the UBI volume returned when it is
- * opened.
+ * struct ubi_volume_desc - UBI volume descriptor returned when it is opened.
* @vol: reference to the corresponding volume description object
* @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE)
*/
* @vtbl_size: size of the volume table in bytes
* @vtbl: in-RAM volume table copy
* @volumes_mutex: protects on-flash volume table and serializes volume
- * changes, like creation, deletion, update, resize
+ * changes, like creation, deletion, update, re-size and re-name
*
* @max_ec: current highest erase counter value
* @mean_ec: current mean erase counter value
* @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works
* fields
* @move_mutex: serializes eraseblock moves
+ * @work_sem: sycnhronizes the WL worker with use tasks
* @wl_scheduled: non-zero if the wear-leveling was scheduled
* @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any
* physical eraseblock
* @ro_mode: if the UBI device is in read-only mode
* @leb_size: logical eraseblock size
* @leb_start: starting offset of logical eraseblocks within physical
- * eraseblocks
+ * eraseblocks
* @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size
* @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size
* @vid_hdr_offset: starting offset of the volume identifier header (might be
- * unaligned)
+ * unaligned)
* @vid_hdr_aloffset: starting offset of the VID header aligned to
* @hdrs_min_io_size
* @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset
* @peb_buf1: a buffer of PEB size used for different purposes
* @peb_buf2: another buffer of PEB size used for different purposes
* @buf_mutex: proptects @peb_buf1 and @peb_buf2
+ * @ckvol_mutex: serializes static volume checking when opening
+ * @mult_mutex: serializes operations on multiple volumes, like re-nameing
* @dbg_peb_buf: buffer of PEB size used for debugging
* @dbg_buf_mutex: proptects @dbg_peb_buf
*/
struct mutex volumes_mutex;
int max_ec;
- /* TODO: mean_ec is not updated run-time, fix */
+ /* Note, mean_ec is not updated run-time - should be fixed */
int mean_ec;
- /* EBA unit's stuff */
+ /* EBA sub-system's stuff */
unsigned long long global_sqnum;
spinlock_t ltree_lock;
struct rb_root ltree;
struct mutex alc_mutex;
- /* Wear-leveling unit's stuff */
+ /* Wear-leveling sub-system's stuff */
struct rb_root used;
struct rb_root free;
struct rb_root scrub;
int thread_enabled;
char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2];
- /* I/O unit's stuff */
+ /* I/O sub-system's stuff */
long long flash_size;
int peb_count;
int peb_size;
void *peb_buf2;
struct mutex buf_mutex;
struct mutex ckvol_mutex;
+ struct mutex mult_mutex;
#ifdef CONFIG_MTD_UBI_DEBUG
void *dbg_peb_buf;
struct mutex dbg_buf_mutex;
/* vtbl.c */
int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
struct ubi_vtbl_record *vtbl_rec);
+int ubi_vtbl_rename_volumes(struct ubi_device *ubi,
+ struct list_head *rename_list);
int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si);
/* vmt.c */
int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req);
-int ubi_remove_volume(struct ubi_volume_desc *desc);
+int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl);
int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs);
+int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list);
int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol);
void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol);
const void __user *buf, int count);
/* misc.c */
-int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, int length);
+int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf,
+ int length);
int ubi_check_volume(struct ubi_device *ubi, int vol_id);
void ubi_calculate_reserved(struct ubi_device *ubi);
int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
struct ubi_vid_hdr *vid_hdr);
int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si);
-void ubi_eba_close(const struct ubi_device *ubi);
/* wl.c */
int ubi_wl_get_peb(struct ubi_device *ubi, int dtype);
*/
#include <linux/err.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/div64.h>
#include "ubi.h"
int err;
struct ubi_vtbl_record vtbl_rec;
- dbg_msg("set update marker for volume %d", vol->vol_id);
+ dbg_gen("set update marker for volume %d", vol->vol_id);
if (vol->upd_marker) {
ubi_assert(ubi->vtbl[vol->vol_id].upd_marker);
- dbg_msg("already set");
+ dbg_gen("already set");
return 0;
}
uint64_t tmp;
struct ubi_vtbl_record vtbl_rec;
- dbg_msg("clear update marker for volume %d", vol->vol_id);
+ dbg_gen("clear update marker for volume %d", vol->vol_id);
memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id],
sizeof(struct ubi_vtbl_record));
int i, err;
uint64_t tmp;
- dbg_msg("start update of volume %d, %llu bytes", vol->vol_id, bytes);
+ dbg_gen("start update of volume %d, %llu bytes", vol->vol_id, bytes);
ubi_assert(!vol->updating && !vol->changing_leb);
vol->updating = 1;
{
ubi_assert(!vol->updating && !vol->changing_leb);
- dbg_msg("start changing LEB %d:%d, %u bytes",
+ dbg_gen("start changing LEB %d:%d, %u bytes",
vol->vol_id, req->lnum, req->bytes);
if (req->bytes == 0)
return ubi_eba_atomic_leb_change(ubi, vol, req->lnum, NULL, 0,
int err;
if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
- len = ALIGN(len, ubi->min_io_size);
- memset(buf + len, 0xFF, len - len);
+ int l = ALIGN(len, ubi->min_io_size);
- len = ubi_calc_data_len(ubi, buf, len);
+ memset(buf + len, 0xFF, l - len);
+ len = ubi_calc_data_len(ubi, buf, l);
if (len == 0) {
- dbg_msg("all %d bytes contain 0xFF - skip", len);
+ dbg_gen("all %d bytes contain 0xFF - skip", len);
return 0;
}
- err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len, UBI_UNKNOWN);
+ err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len,
+ UBI_UNKNOWN);
} else {
/*
* When writing static volume, and this is the last logical
/**
* ubi_more_update_data - write more update data.
+ * @ubi: UBI device description object
* @vol: volume description object
* @buf: write data (user-space memory buffer)
* @count: how much bytes to write
uint64_t tmp;
int lnum, offs, err = 0, len, to_write = count;
- dbg_msg("write %d of %lld bytes, %lld already passed",
+ dbg_gen("write %d of %lld bytes, %lld already passed",
count, vol->upd_bytes, vol->upd_received);
if (ubi->ro_mode)
/**
* ubi_more_leb_change_data - accept more data for atomic LEB change.
+ * @ubi: UBI device description object
* @vol: volume description object
* @buf: write data (user-space memory buffer)
* @count: how much bytes to write
{
int err;
- dbg_msg("write %d of %lld bytes, %lld already passed",
+ dbg_gen("write %d of %lld bytes, %lld already passed",
count, vol->upd_bytes, vol->upd_received);
if (ubi->ro_mode)
if (vol->upd_received == vol->upd_bytes) {
int len = ALIGN((int)vol->upd_bytes, ubi->min_io_size);
- memset(vol->upd_buf + vol->upd_bytes, 0xFF, len - vol->upd_bytes);
+ memset(vol->upd_buf + vol->upd_bytes, 0xFF,
+ len - vol->upd_bytes);
len = ubi_calc_data_len(ubi, vol->upd_buf, len);
err = ubi_eba_atomic_leb_change(ubi, vol, vol->ch_lnum,
vol->upd_buf, len, UBI_UNKNOWN);
#include "ubi.h"
#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
-static void paranoid_check_volumes(struct ubi_device *ubi);
+static int paranoid_check_volumes(struct ubi_device *ubi);
#else
-#define paranoid_check_volumes(ubi)
+#define paranoid_check_volumes(ubi) 0
#endif
static ssize_t vol_attribute_show(struct device *dev,
{
struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev);
+ kfree(vol->eba_tbl);
kfree(vol);
}
*/
int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
{
- int i, err, vol_id = req->vol_id, dont_free = 0;
+ int i, err, vol_id = req->vol_id, do_free = 1;
struct ubi_volume *vol;
struct ubi_vtbl_record vtbl_rec;
uint64_t bytes;
spin_lock(&ubi->volumes_lock);
if (vol_id == UBI_VOL_NUM_AUTO) {
/* Find unused volume ID */
- dbg_msg("search for vacant volume ID");
+ dbg_gen("search for vacant volume ID");
for (i = 0; i < ubi->vtbl_slots; i++)
if (!ubi->volumes[i]) {
vol_id = i;
req->vol_id = vol_id;
}
- dbg_msg("volume ID %d, %llu bytes, type %d, name %s",
+ dbg_gen("volume ID %d, %llu bytes, type %d, name %s",
vol_id, (unsigned long long)req->bytes,
(int)req->vol_type, req->name);
goto out_unlock;
}
- /* Calculate how many eraseblocks are requested */
+ /* Calculate how many eraseblocks are requested */
vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment;
bytes = req->bytes;
if (do_div(bytes, vol->usable_leb_size))
vol->data_pad = ubi->leb_size % vol->alignment;
vol->vol_type = req->vol_type;
vol->name_len = req->name_len;
- memcpy(vol->name, req->name, vol->name_len + 1);
+ memcpy(vol->name, req->name, vol->name_len);
vol->ubi = ubi;
/*
vtbl_rec.vol_type = UBI_VID_DYNAMIC;
else
vtbl_rec.vol_type = UBI_VID_STATIC;
- memcpy(vtbl_rec.name, vol->name, vol->name_len + 1);
+ memcpy(vtbl_rec.name, vol->name, vol->name_len);
err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);
if (err)
ubi->vol_count += 1;
spin_unlock(&ubi->volumes_lock);
- paranoid_check_volumes(ubi);
- return 0;
+ err = paranoid_check_volumes(ubi);
+ return err;
out_sysfs:
/*
- * We have registered our device, we should not free the volume*
+ * We have registered our device, we should not free the volume
* description object in this function in case of an error - it is
* freed by the release function.
*
* Get device reference to prevent the release function from being
* called just after sysfs has been closed.
*/
- dont_free = 1;
+ do_free = 0;
get_device(&vol->dev);
volume_sysfs_close(vol);
out_gluebi:
out_cdev:
cdev_del(&vol->cdev);
out_mapping:
- kfree(vol->eba_tbl);
+ if (do_free)
+ kfree(vol->eba_tbl);
out_acc:
spin_lock(&ubi->volumes_lock);
ubi->rsvd_pebs -= vol->reserved_pebs;
ubi->avail_pebs += vol->reserved_pebs;
out_unlock:
spin_unlock(&ubi->volumes_lock);
- if (dont_free)
- put_device(&vol->dev);
- else
+ if (do_free)
kfree(vol);
+ else
+ put_device(&vol->dev);
ubi_err("cannot create volume %d, error %d", vol_id, err);
return err;
}
/**
* ubi_remove_volume - remove volume.
* @desc: volume descriptor
+ * @no_vtbl: do not change volume table if not zero
*
* This function removes volume described by @desc. The volume has to be opened
* in "exclusive" mode. Returns zero in case of success and a negative error
* code in case of failure. The caller has to have the @ubi->volumes_mutex
* locked.
*/
-int ubi_remove_volume(struct ubi_volume_desc *desc)
+int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl)
{
struct ubi_volume *vol = desc->vol;
struct ubi_device *ubi = vol->ubi;
int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs;
- dbg_msg("remove UBI volume %d", vol_id);
+ dbg_gen("remove UBI volume %d", vol_id);
ubi_assert(desc->mode == UBI_EXCLUSIVE);
ubi_assert(vol == ubi->volumes[vol_id]);
if (err)
goto out_err;
- err = ubi_change_vtbl_record(ubi, vol_id, NULL);
- if (err)
- goto out_err;
+ if (!no_vtbl) {
+ err = ubi_change_vtbl_record(ubi, vol_id, NULL);
+ if (err)
+ goto out_err;
+ }
for (i = 0; i < vol->reserved_pebs; i++) {
err = ubi_eba_unmap_leb(ubi, vol, i);
goto out_err;
}
- kfree(vol->eba_tbl);
- vol->eba_tbl = NULL;
cdev_del(&vol->cdev);
volume_sysfs_close(vol);
ubi->vol_count -= 1;
spin_unlock(&ubi->volumes_lock);
- paranoid_check_volumes(ubi);
- return 0;
+ if (!no_vtbl)
+ err = paranoid_check_volumes(ubi);
+ return err;
out_err:
ubi_err("cannot remove volume %d, error %d", vol_id, err);
if (ubi->ro_mode)
return -EROFS;
- dbg_msg("re-size volume %d to from %d to %d PEBs",
+ dbg_gen("re-size volume %d to from %d to %d PEBs",
vol_id, vol->reserved_pebs, reserved_pebs);
if (vol->vol_type == UBI_STATIC_VOLUME &&
(long long)vol->used_ebs * vol->usable_leb_size;
}
- paranoid_check_volumes(ubi);
- return 0;
+ err = paranoid_check_volumes(ubi);
+ return err;
out_acc:
if (pebs > 0) {
return err;
}
+/**
+ * ubi_rename_volumes - re-name UBI volumes.
+ * @ubi: UBI device description object
+ * @rename_list: list of &struct ubi_rename_entry objects
+ *
+ * This function re-names or removes volumes specified in the re-name list.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+int ubi_rename_volumes(struct ubi_device *ubi, struct list_head *rename_list)
+{
+ int err;
+ struct ubi_rename_entry *re;
+
+ err = ubi_vtbl_rename_volumes(ubi, rename_list);
+ if (err)
+ return err;
+
+ list_for_each_entry(re, rename_list, list) {
+ if (re->remove) {
+ err = ubi_remove_volume(re->desc, 1);
+ if (err)
+ break;
+ } else {
+ struct ubi_volume *vol = re->desc->vol;
+
+ spin_lock(&ubi->volumes_lock);
+ vol->name_len = re->new_name_len;
+ memcpy(vol->name, re->new_name, re->new_name_len + 1);
+ spin_unlock(&ubi->volumes_lock);
+ }
+ }
+
+ if (!err)
+ err = paranoid_check_volumes(ubi);
+ return err;
+}
+
/**
* ubi_add_volume - add volume.
* @ubi: UBI device description object
int err, vol_id = vol->vol_id;
dev_t dev;
- dbg_msg("add volume %d", vol_id);
- ubi_dbg_dump_vol_info(vol);
+ dbg_gen("add volume %d", vol_id);
/* Register character device for the volume */
cdev_init(&vol->cdev, &ubi_vol_cdev_operations);
return err;
}
- paranoid_check_volumes(ubi);
- return 0;
+ err = paranoid_check_volumes(ubi);
+ return err;
out_gluebi:
err = ubi_destroy_gluebi(vol);
{
int err;
- dbg_msg("free volume %d", vol->vol_id);
+ dbg_gen("free volume %d", vol->vol_id);
ubi->volumes[vol->vol_id] = NULL;
err = ubi_destroy_gluebi(vol);
* paranoid_check_volume - check volume information.
* @ubi: UBI device description object
* @vol_id: volume ID
+ *
+ * Returns zero if volume is all right and a a negative error code if not.
*/
-static void paranoid_check_volume(struct ubi_device *ubi, int vol_id)
+static int paranoid_check_volume(struct ubi_device *ubi, int vol_id)
{
int idx = vol_id2idx(ubi, vol_id);
int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker;
goto fail;
}
spin_unlock(&ubi->volumes_lock);
- return;
- }
-
- if (vol->exclusive) {
- /*
- * The volume may be being created at the moment, do not check
- * it (e.g., it may be in the middle of ubi_create_volume().
- */
- spin_unlock(&ubi->volumes_lock);
- return;
+ return 0;
}
if (vol->reserved_pebs < 0 || vol->alignment < 0 || vol->data_pad < 0 ||
goto fail;
}
- n = vol->alignment % ubi->min_io_size;
+ n = vol->alignment & (ubi->min_io_size - 1);
if (vol->alignment != 1 && n) {
ubi_err("alignment is not multiple of min I/O unit");
goto fail;
if (alignment != vol->alignment || data_pad != vol->data_pad ||
upd_marker != vol->upd_marker || vol_type != vol->vol_type ||
- name_len!= vol->name_len || strncmp(name, vol->name, name_len)) {
+ name_len != vol->name_len || strncmp(name, vol->name, name_len)) {
ubi_err("volume info is different");
goto fail;
}
spin_unlock(&ubi->volumes_lock);
- return;
+ return 0;
fail:
ubi_err("paranoid check failed for volume %d", vol_id);
- ubi_dbg_dump_vol_info(vol);
+ if (vol)
+ ubi_dbg_dump_vol_info(vol);
ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id);
spin_unlock(&ubi->volumes_lock);
- BUG();
+ return -EINVAL;
}
/**
* paranoid_check_volumes - check information about all volumes.
* @ubi: UBI device description object
+ *
+ * Returns zero if volumes are all right and a a negative error code if not.
*/
-static void paranoid_check_volumes(struct ubi_device *ubi)
+static int paranoid_check_volumes(struct ubi_device *ubi)
{
- int i;
+ int i, err = 0;
- for (i = 0; i < ubi->vtbl_slots; i++)
- paranoid_check_volume(ubi, i);
+ for (i = 0; i < ubi->vtbl_slots; i++) {
+ err = paranoid_check_volume(ubi, i);
+ if (err)
+ break;
+ }
+
+ return err;
}
#endif
}
/**
- * vtbl_check - check if volume table is not corrupted and contains sensible
- * data.
+ * ubi_vtbl_rename_volumes - rename UBI volumes in the volume table.
+ * @ubi: UBI device description object
+ * @rename_list: list of &struct ubi_rename_entry objects
+ *
+ * This function re-names multiple volumes specified in @req in the volume
+ * table. Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+int ubi_vtbl_rename_volumes(struct ubi_device *ubi,
+ struct list_head *rename_list)
+{
+ int i, err;
+ struct ubi_rename_entry *re;
+ struct ubi_volume *layout_vol;
+
+ list_for_each_entry(re, rename_list, list) {
+ uint32_t crc;
+ struct ubi_volume *vol = re->desc->vol;
+ struct ubi_vtbl_record *vtbl_rec = &ubi->vtbl[vol->vol_id];
+
+ if (re->remove) {
+ memcpy(vtbl_rec, &empty_vtbl_record,
+ sizeof(struct ubi_vtbl_record));
+ continue;
+ }
+
+ vtbl_rec->name_len = cpu_to_be16(re->new_name_len);
+ memcpy(vtbl_rec->name, re->new_name, re->new_name_len);
+ memset(vtbl_rec->name + re->new_name_len, 0,
+ UBI_VOL_NAME_MAX + 1 - re->new_name_len);
+ crc = crc32(UBI_CRC32_INIT, vtbl_rec,
+ UBI_VTBL_RECORD_SIZE_CRC);
+ vtbl_rec->crc = cpu_to_be32(crc);
+ }
+
+ layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)];
+ for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) {
+ err = ubi_eba_unmap_leb(ubi, layout_vol, i);
+ if (err)
+ return err;
+
+ err = ubi_eba_write_leb(ubi, layout_vol, i, ubi->vtbl, 0,
+ ubi->vtbl_size, UBI_LONGTERM);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * vtbl_check - check if volume table is not corrupted and sensible.
* @ubi: UBI device description object
* @vtbl: volume table
*
const struct ubi_vtbl_record *vtbl)
{
int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len;
- int upd_marker;
+ int upd_marker, err;
uint32_t crc;
const char *name;
if (reserved_pebs == 0) {
if (memcmp(&vtbl[i], &empty_vtbl_record,
UBI_VTBL_RECORD_SIZE)) {
- dbg_err("bad empty record");
+ err = 2;
goto bad;
}
continue;
if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 ||
name_len < 0) {
- dbg_err("negative values");
+ err = 3;
goto bad;
}
if (alignment > ubi->leb_size || alignment == 0) {
- dbg_err("bad alignment");
+ err = 4;
goto bad;
}
- n = alignment % ubi->min_io_size;
+ n = alignment & (ubi->min_io_size - 1);
if (alignment != 1 && n) {
- dbg_err("alignment is not multiple of min I/O unit");
+ err = 5;
goto bad;
}
n = ubi->leb_size % alignment;
if (data_pad != n) {
dbg_err("bad data_pad, has to be %d", n);
+ err = 6;
goto bad;
}
if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) {
- dbg_err("bad vol_type");
+ err = 7;
goto bad;
}
if (upd_marker != 0 && upd_marker != 1) {
- dbg_err("bad upd_marker");
+ err = 8;
goto bad;
}
if (reserved_pebs > ubi->good_peb_count) {
dbg_err("too large reserved_pebs, good PEBs %d",
ubi->good_peb_count);
+ err = 9;
goto bad;
}
if (name_len > UBI_VOL_NAME_MAX) {
- dbg_err("too long volume name, max %d",
- UBI_VOL_NAME_MAX);
+ err = 10;
goto bad;
}
if (name[0] == '\0') {
- dbg_err("NULL volume name");
+ err = 11;
goto bad;
}
if (name_len != strnlen(name, name_len + 1)) {
- dbg_err("bad name_len");
+ err = 12;
goto bad;
}
}
return 0;
bad:
- ubi_err("volume table check failed, record %d", i);
+ ubi_err("volume table check failed: record %d, error %d", i, err);
ubi_dbg_dump_vtbl_record(&vtbl[i], i);
return -EINVAL;
}
vid_hdr->data_pad = cpu_to_be32(0);
vid_hdr->lnum = cpu_to_be32(copy);
vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum);
- vid_hdr->leb_ver = cpu_to_be32(old_seb ? old_seb->leb_ver + 1: 0);
/* The EC header is already there, write the VID header */
err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr);
* to LEB 0.
*/
- dbg_msg("check layout volume");
+ dbg_gen("check layout volume");
/* Read both LEB 0 and LEB 1 into memory */
ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) {
err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0,
ubi->vtbl_size);
if (err == UBI_IO_BITFLIPS || err == -EBADMSG)
- /* Scrub the PEB later */
+ /*
+ * Scrub the PEB later. Note, -EBADMSG indicates an
+ * uncorrectable ECC error, but we have our own CRC and
+ * the data will be checked later. If the data is OK,
+ * the PEB will be scrubbed (because we set
+ * seb->scrub). If the data is not OK, the contents of
+ * the PEB will be recovered from the second copy, and
+ * seb->scrub will be cleared in
+ * 'ubi_scan_add_used()'.
+ */
seb->scrub = 1;
else if (err)
goto out_free;
if (!leb_corrupted[0]) {
/* LEB 0 is OK */
if (leb[1])
- leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size);
+ leb_corrupted[1] = memcmp(leb[0], leb[1],
+ ubi->vtbl_size);
if (leb_corrupted[1]) {
ubi_warn("volume table copy #2 is corrupted");
err = create_vtbl(ubi, si, 1, leb[0]);
static int check_sv(const struct ubi_volume *vol,
const struct ubi_scan_volume *sv)
{
+ int err;
+
if (sv->highest_lnum >= vol->reserved_pebs) {
- dbg_err("bad highest_lnum");
+ err = 1;
goto bad;
}
if (sv->leb_count > vol->reserved_pebs) {
- dbg_err("bad leb_count");
+ err = 2;
goto bad;
}
if (sv->vol_type != vol->vol_type) {
- dbg_err("bad vol_type");
+ err = 3;
goto bad;
}
if (sv->used_ebs > vol->reserved_pebs) {
- dbg_err("bad used_ebs");
+ err = 4;
goto bad;
}
if (sv->data_pad != vol->data_pad) {
- dbg_err("bad data_pad");
+ err = 5;
goto bad;
}
return 0;
bad:
- ubi_err("bad scanning information");
+ ubi_err("bad scanning information, error %d", err);
ubi_dbg_dump_sv(sv);
ubi_dbg_dump_vol_info(vol);
return -EINVAL;
return -EINVAL;
}
- if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT&&
+ if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT &&
si->highest_vol_id < UBI_INTERNAL_VOL_START) {
ubi_err("too large volume ID %d found by scanning",
si->highest_vol_id);
return -EINVAL;
}
-
for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
cond_resched();
}
/**
- * ubi_read_volume_table - read volume table.
- * information.
+ * ubi_read_volume_table - read the volume table.
* @ubi: UBI device description object
* @si: scanning information
*
out_free:
vfree(ubi->vtbl);
- for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++)
- if (ubi->volumes[i]) {
- kfree(ubi->volumes[i]);
- ubi->volumes[i] = NULL;
- }
+ for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
+ kfree(ubi->volumes[i]);
+ ubi->volumes[i] = NULL;
+ }
return err;
}
*/
/*
- * UBI wear-leveling unit.
+ * UBI wear-leveling sub-system.
*
- * This unit is responsible for wear-leveling. It works in terms of physical
- * eraseblocks and erase counters and knows nothing about logical eraseblocks,
- * volumes, etc. From this unit's perspective all physical eraseblocks are of
- * two types - used and free. Used physical eraseblocks are those that were
- * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are
- * those that were put by the 'ubi_wl_put_peb()' function.
+ * This sub-system is responsible for wear-leveling. It works in terms of
+ * physical* eraseblocks and erase counters and knows nothing about logical
+ * eraseblocks, volumes, etc. From this sub-system's perspective all physical
+ * eraseblocks are of two types - used and free. Used physical eraseblocks are
+ * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical
+ * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function.
*
* Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter
- * header. The rest of the physical eraseblock contains only 0xFF bytes.
+ * header. The rest of the physical eraseblock contains only %0xFF bytes.
*
- * When physical eraseblocks are returned to the WL unit by means of the
+ * When physical eraseblocks are returned to the WL sub-system by means of the
* 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
* done asynchronously in context of the per-UBI device background thread,
- * which is also managed by the WL unit.
+ * which is also managed by the WL sub-system.
*
* The wear-leveling is ensured by means of moving the contents of used
* physical eraseblocks with low erase counter to free physical eraseblocks
* The 'ubi_wl_get_peb()' function accepts data type hints which help to pick
* an "optimal" physical eraseblock. For example, when it is known that the
* physical eraseblock will be "put" soon because it contains short-term data,
- * the WL unit may pick a free physical eraseblock with low erase counter, and
- * so forth.
+ * the WL sub-system may pick a free physical eraseblock with low erase
+ * counter, and so forth.
*
- * If the WL unit fails to erase a physical eraseblock, it marks it as bad.
+ * If the WL sub-system fails to erase a physical eraseblock, it marks it as
+ * bad.
*
- * This unit is also responsible for scrubbing. If a bit-flip is detected in a
- * physical eraseblock, it has to be moved. Technically this is the same as
- * moving it for wear-leveling reasons.
+ * This sub-system is also responsible for scrubbing. If a bit-flip is detected
+ * in a physical eraseblock, it has to be moved. Technically this is the same
+ * as moving it for wear-leveling reasons.
*
- * As it was said, for the UBI unit all physical eraseblocks are either "free"
- * or "used". Free eraseblock are kept in the @wl->free RB-tree, while used
- * eraseblocks are kept in a set of different RB-trees: @wl->used,
+ * As it was said, for the UBI sub-system all physical eraseblocks are either
+ * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while
+ * used eraseblocks are kept in a set of different RB-trees: @wl->used,
* @wl->prot.pnum, @wl->prot.aec, and @wl->scrub.
*
* Note, in this implementation, we keep a small in-RAM object for each physical
* eraseblock. This is surely not a scalable solution. But it appears to be good
* enough for moderately large flashes and it is simple. In future, one may
- * re-work this unit and make it more scalable.
+ * re-work this sub-system and make it more scalable.
*
- * At the moment this unit does not utilize the sequence number, which was
- * introduced relatively recently. But it would be wise to do this because the
- * sequence number of a logical eraseblock characterizes how old is it. For
+ * At the moment this sub-system does not utilize the sequence number, which
+ * was introduced relatively recently. But it would be wise to do this because
+ * the sequence number of a logical eraseblock characterizes how old is it. For
* example, when we move a PEB with low erase counter, and we need to pick the
* target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
* pick target PEB with an average EC if our PEB is not very "old". This is a
- * room for future re-works of the WL unit.
+ * room for future re-works of the WL sub-system.
*
- * FIXME: looks too complex, should be simplified (later).
+ * Note: the stuff with protection trees looks too complex and is difficult to
+ * understand. Should be fixed.
*/
#include <linux/slab.h>
/*
* Maximum difference between two erase counters. If this threshold is
- * exceeded, the WL unit starts moving data from used physical eraseblocks with
- * low erase counter to free physical eraseblocks with high erase counter.
+ * exceeded, the WL sub-system starts moving data from used physical
+ * eraseblocks with low erase counter to free physical eraseblocks with high
+ * erase counter.
*/
#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
/*
- * When a physical eraseblock is moved, the WL unit has to pick the target
+ * When a physical eraseblock is moved, the WL sub-system has to pick the target
* physical eraseblock to move to. The simplest way would be just to pick the
* one with the highest erase counter. But in certain workloads this could lead
* to an unlimited wear of one or few physical eraseblock. Indeed, imagine a
* situation when the picked physical eraseblock is constantly erased after the
* data is written to it. So, we have a constant which limits the highest erase
- * counter of the free physical eraseblock to pick. Namely, the WL unit does
- * not pick eraseblocks with erase counter greater then the lowest erase
+ * counter of the free physical eraseblock to pick. Namely, the WL sub-system
+ * does not pick eraseblocks with erase counter greater then the lowest erase
* counter plus %WL_FREE_MAX_DIFF.
*/
#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
* @abs_ec: the absolute erase counter value when the protection ends
* @e: the wear-leveling entry of the physical eraseblock under protection
*
- * When the WL unit returns a physical eraseblock, the physical eraseblock is
- * protected from being moved for some "time". For this reason, the physical
- * eraseblock is not directly moved from the @wl->free tree to the @wl->used
- * tree. There is one more tree in between where this physical eraseblock is
- * temporarily stored (@wl->prot).
+ * When the WL sub-system returns a physical eraseblock, the physical
+ * eraseblock is protected from being moved for some "time". For this reason,
+ * the physical eraseblock is not directly moved from the @wl->free tree to the
+ * @wl->used tree. There is one more tree in between where this physical
+ * eraseblock is temporarily stored (@wl->prot).
*
* All this protection stuff is needed because:
* o we don't want to move physical eraseblocks just after we have given them
* @list: a link in the list of pending works
* @func: worker function
* @priv: private data of the worker function
- *
* @e: physical eraseblock to erase
* @torture: if the physical eraseblock has to be tortured
*
}
switch (dtype) {
- case UBI_LONGTERM:
- /*
- * For long term data we pick a physical eraseblock
- * with high erase counter. But the highest erase
- * counter we can pick is bounded by the the lowest
- * erase counter plus %WL_FREE_MAX_DIFF.
- */
- e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
- protect = LT_PROTECTION;
- break;
- case UBI_UNKNOWN:
- /*
- * For unknown data we pick a physical eraseblock with
- * medium erase counter. But we by no means can pick a
- * physical eraseblock with erase counter greater or
- * equivalent than the lowest erase counter plus
- * %WL_FREE_MAX_DIFF.
- */
- first = rb_entry(rb_first(&ubi->free),
- struct ubi_wl_entry, rb);
- last = rb_entry(rb_last(&ubi->free),
- struct ubi_wl_entry, rb);
+ case UBI_LONGTERM:
+ /*
+ * For long term data we pick a physical eraseblock with high
+ * erase counter. But the highest erase counter we can pick is
+ * bounded by the the lowest erase counter plus
+ * %WL_FREE_MAX_DIFF.
+ */
+ e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
+ protect = LT_PROTECTION;
+ break;
+ case UBI_UNKNOWN:
+ /*
+ * For unknown data we pick a physical eraseblock with medium
+ * erase counter. But we by no means can pick a physical
+ * eraseblock with erase counter greater or equivalent than the
+ * lowest erase counter plus %WL_FREE_MAX_DIFF.
+ */
+ first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb);
+ last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, rb);
- if (last->ec - first->ec < WL_FREE_MAX_DIFF)
- e = rb_entry(ubi->free.rb_node,
- struct ubi_wl_entry, rb);
- else {
- medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
- e = find_wl_entry(&ubi->free, medium_ec);
- }
- protect = U_PROTECTION;
- break;
- case UBI_SHORTTERM:
- /*
- * For short term data we pick a physical eraseblock
- * with the lowest erase counter as we expect it will
- * be erased soon.
- */
- e = rb_entry(rb_first(&ubi->free),
- struct ubi_wl_entry, rb);
- protect = ST_PROTECTION;
- break;
- default:
- protect = 0;
- e = NULL;
- BUG();
+ if (last->ec - first->ec < WL_FREE_MAX_DIFF)
+ e = rb_entry(ubi->free.rb_node,
+ struct ubi_wl_entry, rb);
+ else {
+ medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
+ e = find_wl_entry(&ubi->free, medium_ec);
+ }
+ protect = U_PROTECTION;
+ break;
+ case UBI_SHORTTERM:
+ /*
+ * For short term data we pick a physical eraseblock with the
+ * lowest erase counter as we expect it will be erased soon.
+ */
+ e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb);
+ protect = ST_PROTECTION;
+ break;
+ default:
+ protect = 0;
+ e = NULL;
+ BUG();
}
/*
* This function returns zero in case of success and a negative error code in
* case of failure.
*/
-static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture)
+static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
+ int torture)
{
int err;
struct ubi_ec_hdr *ec_hdr;
}
/**
- * check_protection_over - check if it is time to stop protecting some
- * physical eraseblocks.
+ * check_protection_over - check if it is time to stop protecting some PEBs.
* @ubi: UBI device description object
*
* This function is called after each erase operation, when the absolute erase
}
ubi_free_vid_hdr(ubi, vid_hdr);
+ if (scrubbing && !protect)
+ ubi_msg("scrubbed PEB %d, data moved to PEB %d",
+ e1->pnum, e2->pnum);
+
spin_lock(&ubi->wl_lock);
if (protect)
prot_tree_add(ubi, e1, pe, protect);
spin_unlock(&ubi->wl_lock);
/*
- * One more erase operation has happened, take care about protected
- * physical eraseblocks.
+ * One more erase operation has happened, take care about
+ * protected physical eraseblocks.
*/
check_protection_over(ubi);
}
/**
- * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit.
+ * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system.
* @ubi: UBI device description object
* @pnum: physical eraseblock to return
* @torture: if this physical eraseblock has to be tortured
/*
* User is putting the physical eraseblock which was selected
* as the target the data is moved to. It may happen if the EBA
- * unit already re-mapped the LEB in 'ubi_eba_copy_leb()' but
- * the WL unit has not put the PEB to the "used" tree yet, but
- * it is about to do this. So we just set a flag which will
- * tell the WL worker that the PEB is not needed anymore and
- * should be scheduled for erasure.
+ * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()'
+ * but the WL sub-system has not put the PEB to the "used" tree
+ * yet, but it is about to do this. So we just set a flag which
+ * will tell the WL worker that the PEB is not needed anymore
+ * and should be scheduled for erasure.
*/
dbg_wl("PEB %d is the target of data moving", pnum);
ubi_assert(!ubi->move_to_put);
{
struct ubi_wl_entry *e;
- ubi_msg("schedule PEB %d for scrubbing", pnum);
+ dbg_msg("schedule PEB %d for scrubbing", pnum);
retry:
spin_lock(&ubi->wl_lock);
int err;
if (kthread_should_stop())
- goto out;
+ break;
if (try_to_freeze())
continue;
cond_resched();
}
-out:
dbg_wl("background thread \"%s\" is killed", ubi->bgt_name);
return 0;
}
}
/**
- * ubi_wl_init_scan - initialize the wear-leveling unit using scanning
- * information.
+ * ubi_wl_init_scan - initialize the WL sub-system using scanning information.
* @ubi: UBI device description object
* @si: scanning information
*
}
/**
- * ubi_wl_close - close the wear-leveling unit.
+ * ubi_wl_close - close the wear-leveling sub-system.
* @ubi: UBI device description object
*/
void ubi_wl_close(struct ubi_device *ubi)
{
- dbg_wl("close the UBI wear-leveling unit");
-
+ dbg_wl("close the WL sub-system");
cancel_pending(ubi);
protection_trees_destroy(ubi);
tree_destroy(&ubi->used);
#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
/**
- * paranoid_check_ec - make sure that the erase counter of a physical eraseblock
- * is correct.
+ * paranoid_check_ec - make sure that the erase counter of a PEB is correct.
* @ubi: UBI device description object
* @pnum: the physical eraseblock number to check
* @ec: the erase counter to check
}
/**
- * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present
- * in a WL RB-tree.
+ * paranoid_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree.
* @e: the wear-leveling entry to check
* @root: the root of the tree
*
- * This function returns zero if @e is in the @root RB-tree and %1 if it
- * is not.
+ * This function returns zero if @e is in the @root RB-tree and %1 if it is
+ * not.
*/
static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
struct rb_root *root)
*/
#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/ioport.h>
#include <asm/hvcall.h>
#include <asm/atomic.h>
#include <asm/vio.h>
+#include <asm/iommu.h>
#include <asm/uaccess.h>
+#include <asm/firmware.h>
#include <linux/seq_file.h>
#include "ibmveth.h"
static void ibmveth_proc_unregister_adapter(struct ibmveth_adapter *adapter);
static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance);
static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter);
+static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev);
static struct kobj_type ktype_veth_pool;
+
#ifdef CONFIG_PROC_FS
#define IBMVETH_PROC_DIR "ibmveth"
static struct proc_dir_entry *ibmveth_proc_dir;
u32 i;
u32 count = pool->size - atomic_read(&pool->available);
u32 buffers_added = 0;
+ struct sk_buff *skb;
+ unsigned int free_index, index;
+ u64 correlator;
+ unsigned long lpar_rc;
+ dma_addr_t dma_addr;
mb();
for(i = 0; i < count; ++i) {
- struct sk_buff *skb;
- unsigned int free_index, index;
- u64 correlator;
union ibmveth_buf_desc desc;
- unsigned long lpar_rc;
- dma_addr_t dma_addr;
skb = alloc_skb(pool->buff_size, GFP_ATOMIC);
dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
pool->buff_size, DMA_FROM_DEVICE);
+ if (dma_mapping_error(dma_addr))
+ goto failure;
+
pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
pool->dma_addr[index] = dma_addr;
pool->skbuff[index] = skb;
lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address, desc.desc);
- if(lpar_rc != H_SUCCESS) {
- pool->free_map[free_index] = index;
- pool->skbuff[index] = NULL;
- if (pool->consumer_index == 0)
- pool->consumer_index = pool->size - 1;
- else
- pool->consumer_index--;
- dma_unmap_single(&adapter->vdev->dev,
- pool->dma_addr[index], pool->buff_size,
- DMA_FROM_DEVICE);
- dev_kfree_skb_any(skb);
- adapter->replenish_add_buff_failure++;
- break;
- } else {
+ if (lpar_rc != H_SUCCESS)
+ goto failure;
+ else {
buffers_added++;
adapter->replenish_add_buff_success++;
}
}
+ mb();
+ atomic_add(buffers_added, &(pool->available));
+ return;
+
+failure:
+ pool->free_map[free_index] = index;
+ pool->skbuff[index] = NULL;
+ if (pool->consumer_index == 0)
+ pool->consumer_index = pool->size - 1;
+ else
+ pool->consumer_index--;
+ if (!dma_mapping_error(dma_addr))
+ dma_unmap_single(&adapter->vdev->dev,
+ pool->dma_addr[index], pool->buff_size,
+ DMA_FROM_DEVICE);
+ dev_kfree_skb_any(skb);
+ adapter->replenish_add_buff_failure++;
+
mb();
atomic_add(buffers_added, &(pool->available));
}
adapter->replenish_task_cycles++;
- for(i = 0; i < IbmVethNumBufferPools; i++)
+ for (i = (IbmVethNumBufferPools - 1); i >= 0; i--)
if(adapter->rx_buff_pool[i].active)
ibmveth_replenish_buffer_pool(adapter,
&adapter->rx_buff_pool[i]);
if (adapter->rx_buff_pool[i].active)
ibmveth_free_buffer_pool(adapter,
&adapter->rx_buff_pool[i]);
+
+ if (adapter->bounce_buffer != NULL) {
+ if (!dma_mapping_error(adapter->bounce_buffer_dma)) {
+ dma_unmap_single(&adapter->vdev->dev,
+ adapter->bounce_buffer_dma,
+ adapter->netdev->mtu + IBMVETH_BUFF_OH,
+ DMA_BIDIRECTIONAL);
+ adapter->bounce_buffer_dma = DMA_ERROR_CODE;
+ }
+ kfree(adapter->bounce_buffer);
+ adapter->bounce_buffer = NULL;
+ }
}
static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
return rc;
}
+ adapter->bounce_buffer =
+ kmalloc(netdev->mtu + IBMVETH_BUFF_OH, GFP_KERNEL);
+ if (!adapter->bounce_buffer) {
+ ibmveth_error_printk("unable to allocate bounce buffer\n");
+ ibmveth_cleanup(adapter);
+ napi_disable(&adapter->napi);
+ return -ENOMEM;
+ }
+ adapter->bounce_buffer_dma =
+ dma_map_single(&adapter->vdev->dev, adapter->bounce_buffer,
+ netdev->mtu + IBMVETH_BUFF_OH, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(adapter->bounce_buffer_dma)) {
+ ibmveth_error_printk("unable to map bounce buffer\n");
+ ibmveth_cleanup(adapter);
+ napi_disable(&adapter->napi);
+ return -ENOMEM;
+ }
+
ibmveth_debug_printk("initial replenish cycle\n");
ibmveth_interrupt(netdev->irq, netdev);
unsigned int tx_packets = 0;
unsigned int tx_send_failed = 0;
unsigned int tx_map_failed = 0;
+ int used_bounce = 0;
+ unsigned long data_dma_addr;
desc.fields.flags_len = IBMVETH_BUF_VALID | skb->len;
- desc.fields.address = dma_map_single(&adapter->vdev->dev, skb->data,
- skb->len, DMA_TO_DEVICE);
+ data_dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
+ skb->len, DMA_TO_DEVICE);
if (skb->ip_summed == CHECKSUM_PARTIAL &&
ip_hdr(skb)->protocol != IPPROTO_TCP && skb_checksum_help(skb)) {
buf[1] = 0;
}
- if (dma_mapping_error(desc.fields.address)) {
- ibmveth_error_printk("tx: unable to map xmit buffer\n");
+ if (dma_mapping_error(data_dma_addr)) {
+ if (!firmware_has_feature(FW_FEATURE_CMO))
+ ibmveth_error_printk("tx: unable to map xmit buffer\n");
+ skb_copy_from_linear_data(skb, adapter->bounce_buffer,
+ skb->len);
+ desc.fields.address = adapter->bounce_buffer_dma;
tx_map_failed++;
- tx_dropped++;
- goto out;
- }
+ used_bounce = 1;
+ } else
+ desc.fields.address = data_dma_addr;
/* send the frame. Arbitrarily set retrycount to 1024 */
correlator = 0;
netdev->trans_start = jiffies;
}
- dma_unmap_single(&adapter->vdev->dev, desc.fields.address,
- skb->len, DMA_TO_DEVICE);
+ if (!used_bounce)
+ dma_unmap_single(&adapter->vdev->dev, data_dma_addr,
+ skb->len, DMA_TO_DEVICE);
out: spin_lock_irqsave(&adapter->stats_lock, flags);
netdev->stats.tx_dropped += tx_dropped;
static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
{
struct ibmveth_adapter *adapter = dev->priv;
+ struct vio_dev *viodev = adapter->vdev;
int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
- int reinit = 0;
- int i, rc;
+ int i;
if (new_mtu < IBMVETH_MAX_MTU)
return -EINVAL;
if (i == IbmVethNumBufferPools)
return -EINVAL;
+ /* Deactivate all the buffer pools so that the next loop can activate
+ only the buffer pools necessary to hold the new MTU */
+ for (i = 0; i < IbmVethNumBufferPools; i++)
+ if (adapter->rx_buff_pool[i].active) {
+ ibmveth_free_buffer_pool(adapter,
+ &adapter->rx_buff_pool[i]);
+ adapter->rx_buff_pool[i].active = 0;
+ }
+
/* Look for an active buffer pool that can hold the new MTU */
for(i = 0; i<IbmVethNumBufferPools; i++) {
- if (!adapter->rx_buff_pool[i].active) {
- adapter->rx_buff_pool[i].active = 1;
- reinit = 1;
- }
+ adapter->rx_buff_pool[i].active = 1;
if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) {
- if (reinit && netif_running(adapter->netdev)) {
+ if (netif_running(adapter->netdev)) {
adapter->pool_config = 1;
ibmveth_close(adapter->netdev);
adapter->pool_config = 0;
dev->mtu = new_mtu;
- if ((rc = ibmveth_open(adapter->netdev)))
- return rc;
- } else
- dev->mtu = new_mtu;
+ vio_cmo_set_dev_desired(viodev,
+ ibmveth_get_desired_dma
+ (viodev));
+ return ibmveth_open(adapter->netdev);
+ }
+ dev->mtu = new_mtu;
+ vio_cmo_set_dev_desired(viodev,
+ ibmveth_get_desired_dma
+ (viodev));
return 0;
}
}
}
#endif
+/**
+ * ibmveth_get_desired_dma - Calculate IO memory desired by the driver
+ *
+ * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
+ *
+ * Return value:
+ * Number of bytes of IO data the driver will need to perform well.
+ */
+static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
+{
+ struct net_device *netdev = dev_get_drvdata(&vdev->dev);
+ struct ibmveth_adapter *adapter;
+ unsigned long ret;
+ int i;
+ int rxqentries = 1;
+
+ /* netdev inits at probe time along with the structures we need below*/
+ if (netdev == NULL)
+ return IOMMU_PAGE_ALIGN(IBMVETH_IO_ENTITLEMENT_DEFAULT);
+
+ adapter = netdev_priv(netdev);
+
+ ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
+ ret += IOMMU_PAGE_ALIGN(netdev->mtu);
+
+ for (i = 0; i < IbmVethNumBufferPools; i++) {
+ /* add the size of the active receive buffers */
+ if (adapter->rx_buff_pool[i].active)
+ ret +=
+ adapter->rx_buff_pool[i].size *
+ IOMMU_PAGE_ALIGN(adapter->rx_buff_pool[i].
+ buff_size);
+ rxqentries += adapter->rx_buff_pool[i].size;
+ }
+ /* add the size of the receive queue entries */
+ ret += IOMMU_PAGE_ALIGN(rxqentries * sizeof(struct ibmveth_rx_q_entry));
+
+ return ret;
+}
+
static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
{
int rc, i;
ibmveth_proc_unregister_adapter(adapter);
free_netdev(netdev);
+ dev_set_drvdata(&dev->dev, NULL);
+
return 0;
}
return -EPERM;
}
- pool->active = 0;
if (netif_running(netdev)) {
adapter->pool_config = 1;
ibmveth_close(netdev);
+ pool->active = 0;
adapter->pool_config = 0;
if ((rc = ibmveth_open(netdev)))
return rc;
}
+ pool->active = 0;
}
} else if (attr == &veth_num_attr) {
if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT)
.id_table = ibmveth_device_table,
.probe = ibmveth_probe,
.remove = ibmveth_remove,
+ .get_desired_dma = ibmveth_get_desired_dma,
.driver = {
.name = ibmveth_driver_name,
.owner = THIS_MODULE,
plpar_hcall_norets(H_CHANGE_LOGICAL_LAN_MAC, ua, mac)
#define IbmVethNumBufferPools 5
+#define IBMVETH_IO_ENTITLEMENT_DEFAULT 4243456 /* MTU of 1500 needs 4.2Mb */
#define IBMVETH_BUFF_OH 22 /* Overhead: 14 ethernet header + 8 opaque handle */
#define IBMVETH_MAX_MTU 68
#define IBMVETH_MAX_POOL_COUNT 4096
+#define IBMVETH_BUFF_LIST_SIZE 4096
+#define IBMVETH_FILT_LIST_SIZE 4096
#define IBMVETH_MAX_BUF_SIZE (1024 * 128)
static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 };
struct ibmveth_rx_q rx_queue;
int pool_config;
int rx_csum;
+ void *bounce_buffer;
+ dma_addr_t bounce_buffer_dma;
/* adapter specific stats */
u64 replenish_task_cycles;
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
struct tasklet_struct tasklet;
bool free_in_tasklet;
+ /* I like... big packets and I cannot lie! */
+ bool big_packets;
+
/* Receive & send queues. */
struct sk_buff_head recv;
struct sk_buff_head send;
+
+ /* Chain pages by the private ptr. */
+ struct page *pages;
};
static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb)
sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
}
+static void give_a_page(struct virtnet_info *vi, struct page *page)
+{
+ page->private = (unsigned long)vi->pages;
+ vi->pages = page;
+}
+
+static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
+{
+ struct page *p = vi->pages;
+
+ if (p)
+ vi->pages = (struct page *)p->private;
+ else
+ p = alloc_page(gfp_mask);
+ return p;
+}
+
static void skb_xmit_done(struct virtqueue *svq)
{
struct virtnet_info *vi = svq->vdev->priv;
unsigned len)
{
struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
+ int err;
if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
pr_debug("%s: short packet %i\n", dev->name, len);
goto drop;
}
len -= sizeof(struct virtio_net_hdr);
- BUG_ON(len > MAX_PACKET_LEN);
- skb_trim(skb, len);
+ if (len <= MAX_PACKET_LEN) {
+ unsigned int i;
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ give_a_page(dev->priv, skb_shinfo(skb)->frags[i].page);
+ skb->data_len = 0;
+ skb_shinfo(skb)->nr_frags = 0;
+ }
+
+ err = pskb_trim(skb, len);
+ if (err) {
+ pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err);
+ dev->stats.rx_dropped++;
+ goto drop;
+ }
+ skb->truesize += skb->data_len;
dev->stats.rx_bytes += skb->len;
dev->stats.rx_packets++;
{
struct sk_buff *skb;
struct scatterlist sg[2+MAX_SKB_FRAGS];
- int num, err;
+ int num, err, i;
sg_init_table(sg, 2+MAX_SKB_FRAGS);
for (;;) {
skb_put(skb, MAX_PACKET_LEN);
vnet_hdr_to_sg(sg, skb);
+
+ if (vi->big_packets) {
+ for (i = 0; i < MAX_SKB_FRAGS; i++) {
+ skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+ f->page = get_a_page(vi, GFP_ATOMIC);
+ if (!f->page)
+ break;
+
+ f->page_offset = 0;
+ f->size = PAGE_SIZE;
+
+ skb->data_len += PAGE_SIZE;
+ skb->len += PAGE_SIZE;
+
+ skb_shinfo(skb)->nr_frags++;
+ }
+ }
+
num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
skb_queue_head(&vi->recv, skb);
free_old_xmit_skbs(vi);
/* If we has a buffer left over from last time, send it now. */
- if (unlikely(vi->last_xmit_skb)) {
- if (xmit_skb(vi, vi->last_xmit_skb) != 0) {
- /* Drop this skb: we only queue one. */
- vi->dev->stats.tx_dropped++;
- kfree_skb(skb);
- skb = NULL;
- goto stop_queue;
- }
- vi->last_xmit_skb = NULL;
- }
+ if (unlikely(vi->last_xmit_skb) &&
+ xmit_skb(vi, vi->last_xmit_skb) != 0)
+ goto stop_queue;
+
+ vi->last_xmit_skb = NULL;
/* Put new one in send queue and do transmit */
if (likely(skb)) {
netif_start_queue(dev);
goto again;
}
+ if (skb) {
+ /* Drop this skb: we only queue one. */
+ vi->dev->stats.tx_dropped++;
+ kfree_skb(skb);
+ }
goto done;
}
return 0;
}
+static int virtnet_set_tx_csum(struct net_device *dev, u32 data)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct virtio_device *vdev = vi->vdev;
+
+ if (data && !virtio_has_feature(vdev, VIRTIO_NET_F_CSUM))
+ return -ENOSYS;
+
+ return ethtool_op_set_tx_hw_csum(dev, data);
+}
+
+static struct ethtool_ops virtnet_ethtool_ops = {
+ .set_tx_csum = virtnet_set_tx_csum,
+ .set_sg = ethtool_op_set_sg,
+};
+
static int virtnet_probe(struct virtio_device *vdev)
{
int err;
#ifdef CONFIG_NET_POLL_CONTROLLER
dev->poll_controller = virtnet_netpoll;
#endif
+ SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
SET_NETDEV_DEV(dev, &vdev->dev);
/* Do we support "hardware" checksums? */
vi->dev = dev;
vi->vdev = vdev;
vdev->priv = vi;
+ vi->pages = NULL;
/* If they give us a callback when all buffers are done, we don't need
* the timer. */
vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY);
+ /* If we can receive ANY GSO packets, we must allocate large ones. */
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4)
+ || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)
+ || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
+ vi->big_packets = true;
+
/* We expect two virtqueues, receive then send. */
vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
if (IS_ERR(vi->rvq)) {
vdev->config->del_vq(vi->svq);
vdev->config->del_vq(vi->rvq);
unregister_netdev(vi->dev);
+
+ while (vi->pages)
+ __free_pages(get_a_page(vi, GFP_KERNEL), 0);
+
free_netdev(vi->dev);
}
VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
- VIRTIO_NET_F_HOST_ECN, VIRTIO_F_NOTIFY_ON_EMPTY,
+ VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
+ VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */
+ VIRTIO_F_NOTIFY_ON_EMPTY,
};
static struct virtio_driver virtio_net = {
config OF_GPIO
def_bool y
- depends on OF && PPC_OF && HAVE_GPIO_LIB
+ depends on OF && PPC_OF && GPIOLIB
help
OpenFirmware GPIO accessors
}
info.irq = irq_of_parse_and_map(node, 0);
- if (info.irq == NO_IRQ)
- info.irq = -1;
if (of_find_i2c_driver(node, &info) < 0) {
irq_dispose_mapping(info.irq);
#define parport_ax88796_resume NULL
#endif
+MODULE_ALIAS("platform:ax88796-pp");
+
static struct platform_driver axdrv = {
.driver = {
.name = "ax88796-pp",
#endif /* CONFIG_PM */
+MODULE_ALIAS("platform:ds2760-battery");
+
static struct platform_driver ds2760_battery_driver = {
.driver = {
.name = "ds2760-battery",
#define pda_power_resume NULL
#endif /* CONFIG_PM */
+MODULE_ALIAS("platform:pda-power");
+
static struct platform_driver pda_power_pdrv = {
.driver = {
.name = "pda-power",
#include <linux/err.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
#include <linux/interrupt.h>
#include <linux/virtio_ring.h>
#include <linux/pfn.h>
return features;
}
-static void kvm_set_features(struct virtio_device *vdev, u32 features)
+static void kvm_finalize_features(struct virtio_device *vdev)
{
- unsigned int i;
+ unsigned int i, bits;
struct kvm_device_desc *desc = to_kvmdev(vdev)->desc;
/* Second half of bitmap is features we accept. */
u8 *out_features = kvm_vq_features(desc) + desc->feature_len;
+ /* Give virtio_ring a chance to accept features. */
+ vring_transport_features(vdev);
+
memset(out_features, 0, desc->feature_len);
- for (i = 0; i < min(desc->feature_len * 8, 32); i++) {
- if (features & (1 << i))
+ bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8;
+ for (i = 0; i < bits; i++) {
+ if (test_bit(i, vdev->features))
out_features[i / 8] |= (1 << (i % 8));
}
}
*/
static struct virtio_config_ops kvm_vq_configspace_ops = {
.get_features = kvm_get_features,
- .set_features = kvm_set_features,
+ .finalize_features = kvm_finalize_features,
.get = kvm_get,
.set = kvm_set,
.get_status = kvm_get_status,
return 0;
}
+/* code for early console output with virtio_console */
+static __init int early_put_chars(u32 vtermno, const char *buf, int count)
+{
+ char scratch[17];
+ unsigned int len = count;
+
+ if (len > sizeof(scratch) - 1)
+ len = sizeof(scratch) - 1;
+ scratch[len] = '\0';
+ memcpy(scratch, buf, len);
+ kvm_hypercall1(KVM_S390_VIRTIO_NOTIFY, __pa(scratch));
+ return len;
+}
+
+void s390_virtio_console_init(void)
+{
+ virtio_cons_early_init(early_put_chars);
+}
+
/*
* We do this after core stuff, but before the drivers.
*/
return 0;
}
+/**
+ * ibmvfc_get_desired_dma - Calculate DMA resources needed by the driver
+ * @vdev: vio device struct
+ *
+ * Return value:
+ * Number of bytes the driver will need to DMA map at the same time in
+ * order to perform well.
+ */
+static unsigned long ibmvfc_get_desired_dma(struct vio_dev *vdev)
+{
+ unsigned long pool_dma = max_requests * sizeof(union ibmvfc_iu);
+ return pool_dma + ((512 * 1024) * driver_template.cmd_per_lun);
+}
+
static struct vio_device_id ibmvfc_device_table[] __devinitdata = {
{"fcp", "IBM,vfc-client"},
{ "", "" }
.id_table = ibmvfc_device_table,
.probe = ibmvfc_probe,
.remove = ibmvfc_remove,
+ .get_desired_dma = ibmvfc_get_desired_dma,
.driver = {
.name = IBMVFC_NAME,
.owner = THIS_MODULE,
#include <linux/delay.h>
#include <asm/firmware.h>
#include <asm/vio.h>
+#include <asm/firmware.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>
SG_ALL * sizeof(struct srp_direct_buf),
&evt_struct->ext_list_token, 0);
if (!evt_struct->ext_list) {
- sdev_printk(KERN_ERR, cmd->device,
- "Can't allocate memory for indirect table\n");
+ if (!firmware_has_feature(FW_FEATURE_CMO))
+ sdev_printk(KERN_ERR, cmd->device,
+ "Can't allocate memory "
+ "for indirect table\n");
return 0;
}
}
srp_cmd->lun = ((u64) lun) << 48;
if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) {
- sdev_printk(KERN_ERR, cmnd->device, "couldn't convert cmd to srp_cmd\n");
+ if (!firmware_has_feature(FW_FEATURE_CMO))
+ sdev_printk(KERN_ERR, cmnd->device,
+ "couldn't convert cmd to srp_cmd\n");
free_event_struct(&hostdata->pool, evt_struct);
return SCSI_MLQUEUE_HOST_BUSY;
}
DMA_BIDIRECTIONAL);
if (dma_mapping_error(req->buffer)) {
- dev_err(hostdata->dev, "Unable to map request_buffer for adapter_info!\n");
+ if (!firmware_has_feature(FW_FEATURE_CMO))
+ dev_err(hostdata->dev,
+ "Unable to map request_buffer for "
+ "adapter_info!\n");
free_event_struct(&hostdata->pool, evt_struct);
return;
}
DMA_BIDIRECTIONAL);
if (dma_mapping_error(host_config->buffer)) {
- dev_err(hostdata->dev, "dma_mapping error getting host config\n");
+ if (!firmware_has_feature(FW_FEATURE_CMO))
+ dev_err(hostdata->dev,
+ "dma_mapping error getting host config\n");
free_event_struct(&hostdata->pool, evt_struct);
return -1;
}
.eh_host_reset_handler = ibmvscsi_eh_host_reset_handler,
.slave_configure = ibmvscsi_slave_configure,
.change_queue_depth = ibmvscsi_change_queue_depth,
- .cmd_per_lun = 16,
+ .cmd_per_lun = IBMVSCSI_CMDS_PER_LUN_DEFAULT,
.can_queue = IBMVSCSI_MAX_REQUESTS_DEFAULT,
.this_id = -1,
.sg_tablesize = SG_ALL,
.shost_attrs = ibmvscsi_attrs,
};
+/**
+ * ibmvscsi_get_desired_dma - Calculate IO memory desired by the driver
+ *
+ * @vdev: struct vio_dev for the device whose desired IO mem is to be returned
+ *
+ * Return value:
+ * Number of bytes of IO data the driver will need to perform well.
+ */
+static unsigned long ibmvscsi_get_desired_dma(struct vio_dev *vdev)
+{
+ /* iu_storage data allocated in initialize_event_pool */
+ unsigned long desired_io = max_requests * sizeof(union viosrp_iu);
+
+ /* add io space for sg data */
+ desired_io += (IBMVSCSI_MAX_SECTORS_DEFAULT *
+ IBMVSCSI_CMDS_PER_LUN_DEFAULT);
+
+ return desired_io;
+}
+
/**
* Called by bus code for each adapter
*/
hostdata->host = host;
hostdata->dev = dev;
atomic_set(&hostdata->request_limit, -1);
- hostdata->host->max_sectors = 32 * 8; /* default max I/O 32 pages */
+ hostdata->host->max_sectors = IBMVSCSI_MAX_SECTORS_DEFAULT;
rc = ibmvscsi_ops->init_crq_queue(&hostdata->queue, hostdata, max_requests);
if (rc != 0 && rc != H_RESOURCE) {
.id_table = ibmvscsi_device_table,
.probe = ibmvscsi_probe,
.remove = ibmvscsi_remove,
+ .get_desired_dma = ibmvscsi_get_desired_dma,
.driver = {
.name = "ibmvscsi",
.owner = THIS_MODULE,
#define MAX_INDIRECT_BUFS 10
#define IBMVSCSI_MAX_REQUESTS_DEFAULT 100
+#define IBMVSCSI_CMDS_PER_LUN_DEFAULT 16
+#define IBMVSCSI_MAX_SECTORS_DEFAULT 256 /* 32 * 8 = default max I/O 32 pages */
#define IBMVSCSI_MAX_CMDS_PER_LUN 64
/* ------------------------------------------------------------
return retval;
}
-static int ixj_ioctl(struct inode *inode, struct file *file_p, unsigned int cmd, unsigned long arg)
+static long do_ixj_ioctl(struct file *file_p, unsigned int cmd, unsigned long arg)
{
IXJ_TONE ti;
IXJ_FILTER jf;
IXJ_FILTER_RAW jfr;
void __user *argp = (void __user *)arg;
-
- unsigned int raise, mant;
+ struct inode *inode = file_p->f_path.dentry->d_inode;
unsigned int minor = iminor(inode);
+ unsigned int raise, mant;
int board = NUM(inode);
IXJ *j = get_ixj(NUM(inode));
return retval;
}
+static long ixj_ioctl(struct file *file_p, unsigned int cmd, unsigned long arg)
+{
+ long ret;
+ lock_kernel();
+ ret = do_ixj_ioctl(file_p, cmd, arg);
+ unlock_kernel();
+ return ret;
+}
+
static int ixj_fasync(int fd, struct file *file_p, int mode)
{
IXJ *j = get_ixj(NUM(file_p->f_path.dentry->d_inode));
.read = ixj_enhanced_read,
.write = ixj_enhanced_write,
.poll = ixj_poll,
- .ioctl = ixj_ioctl,
+ .unlocked_ioctl = ixj_ioctl,
.release = ixj_release,
.fasync = ixj_fasync
};
#endif
#define ERR(stuff...) pr_err("udc: " stuff)
-#define WARN(stuff...) pr_warning("udc: " stuff)
+#define WARNING(stuff...) pr_warning("udc: " stuff)
#define INFO(stuff...) pr_info("udc: " stuff)
#define DBG(stuff...) pr_debug("udc: " stuff)
* but if the controller isn't recognized at all then
* that assumption is a bit more likely to be wrong.
*/
- WARN(cdev, "controller '%s' not recognized; trying %s\n",
+ WARNING(cdev, "controller '%s' not recognized; trying %s\n",
gadget->name,
cdc_config_driver.label);
device_desc.bcdDevice =
* but if the controller isn't recognized at all then
* that assumption is a bit more likely to be wrong.
*/
- WARN(cdev, "controller '%s' not recognized; trying %s\n",
+ WARNING(cdev, "controller '%s' not recognized; trying %s\n",
gadget->name,
eth_config_driver.label);
device_desc.bcdDevice =
dev_vdbg(&(d)->gadget->dev , fmt , ## args)
#define ERROR(d, fmt, args...) \
dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
+#define WARNING(d, fmt, args...) \
dev_warn(&(d)->gadget->dev , fmt , ## args)
#define INFO(d, fmt, args...) \
dev_info(&(d)->gadget->dev , fmt , ## args)
if (rc != 0 && rc != -ESHUTDOWN) {
/* We can't do much more than wait for a reset */
- WARN(fsg, "error in submission: %s --> %d\n",
+ WARNING(fsg, "error in submission: %s --> %d\n",
fsg->ep0->name, rc);
}
return rc;
/* Save the command for later */
if (fsg->cbbuf_cmnd_size)
- WARN(fsg, "CB[I] overwriting previous command\n");
+ WARNING(fsg, "CB[I] overwriting previous command\n");
fsg->cbbuf_cmnd_size = req->actual;
memcpy(fsg->cbbuf_cmnd, req->buf, fsg->cbbuf_cmnd_size);
* submissions if DMA is enabled. */
if (rc != -ESHUTDOWN && !(rc == -EOPNOTSUPP &&
req->length == 0))
- WARN(fsg, "error in submission: %s --> %d\n",
+ WARNING(fsg, "error in submission: %s --> %d\n",
ep->name, rc);
}
}
VDBG(fsg, "delayed bulk-in endpoint halt\n");
while (rc != 0) {
if (rc != -EAGAIN) {
- WARN(fsg, "usb_ep_set_halt -> %d\n", rc);
+ WARNING(fsg, "usb_ep_set_halt -> %d\n", rc);
rc = 0;
break;
}
VDBG(fsg, "delayed bulk-in endpoint wedge\n");
while (rc != 0) {
if (rc != -EAGAIN) {
- WARN(fsg, "usb_ep_set_wedge -> %d\n", rc);
+ WARNING(fsg, "usb_ep_set_wedge -> %d\n", rc);
rc = 0;
break;
}
if (gcnum >= 0)
mod_data.release = 0x0300 + gcnum;
else {
- WARN(fsg, "controller '%s' not recognized\n",
+ WARNING(fsg, "controller '%s' not recognized\n",
fsg->gadget->name);
mod_data.release = 0x0399;
}
/* If the ep is configured */
if (curr_ep->name == NULL) {
- WARN("Invalid EP?");
+ WARNING("Invalid EP?");
continue;
}
#endif
#define ERR(stuff...) pr_err("udc: " stuff)
-#define WARN(stuff...) pr_warning("udc: " stuff)
+#define WARNING(stuff...) pr_warning("udc: " stuff)
#define INFO(stuff...) pr_info("udc: " stuff)
/*-------------------------------------------------------------------------*/
dev_vdbg(&(d)->gadget->dev , fmt , ## args)
#define ERROR(d, fmt, args...) \
dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
- dev_warn(&(d)->gadget->dev , fmt , ## args)
#define INFO(d, fmt, args...) \
dev_info(&(d)->gadget->dev , fmt , ## args)
* usb_gadget_driver_{register,unregister}() must change.
*/
if (the_controller) {
- WARN(dev, "ignoring %s\n", pci_name(pdev));
+ WARNING(dev, "ignoring %s\n", pci_name(pdev));
return -EBUSY;
}
if (!pdev->irq) {
#define ERROR(dev,fmt,args...) \
xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
+#define WARNING(dev,fmt,args...) \
xprintk(dev , KERN_WARNING , fmt , ## args)
#define INFO(dev,fmt,args...) \
xprintk(dev , KERN_INFO , fmt , ## args)
#define ERROR(dev,fmt,args...) \
xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
- xprintk(dev , KERN_WARNING , fmt , ## args)
#define INFO(dev,fmt,args...) \
xprintk(dev , KERN_INFO , fmt , ## args)
* 0122, and 0124; not all cases trigger the warning.
*/
if ((tmp & (1 << NAK_OUT_PACKETS)) == 0) {
- WARN (ep->dev, "%s lost packet sync!\n",
+ WARNING (ep->dev, "%s lost packet sync!\n",
ep->ep.name);
req->req.status = -EOVERFLOW;
} else if ((tmp = readl (&ep->regs->ep_avail)) != 0) {
#define ERROR(dev,fmt,args...) \
xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev,fmt,args...) \
+#define WARNING(dev,fmt,args...) \
xprintk(dev , KERN_WARNING , fmt , ## args)
#define INFO(dev,fmt,args...) \
xprintk(dev , KERN_INFO , fmt , ## args)
status = -EINVAL;
else if (value) {
if (ep->udc->ep0_set_config) {
- WARN("error changing config?\n");
+ WARNING("error changing config?\n");
omap_writew(UDC_CLR_CFG, UDC_SYSCON2);
}
omap_writew(UDC_STALL_CMD, UDC_SYSCON2);
u.r.bRequestType, u.r.bRequest, status);
if (udc->ep0_set_config) {
if (udc->ep0_reset_config)
- WARN("error resetting config?\n");
+ WARNING("error resetting config?\n");
else
omap_writew(UDC_CLR_CFG, UDC_SYSCON2);
}
* which would prevent entry to deep sleep...
*/
if ((devstat & UDC_ATT) != 0 && (devstat & UDC_SUS) == 0) {
- WARN("session active; suspend requires disconnect\n");
+ WARNING("session active; suspend requires disconnect\n");
omap_pullup(&udc->gadget, 0);
}
#endif
#define ERR(stuff...) pr_err("udc: " stuff)
-#define WARN(stuff...) pr_warning("udc: " stuff)
+#define WARNING(stuff...) pr_warning("udc: " stuff)
#define INFO(stuff...) pr_info("udc: " stuff)
#define DBG(stuff...) pr_debug("udc: " stuff)
#define ERROR(dev, fmt, args...) \
xprintk(dev, KERN_ERR, fmt, ## args)
-#define WARN(dev, fmt, args...) \
+#define WARNING(dev, fmt, args...) \
xprintk(dev, KERN_WARNING, fmt, ## args)
#define INFO(dev, fmt, args...) \
xprintk(dev, KERN_INFO, fmt, ## args)
struct pxa25x_request *req;
req = container_of (_req, struct pxa25x_request, req);
- WARN_ON (!list_empty (&req->queue));
+ WARN_ON(!list_empty (&req->queue));
kfree(req);
}
* tell us about config change events,
* so later ones may fail...
*/
- WARN("config change %02x fail %d?\n",
+ WARNING("config change %02x fail %d?\n",
u.r.bRequest, i);
return;
/* TODO experiment: if has_cfr,
unsigned long flags;
if (!udc->mach->gpio_pullup && !udc->mach->udc_command)
- WARN("USB host won't detect disconnect!\n");
+ WARNING("USB host won't detect disconnect!\n");
udc->suspended = 1;
local_irq_save(flags);
#define DBG(lvl, stuff...) do{if ((lvl) <= UDC_DEBUG) DMSG(stuff);}while(0)
#define ERR(stuff...) pr_err("udc: " stuff)
-#define WARN(stuff...) pr_warning("udc: " stuff)
+#define WARNING(stuff...) pr_warning("udc: " stuff)
#define INFO(stuff...) pr_info("udc: " stuff)
#undef DBG
#undef VDBG
#undef ERROR
-#undef WARN
#undef INFO
#define xprintk(d, level, fmt, args...) \
#define ERROR(dev, fmt, args...) \
xprintk(dev , KERN_ERR , fmt , ## args)
-#define WARN(dev, fmt, args...) \
- xprintk(dev , KERN_WARNING , fmt , ## args)
#define INFO(dev, fmt, args...) \
xprintk(dev , KERN_INFO , fmt , ## args)
for (i = 0; i < 100 && !list_empty(&hep->urb_list); i++)
msleep(3);
if (!list_empty(&hep->urb_list))
- WARN("ep %p not empty?\n", ep);
+ WARNING("ep %p not empty?\n", ep);
kfree(ep);
hep->hcpriv = NULL;
#endif
#define ERR(stuff...) printk(KERN_ERR "116x: " stuff)
-#define WARN(stuff...) printk(KERN_WARNING "116x: " stuff)
+#define WARNING(stuff...) printk(KERN_WARNING "116x: " stuff)
#define INFO(stuff...) printk(KERN_INFO "116x: " stuff)
/* ------------------------------------------------- */
if (!list_empty(&hep->urb_list))
msleep(3);
if (!list_empty(&hep->urb_list))
- WARN("ep %p not empty?\n", ep);
+ WARNING("ep %p not empty?\n", ep);
kfree(ep);
hep->hcpriv = NULL;
#endif
#define ERR(stuff...) printk(KERN_ERR "sl811: " stuff)
-#define WARN(stuff...) printk(KERN_WARNING "sl811: " stuff)
+#define WARNING(stuff...) printk(KERN_WARNING "sl811: " stuff)
#define INFO(stuff...) printk(KERN_INFO "sl811: " stuff)
#define ERROR(tdev, fmt, args...) \
dev_err(&(tdev)->intf->dev , fmt , ## args)
-#define WARN(tdev, fmt, args...) \
+#define WARNING(tdev, fmt, args...) \
dev_warn(&(tdev)->intf->dev , fmt , ## args)
/*-------------------------------------------------------------------------*/
status = get_endpoints (dev, intf);
if (status < 0) {
- WARN(dev, "couldn't get endpoints, %d\n",
+ WARNING(dev, "couldn't get endpoints, %d\n",
status);
return status;
}
dev->id.device, dev->id.vendor);
}
-static struct bus_type virtio_bus = {
- .name = "virtio",
- .match = virtio_dev_match,
- .dev_attrs = virtio_dev_attrs,
- .uevent = virtio_uevent,
-};
-
static void add_status(struct virtio_device *dev, unsigned status)
{
dev->config->set_status(dev, dev->config->get_status(dev) | status);
set_bit(f, dev->features);
}
+ /* Transport features always preserved to pass to finalize_features. */
+ for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
+ if (device_features & (1 << i))
+ set_bit(i, dev->features);
+
err = drv->probe(dev);
if (err)
add_status(dev, VIRTIO_CONFIG_S_FAILED);
else {
- /* They should never have set feature bits beyond 32 */
- dev->config->set_features(dev, dev->features[0]);
+ dev->config->finalize_features(dev);
add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
}
return err;
return 0;
}
+static struct bus_type virtio_bus = {
+ .name = "virtio",
+ .match = virtio_dev_match,
+ .dev_attrs = virtio_dev_attrs,
+ .uevent = virtio_uevent,
+ .probe = virtio_dev_probe,
+ .remove = virtio_dev_remove,
+};
+
int register_virtio_driver(struct virtio_driver *driver)
{
/* Catch this early. */
BUG_ON(driver->feature_table_size && !driver->feature_table);
driver->driver.bus = &virtio_bus;
- driver->driver.probe = virtio_dev_probe;
- driver->driver.remove = virtio_dev_remove;
return driver_register(&driver->driver);
}
EXPORT_SYMBOL_GPL(register_virtio_driver);
return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
}
-/* virtio config->set_features() implementation */
-static void vp_set_features(struct virtio_device *vdev, u32 features)
+/* virtio config->finalize_features() implementation */
+static void vp_finalize_features(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
- iowrite32(features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
+ /* Give virtio_ring a chance to accept features. */
+ vring_transport_features(vdev);
+
+ /* We only support 32 feature bits. */
+ BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1);
+ iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES);
}
/* virtio config->get() implementation */
.find_vq = vp_find_vq,
.del_vq = vp_del_vq,
.get_features = vp_get_features,
- .set_features = vp_set_features,
+ .finalize_features = vp_finalize_features,
};
/* the PCI probing function */
*/
#include <linux/virtio.h>
#include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
#include <linux/device.h>
#ifdef DEBUG
if (vq->num_free < out + in) {
pr_debug("Can't add buf len %i - avail = %i\n",
out + in, vq->num_free);
- /* We notify *even if* VRING_USED_F_NO_NOTIFY is set here. */
- vq->notify(&vq->vq);
+ /* FIXME: for historical reasons, we force a notify here if
+ * there are outgoing parts to the buffer. Presumably the
+ * host should service the ring ASAP. */
+ if (out)
+ vq->notify(&vq->vq);
END_USE(vq);
return -ENOSPC;
}
}
EXPORT_SYMBOL_GPL(vring_del_virtqueue);
+/* Manipulates transport-specific feature bits. */
+void vring_transport_features(struct virtio_device *vdev)
+{
+ unsigned int i;
+
+ for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
+ switch (i) {
+ default:
+ /* We don't understand this bit. */
+ clear_bit(i, vdev->features);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(vring_transport_features);
+
MODULE_LICENSE("GPL");
menu "Pseudo filesystems"
-config PROC_FS
- bool "/proc file system support" if EMBEDDED
- default y
- help
- This is a virtual file system providing information about the status
- of the system. "Virtual" means that it doesn't take up any space on
- your hard disk: the files are created on the fly by the kernel when
- you try to access them. Also, you cannot read the files with older
- version of the program less: you need to use more or cat.
-
- It's totally cool; for example, "cat /proc/interrupts" gives
- information about what the different IRQs are used for at the moment
- (there is a small number of Interrupt ReQuest lines in your computer
- that are used by the attached devices to gain the CPU's attention --
- often a source of trouble if two devices are mistakenly configured
- to use the same IRQ). The program procinfo to display some
- information about your system gathered from the /proc file system.
-
- Before you can use the /proc file system, it has to be mounted,
- meaning it has to be given a location in the directory hierarchy.
- That location should be /proc. A command such as "mount -t proc proc
- /proc" or the equivalent line in /etc/fstab does the job.
-
- The /proc file system is explained in the file
- <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
- ("man 5 proc").
-
- This option will enlarge your kernel by about 67 KB. Several
- programs depend on this, so everyone should say Y here.
-
-config PROC_KCORE
- bool "/proc/kcore support" if !ARM
- depends on PROC_FS && MMU
-
-config PROC_VMCORE
- bool "/proc/vmcore support (EXPERIMENTAL)"
- depends on PROC_FS && CRASH_DUMP
- default y
- help
- Exports the dump image of crashed kernel in ELF format.
-
-config PROC_SYSCTL
- bool "Sysctl support (/proc/sys)" if EMBEDDED
- depends on PROC_FS
- select SYSCTL
- default y
- ---help---
- The sysctl interface provides a means of dynamically changing
- certain kernel parameters and variables on the fly without requiring
- a recompile of the kernel or reboot of the system. The primary
- interface is through /proc/sys. If you say Y here a tree of
- modifiable sysctl entries will be generated beneath the
- /proc/sys directory. They are explained in the files
- in <file:Documentation/sysctl/>. Note that enabling this
- option will enlarge the kernel by at least 8 KB.
-
- As it is generally a good thing, you should say Y here unless
- building a kernel for install/rescue disks or your system is very
- limited in memory.
+source "fs/proc/Kconfig"
config SYSFS
bool "sysfs file system support" if EMBEDDED
To compile the coda client support as a module, choose M here: the
module will be called coda.
-config CODA_FS_OLD_API
- bool "Use 96-bit Coda file identifiers"
- depends on CODA_FS
- help
- A new kernel-userspace API had to be introduced for Coda v6.0
- to support larger 128-bit file identifiers as needed by the
- new realms implementation.
-
- However this new API is not backward compatible with older
- clients. If you really need to run the old Coda userspace
- cache manager then say Y.
-
- For most cases you probably want to say N.
-
config AFS_FS
tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
depends on INET && EXPERIMENTAL
struct task_struct *tsk = current;
task_lock(tsk);
- tsk->flags |= PF_BORROWED_MM;
active_mm = tsk->active_mm;
atomic_inc(&mm->mm_count);
tsk->mm = mm;
struct task_struct *tsk = current;
task_lock(tsk);
- tsk->flags &= ~PF_BORROWED_MM;
tsk->mm = NULL;
/* active_mm is still 'mm' */
enter_lazy_tlb(mm, tsk);
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
+#ifndef ELF_BASE_PLATFORM
+/*
+ * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
+ * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
+ * will be copied to the user stack in the same manner as AT_PLATFORM.
+ */
+#define ELF_BASE_PLATFORM NULL
+#endif
+
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
unsigned long load_addr, unsigned long interp_load_addr)
elf_addr_t __user *envp;
elf_addr_t __user *sp;
elf_addr_t __user *u_platform;
+ elf_addr_t __user *u_base_platform;
const char *k_platform = ELF_PLATFORM;
+ const char *k_base_platform = ELF_BASE_PLATFORM;
int items;
elf_addr_t *elf_info;
int ei_index = 0;
return -EFAULT;
}
+ /*
+ * If this architecture has a "base" platform capability
+ * string, copy it to userspace.
+ */
+ u_base_platform = NULL;
+ if (k_base_platform) {
+ size_t len = strlen(k_base_platform) + 1;
+
+ u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
+ if (__copy_to_user(u_base_platform, k_base_platform, len))
+ return -EFAULT;
+ }
+
/* Create the ELF interpreter info */
elf_info = (elf_addr_t *)current->mm->saved_auxv;
/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
NEW_AUX_ENT(AT_PLATFORM,
(elf_addr_t)(unsigned long)u_platform);
}
+ if (k_base_platform) {
+ NEW_AUX_ENT(AT_BASE_PLATFORM,
+ (elf_addr_t)(unsigned long)u_base_platform);
+ }
if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
}
const struct user_regset_view *view = task_user_regset_view(dump_task);
struct elf_thread_core_info *t;
struct elf_prpsinfo *psinfo;
- struct task_struct *g, *p;
+ struct core_thread *ct;
unsigned int i;
info->size = 0;
/*
* Allocate a structure for each thread.
*/
- rcu_read_lock();
- do_each_thread(g, p)
- if (p->mm == dump_task->mm) {
- t = kzalloc(offsetof(struct elf_thread_core_info,
- notes[info->thread_notes]),
- GFP_ATOMIC);
- if (unlikely(!t)) {
- rcu_read_unlock();
- return 0;
- }
- t->task = p;
- if (p == dump_task || !info->thread) {
- t->next = info->thread;
- info->thread = t;
- } else {
- /*
- * Make sure to keep the original task at
- * the head of the list.
- */
- t->next = info->thread->next;
- info->thread->next = t;
- }
+ for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
+ t = kzalloc(offsetof(struct elf_thread_core_info,
+ notes[info->thread_notes]),
+ GFP_KERNEL);
+ if (unlikely(!t))
+ return 0;
+
+ t->task = ct->task;
+ if (ct->task == dump_task || !info->thread) {
+ t->next = info->thread;
+ info->thread = t;
+ } else {
+ /*
+ * Make sure to keep the original task at
+ * the head of the list.
+ */
+ t->next = info->thread->next;
+ info->thread->next = t;
}
- while_each_thread(g, p);
- rcu_read_unlock();
+ }
/*
* Now fill in each thread's information.
{
#define NUM_NOTES 6
struct list_head *t;
- struct task_struct *g, *p;
info->notes = NULL;
info->prstatus = NULL;
info->thread_status_size = 0;
if (signr) {
+ struct core_thread *ct;
struct elf_thread_status *ets;
- rcu_read_lock();
- do_each_thread(g, p)
- if (current->mm == p->mm && current != p) {
- ets = kzalloc(sizeof(*ets), GFP_ATOMIC);
- if (!ets) {
- rcu_read_unlock();
- return 0;
- }
- ets->thread = p;
- list_add(&ets->list, &info->thread_list);
- }
- while_each_thread(g, p);
- rcu_read_unlock();
+
+ for (ct = current->mm->core_state->dumper.next;
+ ct; ct = ct->next) {
+ ets = kzalloc(sizeof(*ets), GFP_KERNEL);
+ if (!ets)
+ return 0;
+
+ ets->thread = ct->task;
+ list_add(&ets->list, &info->thread_list);
+ }
+
list_for_each(t, &info->thread_list) {
int sz;
struct memelfnote *notes = NULL;
struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
- struct task_struct *g, *p;
LIST_HEAD(thread_list);
struct list_head *t;
elf_fpregset_t *fpu = NULL;
#endif
if (signr) {
+ struct core_thread *ct;
struct elf_thread_status *tmp;
- rcu_read_lock();
- do_each_thread(g,p)
- if (current->mm == p->mm && current != p) {
- tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
- if (!tmp) {
- rcu_read_unlock();
- goto cleanup;
- }
- tmp->thread = p;
- list_add(&tmp->list, &thread_list);
- }
- while_each_thread(g,p);
- rcu_read_unlock();
+
+ for (ct = current->mm->core_state->dumper.next;
+ ct; ct = ct->next) {
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ goto cleanup;
+
+ tmp->thread = ct->task;
+ list_add(&tmp->list, &thread_list);
+ }
+
list_for_each(t, &thread_list) {
struct elf_thread_status *tmp;
int sz;
char * coda_f2s(struct CodaFid *f)
{
static char s[60];
-#ifdef CONFIG_CODA_FS_OLD_API
- sprintf(s, "(%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2]);
-#else
+
sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]);
-#endif
+
return s;
}
MODULE_DESCRIPTION("Coda Distributed File System VFS interface");
MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR);
MODULE_LICENSE("GPL");
-#ifdef CONFIG_CODA_FS_OLD_API
-MODULE_VERSION("5.3.21");
-#else
MODULE_VERSION("6.6");
-#endif
static int __init init_coda(void)
{
inp->ih.opcode = opcode;
inp->ih.pid = current->pid;
inp->ih.pgid = task_pgrp_nr(current);
-#ifdef CONFIG_CODA_FS_OLD_API
- memset(&inp->ih.cred, 0, sizeof(struct coda_cred));
- inp->ih.cred.cr_fsuid = current->fsuid;
-#else
inp->ih.uid = current->fsuid;
-#endif
+
return (void*)inp;
}
union inputArgs *inp;
union outputArgs *outp;
int insize, outsize, error;
-#ifdef CONFIG_CODA_FS_OLD_API
- struct coda_cred cred = { 0, };
- cred.cr_fsuid = uid;
-#endif
insize = SIZE(release);
UPARG(CODA_CLOSE);
-#ifdef CONFIG_CODA_FS_OLD_API
- memcpy(&(inp->ih.cred), &cred, sizeof(cred));
-#else
inp->ih.uid = uid;
-#endif
-
inp->coda_close.VFid = *fid;
inp->coda_close.flags = flags;
#include <linux/slab.h>
#include <linux/raid/md.h>
#include <linux/kd.h>
-#include <linux/dirent.h>
#include <linux/route.h>
#include <linux/in6.h>
#include <linux/ipv6_route.h>
if (xop->callback == NULL)
wait_event(recv_wq, (op->done != 0));
else {
- rv = -EINPROGRESS;
+ rv = FILE_LOCK_DEFERRED;
goto out;
}
*/
static void dqput(struct dquot *dquot)
{
+ int ret;
+
if (!dquot)
return;
#ifdef __DQUOT_PARANOIA
if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) {
spin_unlock(&dq_list_lock);
/* Commit dquot before releasing */
- dquot->dq_sb->dq_op->write_dquot(dquot);
+ ret = dquot->dq_sb->dq_op->write_dquot(dquot);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: cannot write quota structure on "
+ "device %s (error %d). Quota may get out of "
+ "sync!\n", dquot->dq_sb->s_id, ret);
+ /*
+ * We clear dirty bit anyway, so that we avoid
+ * infinite loop here
+ */
+ spin_lock(&dq_list_lock);
+ clear_dquot_dirty(dquot);
+ spin_unlock(&dq_list_lock);
+ }
goto we_slept;
}
/* Clear flag in case dquot was inactive (something bad happened) */
char *msg = NULL;
struct tty_struct *tty;
- if (!need_print_warning(dquot))
+ if (warntype == QUOTA_NL_IHARDBELOW ||
+ warntype == QUOTA_NL_ISOFTBELOW ||
+ warntype == QUOTA_NL_BHARDBELOW ||
+ warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot))
return;
mutex_lock(&tty_mutex);
return QUOTA_OK;
}
+static int info_idq_free(struct dquot *dquot, ulong inodes)
+{
+ if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+ dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
+ return QUOTA_NL_NOWARN;
+
+ if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit)
+ return QUOTA_NL_ISOFTBELOW;
+ if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit &&
+ dquot->dq_dqb.dqb_curinodes - inodes < dquot->dq_dqb.dqb_ihardlimit)
+ return QUOTA_NL_IHARDBELOW;
+ return QUOTA_NL_NOWARN;
+}
+
+static int info_bdq_free(struct dquot *dquot, qsize_t space)
+{
+ if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+ toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
+ return QUOTA_NL_NOWARN;
+
+ if (toqb(dquot->dq_dqb.dqb_curspace - space) <=
+ dquot->dq_dqb.dqb_bsoftlimit)
+ return QUOTA_NL_BSOFTBELOW;
+ if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit &&
+ toqb(dquot->dq_dqb.dqb_curspace - space) <
+ dquot->dq_dqb.dqb_bhardlimit)
+ return QUOTA_NL_BHARDBELOW;
+ return QUOTA_NL_NOWARN;
+}
/*
* Initialize quota pointers in inode
* Transaction must be started at entry
return 0;
}
+/* Wrapper to remove references to quota structures from inode */
+void vfs_dq_drop(struct inode *inode)
+{
+ /* Here we can get arbitrary inode from clear_inode() so we have
+ * to be careful. OTOH we don't need locking as quota operations
+ * are allowed to change only at mount time */
+ if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
+ && inode->i_sb->dq_op->drop) {
+ int cnt;
+ /* Test before calling to rule out calls from proc and such
+ * where we are not allowed to block. Note that this is
+ * actually reliable test even without the lock - the caller
+ * must assure that nobody can come after the DQUOT_DROP and
+ * add quota pointers back anyway */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (inode->i_dquot[cnt] != NODQUOT)
+ break;
+ if (cnt < MAXQUOTAS)
+ inode->i_sb->dq_op->drop(inode);
+ }
+}
+
/*
* Following four functions update i_blocks+i_bytes fields and
* quota information (together with appropriate checks)
int dquot_free_space(struct inode *inode, qsize_t number)
{
unsigned int cnt;
+ char warntype[MAXQUOTAS];
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
inode_sub_bytes(inode, number);
return QUOTA_OK;
}
+
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
/* Now recheck reliably when holding dqptr_sem */
if (IS_NOQUOTA(inode)) {
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (inode->i_dquot[cnt] == NODQUOT)
continue;
+ warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
dquot_decr_space(inode->i_dquot[cnt], number);
}
inode_sub_bytes(inode, number);
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
if (inode->i_dquot[cnt])
mark_dquot_dirty(inode->i_dquot[cnt]);
+ flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return QUOTA_OK;
}
int dquot_free_inode(const struct inode *inode, unsigned long number)
{
unsigned int cnt;
+ char warntype[MAXQUOTAS];
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
if (IS_NOQUOTA(inode))
return QUOTA_OK;
+
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
/* Now recheck reliably when holding dqptr_sem */
if (IS_NOQUOTA(inode)) {
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (inode->i_dquot[cnt] == NODQUOT)
continue;
+ warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
dquot_decr_inodes(inode->i_dquot[cnt], number);
}
spin_unlock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
if (inode->i_dquot[cnt])
mark_dquot_dirty(inode->i_dquot[cnt]);
+ flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return QUOTA_OK;
}
struct dquot *transfer_to[MAXQUOTAS];
int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid,
chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid;
- char warntype[MAXQUOTAS];
+ char warntype_to[MAXQUOTAS];
+ char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
/* Clear the arrays */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
transfer_to[cnt] = transfer_from[cnt] = NODQUOT;
- warntype[cnt] = QUOTA_NL_NOWARN;
+ warntype_to[cnt] = QUOTA_NL_NOWARN;
}
down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
/* Now recheck reliably when holding dqptr_sem */
if (transfer_to[cnt] == NODQUOT)
continue;
transfer_from[cnt] = inode->i_dquot[cnt];
- if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA ||
- check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA)
+ if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) ==
+ NO_QUOTA || check_bdq(transfer_to[cnt], space, 0,
+ warntype_to + cnt) == NO_QUOTA)
goto warn_put_all;
}
/* Due to IO error we might not have transfer_from[] structure */
if (transfer_from[cnt]) {
+ warntype_from_inodes[cnt] =
+ info_idq_free(transfer_from[cnt], 1);
+ warntype_from_space[cnt] =
+ info_bdq_free(transfer_from[cnt], space);
dquot_decr_inodes(transfer_from[cnt], 1);
dquot_decr_space(transfer_from[cnt], space);
}
if (transfer_to[cnt])
mark_dquot_dirty(transfer_to[cnt]);
}
- flush_warnings(transfer_to, warntype);
+ flush_warnings(transfer_to, warntype_to);
+ flush_warnings(transfer_from, warntype_from_inodes);
+ flush_warnings(transfer_from, warntype_from_space);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT)
return ret;
}
+/* Wrapper for transferring ownership of an inode */
+int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
+{
+ if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
+ vfs_dq_init(inode);
+ if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
+ return 1;
+ }
+ return 0;
+}
+
+
/*
* Write info of quota file to disk
*/
return error;
}
+/* Wrapper to turn on quotas when remounting rw */
+int vfs_dq_quota_on_remount(struct super_block *sb)
+{
+ int cnt;
+ int ret = 0, err;
+
+ if (!sb->s_qcop || !sb->s_qcop->quota_on)
+ return -ENOSYS;
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
+ if (err < 0 && !ret)
+ ret = err;
+ }
+ return ret;
+}
+
/* Generic routine for getting common part of quota structure */
static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
{
EXPORT_SYMBOL(dquot_mark_dquot_dirty);
EXPORT_SYMBOL(dquot_initialize);
EXPORT_SYMBOL(dquot_drop);
+EXPORT_SYMBOL(vfs_dq_drop);
EXPORT_SYMBOL(dquot_alloc_space);
EXPORT_SYMBOL(dquot_alloc_inode);
EXPORT_SYMBOL(dquot_free_space);
EXPORT_SYMBOL(dquot_free_inode);
EXPORT_SYMBOL(dquot_transfer);
+EXPORT_SYMBOL(vfs_dq_transfer);
+EXPORT_SYMBOL(vfs_dq_quota_on_remount);
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/fdtable.h>
-#include <linux/mman.h>
+#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/smp_lock.h>
+#include <linux/swap.h>
#include <linux/string.h>
#include <linux/init.h>
-#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/key.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
-#include <linux/swap.h>
#include <linux/utsname.h>
#include <linux/pid_namespace.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/syscalls.h>
-#include <linux/rmap.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/audit.h>
* Make sure that if there is a core dump in progress
* for the old mm, we get out and die instead of going
* through with the exec. We must hold mmap_sem around
- * checking core_waiters and changing tsk->mm. The
- * core-inducing thread will increment core_waiters for
- * each thread whose ->mm == old_mm.
+ * checking core_state and changing tsk->mm.
*/
down_read(&old_mm->mmap_sem);
- if (unlikely(old_mm->core_waiters)) {
+ if (unlikely(old_mm->core_state)) {
up_read(&old_mm->mmap_sem);
return -EINTR;
}
if (retval < 0)
goto out;
+ current->flags &= ~PF_KTHREAD;
retval = search_binary_handler(bprm,regs);
if (retval >= 0) {
/* execve success */
* name into corename, which must have space for at least
* CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
*/
-static int format_corename(char *corename, const char *pattern, long signr)
+static int format_corename(char *corename, int nr_threads, long signr)
{
- const char *pat_ptr = pattern;
+ const char *pat_ptr = core_pattern;
+ int ispipe = (*pat_ptr == '|');
char *out_ptr = corename;
char *const out_end = corename + CORENAME_MAX_SIZE;
int rc;
int pid_in_pattern = 0;
- int ispipe = 0;
-
- if (*pattern == '|')
- ispipe = 1;
/* Repeat as long as we have more pattern to process and more output
space */
* and core_uses_pid is set, then .%pid will be appended to
* the filename. Do not do this for piped commands. */
if (!ispipe && !pid_in_pattern
- && (core_uses_pid || atomic_read(¤t->mm->mm_users) != 1)) {
+ && (core_uses_pid || nr_threads)) {
rc = snprintf(out_ptr, out_end - out_ptr,
".%d", task_tgid_vnr(current));
if (rc > out_end - out_ptr)
return ispipe;
}
-static void zap_process(struct task_struct *start)
+static int zap_process(struct task_struct *start)
{
struct task_struct *t;
+ int nr = 0;
start->signal->flags = SIGNAL_GROUP_EXIT;
start->signal->group_stop_count = 0;
t = start;
do {
if (t != current && t->mm) {
- t->mm->core_waiters++;
sigaddset(&t->pending.signal, SIGKILL);
signal_wake_up(t, 1);
+ nr++;
}
- } while ((t = next_thread(t)) != start);
+ } while_each_thread(start, t);
+
+ return nr;
}
static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
- int exit_code)
+ struct core_state *core_state, int exit_code)
{
struct task_struct *g, *p;
unsigned long flags;
- int err = -EAGAIN;
+ int nr = -EAGAIN;
spin_lock_irq(&tsk->sighand->siglock);
if (!signal_group_exit(tsk->signal)) {
+ mm->core_state = core_state;
tsk->signal->group_exit_code = exit_code;
- zap_process(tsk);
- err = 0;
+ nr = zap_process(tsk);
}
spin_unlock_irq(&tsk->sighand->siglock);
- if (err)
- return err;
+ if (unlikely(nr < 0))
+ return nr;
- if (atomic_read(&mm->mm_users) == mm->core_waiters + 1)
+ if (atomic_read(&mm->mm_users) == nr + 1)
goto done;
-
+ /*
+ * We should find and kill all tasks which use this mm, and we should
+ * count them correctly into ->nr_threads. We don't take tasklist
+ * lock, but this is safe wrt:
+ *
+ * fork:
+ * None of sub-threads can fork after zap_process(leader). All
+ * processes which were created before this point should be
+ * visible to zap_threads() because copy_process() adds the new
+ * process to the tail of init_task.tasks list, and lock/unlock
+ * of ->siglock provides a memory barrier.
+ *
+ * do_exit:
+ * The caller holds mm->mmap_sem. This means that the task which
+ * uses this mm can't pass exit_mm(), so it can't exit or clear
+ * its ->mm.
+ *
+ * de_thread:
+ * It does list_replace_rcu(&leader->tasks, ¤t->tasks),
+ * we must see either old or new leader, this does not matter.
+ * However, it can change p->sighand, so lock_task_sighand(p)
+ * must be used. Since p->mm != NULL and we hold ->mmap_sem
+ * it can't fail.
+ *
+ * Note also that "g" can be the old leader with ->mm == NULL
+ * and already unhashed and thus removed from ->thread_group.
+ * This is OK, __unhash_process()->list_del_rcu() does not
+ * clear the ->next pointer, we will find the new leader via
+ * next_thread().
+ */
rcu_read_lock();
for_each_process(g) {
if (g == tsk->group_leader)
continue;
-
+ if (g->flags & PF_KTHREAD)
+ continue;
p = g;
do {
if (p->mm) {
- if (p->mm == mm) {
- /*
- * p->sighand can't disappear, but
- * may be changed by de_thread()
- */
+ if (unlikely(p->mm == mm)) {
lock_task_sighand(p, &flags);
- zap_process(p);
+ nr += zap_process(p);
unlock_task_sighand(p, &flags);
}
break;
}
- } while ((p = next_thread(p)) != g);
+ } while_each_thread(g, p);
}
rcu_read_unlock();
done:
- return mm->core_waiters;
+ atomic_set(&core_state->nr_threads, nr);
+ return nr;
}
-static int coredump_wait(int exit_code)
+static int coredump_wait(int exit_code, struct core_state *core_state)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
- struct completion startup_done;
struct completion *vfork_done;
int core_waiters;
- init_completion(&mm->core_done);
- init_completion(&startup_done);
- mm->core_startup_done = &startup_done;
-
- core_waiters = zap_threads(tsk, mm, exit_code);
+ init_completion(&core_state->startup);
+ core_state->dumper.task = tsk;
+ core_state->dumper.next = NULL;
+ core_waiters = zap_threads(tsk, mm, core_state, exit_code);
up_write(&mm->mmap_sem);
if (unlikely(core_waiters < 0))
}
if (core_waiters)
- wait_for_completion(&startup_done);
+ wait_for_completion(&core_state->startup);
fail:
- BUG_ON(mm->core_waiters);
return core_waiters;
}
+static void coredump_finish(struct mm_struct *mm)
+{
+ struct core_thread *curr, *next;
+ struct task_struct *task;
+
+ next = mm->core_state->dumper.next;
+ while ((curr = next) != NULL) {
+ next = curr->next;
+ task = curr->task;
+ /*
+ * see exit_mm(), curr->task must not see
+ * ->task == NULL before we read ->next.
+ */
+ smp_mb();
+ curr->task = NULL;
+ wake_up_process(task);
+ }
+
+ mm->core_state = NULL;
+}
+
/*
* set_dumpable converts traditional three-value dumpable to two flags and
* stores them into mm->flags. It modifies lower two bits of mm->flags, but
int do_coredump(long signr, int exit_code, struct pt_regs * regs)
{
+ struct core_state core_state;
char corename[CORENAME_MAX_SIZE + 1];
struct mm_struct *mm = current->mm;
struct linux_binfmt * binfmt;
/*
* If another thread got here first, or we are not dumpable, bail out.
*/
- if (mm->core_waiters || !get_dumpable(mm)) {
+ if (mm->core_state || !get_dumpable(mm)) {
up_write(&mm->mmap_sem);
goto fail;
}
current->fsuid = 0; /* Dump root private */
}
- retval = coredump_wait(exit_code);
+ retval = coredump_wait(exit_code, &core_state);
if (retval < 0)
goto fail;
* uses lock_kernel()
*/
lock_kernel();
- ispipe = format_corename(corename, core_pattern, signr);
+ ispipe = format_corename(corename, retval, signr);
unlock_kernel();
/*
* Don't bother to check the RLIMIT_CORE value if core_pattern points
argv_free(helper_argv);
current->fsuid = fsuid;
- complete_all(&mm->core_done);
+ coredump_finish(mm);
fail:
return retval;
}
#include <linux/seq_file.h>
#include <linux/mount.h>
#include <linux/log2.h>
+#include <linux/quotaops.h>
#include <asm/uaccess.h>
#include "ext2.h"
#include "xattr.h"
ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
+ const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
if (list && total_len <= list_size) {
#include <linux/ext2_fs.h>
#include "xattr.h"
-#define XATTR_TRUSTED_PREFIX "trusted."
-
static size_t
ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
+ const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
if (!capable(CAP_SYS_ADMIN))
#include "ext2.h"
#include "xattr.h"
-#define XATTR_USER_PREFIX "user."
-
static size_t
ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
+ const size_t prefix_len = XATTR_USER_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
if (!test_opt(inode->i_sb, XATTR_USER))
while (n) {
/* Do the node's children first */
- if ((n)->rb_left) {
+ if (n->rb_left) {
n = n->rb_left;
continue;
}
parent->rb_right = NULL;
n = parent;
}
- root->rb_node = NULL;
}
-static struct dir_private_info *create_dir_info(loff_t pos)
+static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
{
struct dir_private_info *p;
- p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
+ p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
if (!p)
return NULL;
- p->root.rb_node = NULL;
- p->curr_node = NULL;
- p->extra_fname = NULL;
- p->last_pos = 0;
p->curr_hash = pos2maj_hash(pos);
p->curr_minor_hash = pos2min_hash(pos);
- p->next_hash = 0;
return p;
}
int ret;
if (!info) {
- info = create_dir_info(filp->f_pos);
+ info = ext3_htree_create_dir_info(filp->f_pos);
if (!info)
return -ENOMEM;
filp->private_data = info;
if (IS_ERR(inode))
goto iget_failed;
+ /*
+ * If the orphans has i_nlinks > 0 then it should be able to be
+ * truncated, otherwise it won't be removed from the orphan list
+ * during processing and an infinite loop will result.
+ */
+ if (inode->i_nlink && !ext3_can_truncate(inode))
+ goto bad_orphan;
+
if (NEXT_ORPHAN(inode) > max_ino)
goto bad_orphan;
brelse(bitmap_bh);
printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
NEXT_ORPHAN(inode));
printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
+ printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
/* Avoid freeing blocks if we got a bad deleted inode */
if (inode->i_nlink == 0)
inode->i_blocks = 0;
if (this_bh) {
BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle, this_bh);
+
+ /*
+ * The buffer head should have an attached journal head at this
+ * point. However, if the data is corrupted and an indirect
+ * block pointed to itself, it would have been detached when
+ * the block was cleared. Check for this instead of OOPSing.
+ */
+ if (bh2jh(this_bh))
+ ext3_journal_dirty_metadata(handle, this_bh);
+ else
+ ext3_error(inode->i_sb, "ext3_free_data",
+ "circular indirect block detected, "
+ "inode=%lu, block=%llu",
+ inode->i_ino,
+ (unsigned long long)this_bh->b_blocknr);
}
}
}
}
+int ext3_can_truncate(struct inode *inode)
+{
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ return 0;
+ if (S_ISREG(inode->i_mode))
+ return 1;
+ if (S_ISDIR(inode->i_mode))
+ return 1;
+ if (S_ISLNK(inode->i_mode))
+ return !ext3_inode_is_fast_symlink(inode);
+ return 0;
+}
+
/*
* ext3_truncate()
*
unsigned blocksize = inode->i_sb->s_blocksize;
struct page *page;
- if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- S_ISLNK(inode->i_mode)))
- return;
- if (ext3_inode_is_fast_symlink(inode))
- return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ if (!ext3_can_truncate(inode))
return;
/*
}
if (!buffer_uptodate(bh)) {
lock_buffer(bh);
+
+ /*
+ * If the buffer has the write error flag, we have failed
+ * to write out another inode in the same block. In this
+ * case, we don't have to read the block because we may
+ * read the old inode data successfully.
+ */
+ if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+
if (buffer_uptodate(bh)) {
/* someone brought it uptodate while we waited */
unlock_buffer(bh);
{
unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
EXT3_DIR_REC_LEN(2) - infosize;
- return 0? 20: entry_space / sizeof(struct dx_entry);
+ return entry_space / sizeof(struct dx_entry);
}
static inline unsigned dx_node_limit (struct inode *dir)
{
unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
- return 0? 22: entry_space / sizeof(struct dx_entry);
+ return entry_space / sizeof(struct dx_entry);
}
/*
de = (struct ext3_dir_entry_2 *) bh->b_data;
top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
EXT3_DIR_REC_LEN(0));
- for (; de < top; de = ext3_next_entry(de))
- if (ext3_match (namelen, name, de)) {
- if (!ext3_check_dir_entry("ext3_find_entry",
- dir, de, bh,
- (block<<EXT3_BLOCK_SIZE_BITS(sb))
- +((char *)de - bh->b_data))) {
- brelse (bh);
+ for (; de < top; de = ext3_next_entry(de)) {
+ int off = (block << EXT3_BLOCK_SIZE_BITS(sb))
+ + ((char *) de - bh->b_data);
+
+ if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) {
+ brelse(bh);
*err = ERR_BAD_DX_DIR;
goto errout;
}
- *res_dir = de;
- dx_release (frames);
- return bh;
+
+ if (ext3_match(namelen, name, de)) {
+ *res_dir = de;
+ dx_release(frames);
+ return bh;
+ }
}
brelse (bh);
/* Check to see if we should continue to search */
int data_opt = 0;
int option;
#ifdef CONFIG_QUOTA
- int qtype;
+ int qtype, qfmt;
char *qname;
#endif
case Opt_grpjquota:
qtype = GRPQUOTA;
set_qf_name:
- if (sb_any_quota_enabled(sb)) {
+ if ((sb_any_quota_enabled(sb) ||
+ sb_any_quota_suspended(sb)) &&
+ !sbi->s_qf_names[qtype]) {
printk(KERN_ERR
- "EXT3-fs: Cannot change journalled "
+ "EXT3-fs: Cannot change journaled "
"quota options when quota turned on.\n");
return 0;
}
case Opt_offgrpjquota:
qtype = GRPQUOTA;
clear_qf_name:
- if (sb_any_quota_enabled(sb)) {
+ if ((sb_any_quota_enabled(sb) ||
+ sb_any_quota_suspended(sb)) &&
+ sbi->s_qf_names[qtype]) {
printk(KERN_ERR "EXT3-fs: Cannot change "
- "journalled quota options when "
+ "journaled quota options when "
"quota turned on.\n");
return 0;
}
sbi->s_qf_names[qtype] = NULL;
break;
case Opt_jqfmt_vfsold:
- sbi->s_jquota_fmt = QFMT_VFS_OLD;
- break;
+ qfmt = QFMT_VFS_OLD;
+ goto set_qf_format;
case Opt_jqfmt_vfsv0:
- sbi->s_jquota_fmt = QFMT_VFS_V0;
+ qfmt = QFMT_VFS_V0;
+set_qf_format:
+ if ((sb_any_quota_enabled(sb) ||
+ sb_any_quota_suspended(sb)) &&
+ sbi->s_jquota_fmt != qfmt) {
+ printk(KERN_ERR "EXT3-fs: Cannot change "
+ "journaled quota options when "
+ "quota turned on.\n");
+ return 0;
+ }
+ sbi->s_jquota_fmt = qfmt;
break;
case Opt_quota:
case Opt_usrquota:
set_opt(sbi->s_mount_opt, GRPQUOTA);
break;
case Opt_noquota:
- if (sb_any_quota_enabled(sb)) {
+ if (sb_any_quota_enabled(sb) ||
+ sb_any_quota_suspended(sb)) {
printk(KERN_ERR "EXT3-fs: Cannot change quota "
"options when quota turned on.\n");
return 0;
}
if (!sbi->s_jquota_fmt) {
- printk(KERN_ERR "EXT3-fs: journalled quota format "
+ printk(KERN_ERR "EXT3-fs: journaled quota format "
"not specified.\n");
return 0;
}
} else {
if (sbi->s_jquota_fmt) {
- printk(KERN_ERR "EXT3-fs: journalled quota format "
- "specified with no journalling "
+ printk(KERN_ERR "EXT3-fs: journaled quota format "
+ "specified with no journaling "
"enabled.\n");
return 0;
}
int ret = ext3_quota_on_mount(sb, i);
if (ret < 0)
printk(KERN_ERR
- "EXT3-fs: Cannot turn on journalled "
+ "EXT3-fs: Cannot turn on journaled "
"quota: error %d\n", ret);
}
}
static int ext3_mark_dquot_dirty(struct dquot *dquot)
{
- /* Are we journalling quotas? */
+ /* Are we journaling quotas? */
if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
dquot_mark_dquot_dirty(dquot);
if (!test_opt(sb, QUOTA))
return -EINVAL;
- /* Not journalling quota or remount? */
- if ((!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
- !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
+ /* When remounting, no checks are needed and in fact, path is NULL */
+ if (remount)
return vfs_quota_on(sb, type, format_id, path, remount);
+
err = path_lookup(path, LOOKUP_FOLLOW, &nd);
if (err)
return err;
+
/* Quotafile not on the same filesystem? */
if (nd.path.mnt->mnt_sb != sb) {
path_put(&nd.path);
return -EXDEV;
}
- /* Quotafile not in fs root? */
- if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
- printk(KERN_WARNING
- "EXT3-fs: Quota file not on filesystem root. "
- "Journalled quota will not work.\n");
+ /* Journaling quota? */
+ if (EXT3_SB(sb)->s_qf_names[type]) {
+ /* Quotafile not of fs root? */
+ if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
+ printk(KERN_WARNING
+ "EXT3-fs: Quota file not on filesystem root. "
+ "Journaled quota will not work.\n");
+ }
+
+ /*
+ * When we journal data on quota file, we have to flush journal to see
+ * all updates to the file when we bypass pagecache...
+ */
+ if (ext3_should_journal_data(nd.path.dentry->d_inode)) {
+ /*
+ * We don't need to lock updates but journal_flush() could
+ * otherwise be livelocked...
+ */
+ journal_lock_updates(EXT3_SB(sb)->s_journal);
+ journal_flush(EXT3_SB(sb)->s_journal);
+ journal_unlock_updates(EXT3_SB(sb)->s_journal);
+ }
+
path_put(&nd.path);
return vfs_quota_on(sb, type, format_id, path, remount);
}
ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
+ const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
#include <linux/ext3_fs.h>
#include "xattr.h"
-#define XATTR_TRUSTED_PREFIX "trusted."
-
static size_t
ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1;
+ const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
if (!capable(CAP_SYS_ADMIN))
#include <linux/ext3_fs.h>
#include "xattr.h"
-#define XATTR_USER_PREFIX "user."
-
static size_t
ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
const char *name, size_t name_len)
{
- const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1;
+ const size_t prefix_len = XATTR_USER_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
if (!test_opt(inode->i_sb, XATTR_USER))
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/msdos_fs.h>
-#include <linux/dirent.h>
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/compat.h>
* but ignore that right now.
* Ahem... Stack smashing in ring 0 isn't fun. Fixed.
*/
-static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len,
+static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
int uni_xlate, struct nls_table *nls)
{
- wchar_t *ip, ec;
+ const wchar_t *ip;
+ wchar_t ec;
unsigned char *op, nc;
int charlen;
int k;
return (op - ascii);
}
+static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
+ unsigned char *buf, int size)
+{
+ if (sbi->options.utf8)
+ return utf8_wcstombs(buf, uni, size);
+ else
+ return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
+ sbi->nls_io);
+}
+
static inline int
fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni)
{
return len;
}
+static inline int fat_name_match(struct msdos_sb_info *sbi,
+ const unsigned char *a, int a_len,
+ const unsigned char *b, int b_len)
+{
+ if (a_len != b_len)
+ return 0;
+
+ if (sbi->options.name_check != 's')
+ return !nls_strnicmp(sbi->nls_io, a, b, a_len);
+ else
+ return !memcmp(a, b, a_len);
+}
+
enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, };
/**
return 0;
}
+/*
+ * Maximum buffer size of short name.
+ * [(MSDOS_NAME + '.') * max one char + nul]
+ * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
+ */
+#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
+/*
+ * Maximum buffer size of unicode chars from slots.
+ * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
+ */
+#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1)
+#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t))
+
/*
* Return values: negative -> error, 0 -> not found, positive -> found,
* value is the total amount of slots, including the shortname entry.
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct buffer_head *bh = NULL;
struct msdos_dir_entry *de;
- struct nls_table *nls_io = sbi->nls_io;
struct nls_table *nls_disk = sbi->nls_disk;
- wchar_t bufuname[14];
unsigned char nr_slots;
- int xlate_len;
+ wchar_t bufuname[14];
wchar_t *unicode = NULL;
unsigned char work[MSDOS_NAME];
- unsigned char *bufname = NULL;
- int uni_xlate = sbi->options.unicode_xlate;
- int utf8 = sbi->options.utf8;
- int anycase = (sbi->options.name_check != 's');
+ unsigned char bufname[FAT_MAX_SHORT_SIZE];
unsigned short opt_shortname = sbi->options.shortname;
loff_t cpos = 0;
- int chl, i, j, last_u, err;
-
- bufname = __getname();
- if (!bufname)
- return -ENOMEM;
+ int chl, i, j, last_u, err, len;
err = -ENOENT;
- while(1) {
+ while (1) {
if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
- goto EODir;
+ goto end_of_dir;
parse_record:
nr_slots = 0;
if (de->name[0] == DELETED_FLAG)
else if (status == PARSE_NOT_LONGNAME)
goto parse_record;
else if (status == PARSE_EOF)
- goto EODir;
+ goto end_of_dir;
}
memcpy(work, de->name, sizeof(de->name));
if (!last_u)
continue;
+ /* Compare shortname */
bufuname[last_u] = 0x0000;
- xlate_len = utf8
- ?utf8_wcstombs(bufname, bufuname, PATH_MAX)
- :uni16_to_x8(bufname, bufuname, PATH_MAX, uni_xlate, nls_io);
- if (xlate_len == name_len)
- if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
- (anycase && !nls_strnicmp(nls_io, name, bufname,
- xlate_len)))
- goto Found;
+ len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
+ if (fat_name_match(sbi, name, name_len, bufname, len))
+ goto found;
if (nr_slots) {
- xlate_len = utf8
- ?utf8_wcstombs(bufname, unicode, PATH_MAX)
- :uni16_to_x8(bufname, unicode, PATH_MAX, uni_xlate, nls_io);
- if (xlate_len != name_len)
- continue;
- if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
- (anycase && !nls_strnicmp(nls_io, name, bufname,
- xlate_len)))
- goto Found;
+ void *longname = unicode + FAT_MAX_UNI_CHARS;
+ int size = PATH_MAX - FAT_MAX_UNI_SIZE;
+
+ /* Compare longname */
+ len = fat_uni_to_x8(sbi, unicode, longname, size);
+ if (fat_name_match(sbi, name, name_len, longname, len))
+ goto found;
}
}
-Found:
+found:
nr_slots++; /* include the de */
sinfo->slot_off = cpos - nr_slots * sizeof(*de);
sinfo->nr_slots = nr_slots;
sinfo->bh = bh;
sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de);
err = 0;
-EODir:
- if (bufname)
- __putname(bufname);
+end_of_dir:
if (unicode)
__putname(unicode);
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct buffer_head *bh;
struct msdos_dir_entry *de;
- struct nls_table *nls_io = sbi->nls_io;
struct nls_table *nls_disk = sbi->nls_disk;
- unsigned char long_slots;
- const char *fill_name;
- int fill_len;
+ unsigned char nr_slots;
wchar_t bufuname[14];
wchar_t *unicode = NULL;
- unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname;
- unsigned long lpos, dummy, *furrfu = &lpos;
- int uni_xlate = sbi->options.unicode_xlate;
+ unsigned char c, work[MSDOS_NAME];
+ unsigned char bufname[FAT_MAX_SHORT_SIZE], *ptname = bufname;
+ unsigned short opt_shortname = sbi->options.shortname;
int isvfat = sbi->options.isvfat;
- int utf8 = sbi->options.utf8;
int nocase = sbi->options.nocase;
- unsigned short opt_shortname = sbi->options.shortname;
+ const char *fill_name = NULL;
unsigned long inum;
- int chi, chl, i, i2, j, last, last_u, dotoffset = 0;
+ unsigned long lpos, dummy, *furrfu = &lpos;
loff_t cpos;
+ int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len = 0;
int ret = 0;
lock_super(sb);
cpos = 0;
}
}
- if (cpos & (sizeof(struct msdos_dir_entry)-1)) {
+ if (cpos & (sizeof(struct msdos_dir_entry) - 1)) {
ret = -ENOENT;
goto out;
}
bh = NULL;
-GetNew:
+get_new:
if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
- goto EODir;
+ goto end_of_dir;
parse_record:
- long_slots = 0;
- /* Check for long filename entry */
- if (isvfat) {
+ nr_slots = 0;
+ /*
+ * Check for long filename entry, but if short_only, we don't
+ * need to parse long filename.
+ */
+ if (isvfat && !short_only) {
if (de->name[0] == DELETED_FLAG)
- goto RecEnd;
+ goto record_end;
if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME))
- goto RecEnd;
+ goto record_end;
if (de->attr != ATTR_EXT && IS_FREE(de->name))
- goto RecEnd;
+ goto record_end;
} else {
if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name))
- goto RecEnd;
+ goto record_end;
}
if (isvfat && de->attr == ATTR_EXT) {
int status = fat_parse_long(inode, &cpos, &bh, &de,
- &unicode, &long_slots);
+ &unicode, &nr_slots);
if (status < 0) {
filp->f_pos = cpos;
ret = status;
goto out;
} else if (status == PARSE_INVALID)
- goto RecEnd;
+ goto record_end;
else if (status == PARSE_NOT_LONGNAME)
goto parse_record;
else if (status == PARSE_EOF)
- goto EODir;
+ goto end_of_dir;
+
+ if (nr_slots) {
+ void *longname = unicode + FAT_MAX_UNI_CHARS;
+ int size = PATH_MAX - FAT_MAX_UNI_SIZE;
+ int len = fat_uni_to_x8(sbi, unicode, longname, size);
+
+ fill_name = longname;
+ fill_len = len;
+ /* !both && !short_only, so we don't need shortname. */
+ if (!both)
+ goto start_filldir;
+ }
}
if (sbi->options.dotsOK) {
}
}
if (!last)
- goto RecEnd;
+ goto record_end;
i = last + dotoffset;
j = last_u;
- lpos = cpos - (long_slots+1)*sizeof(struct msdos_dir_entry);
+ if (isvfat) {
+ bufuname[j] = 0x0000;
+ i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
+ }
+ if (nr_slots) {
+ /* hack for fat_ioctl_filldir() */
+ struct fat_ioctl_filldir_callback *p = dirent;
+
+ p->longname = fill_name;
+ p->long_len = fill_len;
+ p->shortname = bufname;
+ p->short_len = i;
+ fill_name = NULL;
+ fill_len = 0;
+ } else {
+ fill_name = bufname;
+ fill_len = i;
+ }
+
+start_filldir:
+ lpos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME))
inum = inode->i_ino;
else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
inum = iunique(sb, MSDOS_ROOT_INO);
}
- if (isvfat) {
- bufuname[j] = 0x0000;
- i = utf8 ? utf8_wcstombs(bufname, bufuname, sizeof(bufname))
- : uni16_to_x8(bufname, bufuname, sizeof(bufname), uni_xlate, nls_io);
- }
-
- fill_name = bufname;
- fill_len = i;
- if (!short_only && long_slots) {
- /* convert the unicode long name. 261 is maximum size
- * of unicode buffer. (13 * slots + nul) */
- void *longname = unicode + 261;
- int buf_size = PATH_MAX - (261 * sizeof(unicode[0]));
- int long_len = utf8
- ? utf8_wcstombs(longname, unicode, buf_size)
- : uni16_to_x8(longname, unicode, buf_size, uni_xlate, nls_io);
-
- if (!both) {
- fill_name = longname;
- fill_len = long_len;
- } else {
- /* hack for fat_ioctl_filldir() */
- struct fat_ioctl_filldir_callback *p = dirent;
-
- p->longname = longname;
- p->long_len = long_len;
- p->shortname = bufname;
- p->short_len = i;
- fill_name = NULL;
- fill_len = 0;
- }
- }
if (filldir(dirent, fill_name, fill_len, *furrfu, inum,
(de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0)
- goto FillFailed;
+ goto fill_failed;
-RecEnd:
+record_end:
furrfu = &lpos;
filp->f_pos = cpos;
- goto GetNew;
-EODir:
+ goto get_new;
+end_of_dir:
filp->f_pos = cpos;
-FillFailed:
+fill_failed:
brelse(bh);
if (unicode)
__putname(unicode);
return -EFAULT; \
}
-FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, dirent)
+FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent)
static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
void __user *dirent, filldir_t filldir,
static int fat_dir_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
- struct dirent __user *d1 = (struct dirent __user *)arg;
+ struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg;
int short_only, both;
switch (cmd) {
return fat_generic_ioctl(inode, filp, cmd, arg);
}
- if (!access_ok(VERIFY_WRITE, d1, sizeof(struct dirent[2])))
+ if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
return -EFAULT;
/*
* Yes, we don't need this put_user() absolutely. However old
goto error_free;
}
- fat_date_unix2dos(ts->tv_sec, &time, &date);
+ fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
de = (struct msdos_dir_entry *)bhs[0]->b_data;
/* filling the new directory slots ("." and ".." entries) */
inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
inode->i_mtime.tv_sec =
- date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date));
+ date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date),
+ sbi->options.tz_utc);
inode->i_mtime.tv_nsec = 0;
if (sbi->options.isvfat) {
int secs = de->ctime_cs / 100;
int csecs = de->ctime_cs % 100;
inode->i_ctime.tv_sec =
date_dos2unix(le16_to_cpu(de->ctime),
- le16_to_cpu(de->cdate)) + secs;
+ le16_to_cpu(de->cdate),
+ sbi->options.tz_utc) + secs;
inode->i_ctime.tv_nsec = csecs * 10000000;
inode->i_atime.tv_sec =
- date_dos2unix(0, le16_to_cpu(de->adate));
+ date_dos2unix(0, le16_to_cpu(de->adate),
+ sbi->options.tz_utc);
inode->i_atime.tv_nsec = 0;
} else
inode->i_ctime = inode->i_atime = inode->i_mtime;
raw_entry->attr = fat_attr(inode);
raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
- fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date);
+ fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time,
+ &raw_entry->date, sbi->options.tz_utc);
if (sbi->options.isvfat) {
__le16 atime;
- fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate);
- fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate);
+ fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime,
+ &raw_entry->cdate, sbi->options.tz_utc);
+ fat_date_unix2dos(inode->i_atime.tv_sec, &atime,
+ &raw_entry->adate, sbi->options.tz_utc);
raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
inode->i_ctime.tv_nsec / 10000000;
}
}
if (sbi->options.flush)
seq_puts(m, ",flush");
+ if (opts->tz_utc)
+ seq_puts(m, ",tz=UTC");
return 0;
}
Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
- Opt_obsolate, Opt_flush, Opt_err,
+ Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err,
};
static match_table_t fat_tokens = {
{Opt_obsolate, "cvf_options=%100s"},
{Opt_obsolate, "posix"},
{Opt_flush, "flush"},
+ {Opt_tz_utc, "tz=UTC"},
{Opt_err, NULL},
};
static match_table_t msdos_tokens = {
opts->utf8 = opts->unicode_xlate = 0;
opts->numtail = 1;
opts->usefree = opts->nocase = 0;
+ opts->tz_utc = 0;
*debug = 0;
if (!options)
- return 0;
+ goto out;
while ((p = strsep(&options, ",")) != NULL) {
int token;
case Opt_flush:
opts->flush = 1;
break;
+ case Opt_tz_utc:
+ opts->tz_utc = 1;
+ break;
/* msdos specific */
case Opt_dots:
return -EINVAL;
}
}
+
+out:
/* UTF-8 doesn't provide FAT semantics */
if (!strcmp(opts->iocharset, "utf8")) {
printk(KERN_ERR "FAT: utf8 is not a recommended IO charset"
- " for FAT filesystems, filesystem will be case sensitive!\n");
+ " for FAT filesystems, filesystem will be "
+ "case sensitive!\n");
}
/* If user doesn't specify allow_utime, it's initialized from dmask. */
};
/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */
-int date_dos2unix(unsigned short time, unsigned short date)
+int date_dos2unix(unsigned short time, unsigned short date, int tz_utc)
{
int month, year, secs;
((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 &&
month < 2 ? 1 : 0)+3653);
/* days since 1.1.70 plus 80's leap day */
- secs += sys_tz.tz_minuteswest*60;
+ if (!tz_utc)
+ secs += sys_tz.tz_minuteswest*60;
return secs;
}
/* Convert linear UNIX date to a MS-DOS time/date pair. */
-void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date)
+void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc)
{
int day, year, nl_day, month;
- unix_date -= sys_tz.tz_minuteswest*60;
+ if (!tz_utc)
+ unix_date -= sys_tz.tz_minuteswest*60;
/* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
if (unix_date < 315532800)
* timeout is unknown (unlink, rmdir, rename and in some cases
* lookup)
*/
-static void fuse_invalidate_entry_cache(struct dentry *entry)
+void fuse_invalidate_entry_cache(struct dentry *entry)
{
fuse_dentry_settime(entry, 0);
}
fuse_invalidate_entry_cache(entry);
}
-static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
- struct dentry *entry,
+static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req,
+ u64 nodeid, struct qstr *name,
struct fuse_entry_out *outarg)
{
- struct fuse_conn *fc = get_fuse_conn(dir);
-
memset(outarg, 0, sizeof(struct fuse_entry_out));
req->in.h.opcode = FUSE_LOOKUP;
- req->in.h.nodeid = get_node_id(dir);
+ req->in.h.nodeid = nodeid;
req->in.numargs = 1;
- req->in.args[0].size = entry->d_name.len + 1;
- req->in.args[0].value = entry->d_name.name;
+ req->in.args[0].size = name->len + 1;
+ req->in.args[0].value = name->name;
req->out.numargs = 1;
if (fc->minor < 9)
req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
attr_version = fuse_get_attr_version(fc);
parent = dget_parent(entry);
- fuse_lookup_init(req, parent->d_inode, entry, &outarg);
+ fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
+ &entry->d_name, &outarg);
request_send(fc, req);
dput(parent);
err = req->out.h.error;
return !nodeid || nodeid == FUSE_ROOT_ID;
}
-static struct dentry_operations fuse_dentry_operations = {
+struct dentry_operations fuse_dentry_operations = {
.d_revalidate = fuse_dentry_revalidate,
};
* Add a directory inode to a dentry, ensuring that no other dentry
* refers to this inode. Called with fc->inst_mutex.
*/
-static int fuse_d_add_directory(struct dentry *entry, struct inode *inode)
+static struct dentry *fuse_d_add_directory(struct dentry *entry,
+ struct inode *inode)
{
struct dentry *alias = d_find_alias(inode);
- if (alias) {
+ if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
/* This tries to shrink the subtree below alias */
fuse_invalidate_entry(alias);
dput(alias);
if (!list_empty(&inode->i_dentry))
- return -EBUSY;
+ return ERR_PTR(-EBUSY);
+ } else {
+ dput(alias);
}
- d_add(entry, inode);
- return 0;
+ return d_splice_alias(inode, entry);
}
-static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
- struct nameidata *nd)
+int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
+ struct fuse_entry_out *outarg, struct inode **inode)
{
- int err;
- struct fuse_entry_out outarg;
- struct inode *inode = NULL;
- struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
struct fuse_req *req;
struct fuse_req *forget_req;
u64 attr_version;
+ int err;
- if (entry->d_name.len > FUSE_NAME_MAX)
- return ERR_PTR(-ENAMETOOLONG);
+ *inode = NULL;
+ err = -ENAMETOOLONG;
+ if (name->len > FUSE_NAME_MAX)
+ goto out;
req = fuse_get_req(fc);
+ err = PTR_ERR(req);
if (IS_ERR(req))
- return ERR_CAST(req);
+ goto out;
forget_req = fuse_get_req(fc);
+ err = PTR_ERR(forget_req);
if (IS_ERR(forget_req)) {
fuse_put_request(fc, req);
- return ERR_CAST(forget_req);
+ goto out;
}
attr_version = fuse_get_attr_version(fc);
- fuse_lookup_init(req, dir, entry, &outarg);
+ fuse_lookup_init(fc, req, nodeid, name, outarg);
request_send(fc, req);
err = req->out.h.error;
fuse_put_request(fc, req);
/* Zero nodeid is same as -ENOENT, but with valid timeout */
- if (!err && outarg.nodeid &&
- (invalid_nodeid(outarg.nodeid) ||
- !fuse_valid_type(outarg.attr.mode)))
- err = -EIO;
- if (!err && outarg.nodeid) {
- inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
- &outarg.attr, entry_attr_timeout(&outarg),
- attr_version);
- if (!inode) {
- fuse_send_forget(fc, forget_req, outarg.nodeid, 1);
- return ERR_PTR(-ENOMEM);
- }
+ if (err || !outarg->nodeid)
+ goto out_put_forget;
+
+ err = -EIO;
+ if (!outarg->nodeid)
+ goto out_put_forget;
+ if (!fuse_valid_type(outarg->attr.mode))
+ goto out_put_forget;
+
+ *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
+ &outarg->attr, entry_attr_timeout(outarg),
+ attr_version);
+ err = -ENOMEM;
+ if (!*inode) {
+ fuse_send_forget(fc, forget_req, outarg->nodeid, 1);
+ goto out;
}
+ err = 0;
+
+ out_put_forget:
fuse_put_request(fc, forget_req);
- if (err && err != -ENOENT)
- return ERR_PTR(err);
+ out:
+ return err;
+}
+
+static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
+ struct nameidata *nd)
+{
+ int err;
+ struct fuse_entry_out outarg;
+ struct inode *inode;
+ struct dentry *newent;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ bool outarg_valid = true;
+
+ err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
+ &outarg, &inode);
+ if (err == -ENOENT) {
+ outarg_valid = false;
+ err = 0;
+ }
+ if (err)
+ goto out_err;
+
+ err = -EIO;
+ if (inode && get_node_id(inode) == FUSE_ROOT_ID)
+ goto out_iput;
if (inode && S_ISDIR(inode->i_mode)) {
mutex_lock(&fc->inst_mutex);
- err = fuse_d_add_directory(entry, inode);
+ newent = fuse_d_add_directory(entry, inode);
mutex_unlock(&fc->inst_mutex);
- if (err) {
- iput(inode);
- return ERR_PTR(err);
- }
- } else
- d_add(entry, inode);
+ err = PTR_ERR(newent);
+ if (IS_ERR(newent))
+ goto out_iput;
+ } else {
+ newent = d_splice_alias(inode, entry);
+ }
+ entry = newent ? newent : entry;
entry->d_op = &fuse_dentry_operations;
- if (!err)
+ if (outarg_valid)
fuse_change_entry_timeout(entry, &outarg);
else
fuse_invalidate_entry_cache(entry);
- return NULL;
+
+ return newent;
+
+ out_iput:
+ iput(inode);
+ out_err:
+ return ERR_PTR(err);
}
/*
pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
int err;
+ if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
+ /* NLM needs asynchronous locks, which we don't support yet */
+ return -ENOLCK;
+ }
+
/* Unlock on close is handled by the flush method */
if (fl->fl_flags & FL_CLOSE)
return 0;
struct fuse_conn *fc = get_fuse_conn(inode);
int err;
- if (cmd == F_GETLK) {
+ if (cmd == F_CANCELLK) {
+ err = 0;
+ } else if (cmd == F_GETLK) {
if (fc->no_lock) {
posix_test_lock(file, fl);
err = 0;
err = fuse_getlk(file, fl);
} else {
if (fc->no_lock)
- err = posix_lock_file_wait(file, fl);
+ err = posix_lock_file(file, fl, NULL);
else
err = fuse_setlk(file, fl, 0);
}
/** Do not send separate SETATTR request before open(O_TRUNC) */
unsigned atomic_o_trunc : 1;
+ /** Filesystem supports NFS exporting. Only set in INIT */
+ unsigned export_support : 1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
/** Device operations */
extern const struct file_operations fuse_dev_operations;
+extern struct dentry_operations fuse_dentry_operations;
+
/**
* Get a filled in inode
*/
int generation, struct fuse_attr *attr,
u64 attr_valid, u64 attr_version);
+int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
+ struct fuse_entry_out *outarg, struct inode **inode);
+
/**
* Send FORGET command
*/
*/
void fuse_invalidate_attr(struct inode *inode);
+void fuse_invalidate_entry_cache(struct dentry *entry);
+
/**
* Acquire reference to fuse_conn
*/
#include <linux/statfs.h>
#include <linux/random.h>
#include <linux/sched.h>
+#include <linux/exportfs.h>
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
return fuse_iget(sb, 1, 0, &attr, 0, 0);
}
+struct fuse_inode_handle
+{
+ u64 nodeid;
+ u32 generation;
+};
+
+static struct dentry *fuse_get_dentry(struct super_block *sb,
+ struct fuse_inode_handle *handle)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+ struct inode *inode;
+ struct dentry *entry;
+ int err = -ESTALE;
+
+ if (handle->nodeid == 0)
+ goto out_err;
+
+ inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
+ if (!inode) {
+ struct fuse_entry_out outarg;
+ struct qstr name;
+
+ if (!fc->export_support)
+ goto out_err;
+
+ name.len = 1;
+ name.name = ".";
+ err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
+ &inode);
+ if (err && err != -ENOENT)
+ goto out_err;
+ if (err || !inode) {
+ err = -ESTALE;
+ goto out_err;
+ }
+ err = -EIO;
+ if (get_node_id(inode) != handle->nodeid)
+ goto out_iput;
+ }
+ err = -ESTALE;
+ if (inode->i_generation != handle->generation)
+ goto out_iput;
+
+ entry = d_alloc_anon(inode);
+ err = -ENOMEM;
+ if (!entry)
+ goto out_iput;
+
+ if (get_node_id(inode) != FUSE_ROOT_ID) {
+ entry->d_op = &fuse_dentry_operations;
+ fuse_invalidate_entry_cache(entry);
+ }
+
+ return entry;
+
+ out_iput:
+ iput(inode);
+ out_err:
+ return ERR_PTR(err);
+}
+
+static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
+ int connectable)
+{
+ struct inode *inode = dentry->d_inode;
+ bool encode_parent = connectable && !S_ISDIR(inode->i_mode);
+ int len = encode_parent ? 6 : 3;
+ u64 nodeid;
+ u32 generation;
+
+ if (*max_len < len)
+ return 255;
+
+ nodeid = get_fuse_inode(inode)->nodeid;
+ generation = inode->i_generation;
+
+ fh[0] = (u32)(nodeid >> 32);
+ fh[1] = (u32)(nodeid & 0xffffffff);
+ fh[2] = generation;
+
+ if (encode_parent) {
+ struct inode *parent;
+
+ spin_lock(&dentry->d_lock);
+ parent = dentry->d_parent->d_inode;
+ nodeid = get_fuse_inode(parent)->nodeid;
+ generation = parent->i_generation;
+ spin_unlock(&dentry->d_lock);
+
+ fh[3] = (u32)(nodeid >> 32);
+ fh[4] = (u32)(nodeid & 0xffffffff);
+ fh[5] = generation;
+ }
+
+ *max_len = len;
+ return encode_parent ? 0x82 : 0x81;
+}
+
+static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
+ struct fid *fid, int fh_len, int fh_type)
+{
+ struct fuse_inode_handle handle;
+
+ if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
+ return NULL;
+
+ handle.nodeid = (u64) fid->raw[0] << 32;
+ handle.nodeid |= (u64) fid->raw[1];
+ handle.generation = fid->raw[2];
+ return fuse_get_dentry(sb, &handle);
+}
+
+static struct dentry *fuse_fh_to_parent(struct super_block *sb,
+ struct fid *fid, int fh_len, int fh_type)
+{
+ struct fuse_inode_handle parent;
+
+ if (fh_type != 0x82 || fh_len < 6)
+ return NULL;
+
+ parent.nodeid = (u64) fid->raw[3] << 32;
+ parent.nodeid |= (u64) fid->raw[4];
+ parent.generation = fid->raw[5];
+ return fuse_get_dentry(sb, &parent);
+}
+
+static struct dentry *fuse_get_parent(struct dentry *child)
+{
+ struct inode *child_inode = child->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(child_inode);
+ struct inode *inode;
+ struct dentry *parent;
+ struct fuse_entry_out outarg;
+ struct qstr name;
+ int err;
+
+ if (!fc->export_support)
+ return ERR_PTR(-ESTALE);
+
+ name.len = 2;
+ name.name = "..";
+ err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
+ &name, &outarg, &inode);
+ if (err && err != -ENOENT)
+ return ERR_PTR(err);
+ if (err || !inode)
+ return ERR_PTR(-ESTALE);
+
+ parent = d_alloc_anon(inode);
+ if (!parent) {
+ iput(inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ if (get_node_id(inode) != FUSE_ROOT_ID) {
+ parent->d_op = &fuse_dentry_operations;
+ fuse_invalidate_entry_cache(parent);
+ }
+
+ return parent;
+}
+
+static const struct export_operations fuse_export_operations = {
+ .fh_to_dentry = fuse_fh_to_dentry,
+ .fh_to_parent = fuse_fh_to_parent,
+ .encode_fh = fuse_encode_fh,
+ .get_parent = fuse_get_parent,
+};
+
static const struct super_operations fuse_super_operations = {
.alloc_inode = fuse_alloc_inode,
.destroy_inode = fuse_destroy_inode,
fc->no_lock = 1;
if (arg->flags & FUSE_ATOMIC_O_TRUNC)
fc->atomic_o_trunc = 1;
+ if (arg->minor >= 9) {
+ /* LOOKUP has dependency on proto version */
+ if (arg->flags & FUSE_EXPORT_SUPPORT)
+ fc->export_support = 1;
+ }
if (arg->flags & FUSE_BIG_WRITES)
fc->big_writes = 1;
} else {
arg->minor = FUSE_KERNEL_MINOR_VERSION;
arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
- FUSE_BIG_WRITES;
+ FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
sb->s_magic = FUSE_SUPER_MAGIC;
sb->s_op = &fuse_super_operations;
sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_export_op = &fuse_export_operations;
file = fget(d.fd);
if (!file)
if (!*num_bits)
return 0;
- down(&HFS_SB(sb)->bitmap_lock);
+ mutex_lock(&HFS_SB(sb)->bitmap_lock);
bitmap = HFS_SB(sb)->bitmap;
pos = hfs_find_set_zero_bits(bitmap, HFS_SB(sb)->fs_ablocks, goal, num_bits);
HFS_SB(sb)->free_ablocks -= *num_bits;
hfs_bitmap_dirty(sb);
out:
- up(&HFS_SB(sb)->bitmap_lock);
+ mutex_unlock(&HFS_SB(sb)->bitmap_lock);
return pos;
}
if ((start + count) > HFS_SB(sb)->fs_ablocks)
return -2;
- down(&HFS_SB(sb)->bitmap_lock);
+ mutex_lock(&HFS_SB(sb)->bitmap_lock);
/* bitmap is always on a 32-bit boundary */
curr = HFS_SB(sb)->bitmap + (start / 32);
len = count;
}
out:
HFS_SB(sb)->free_ablocks += len;
- up(&HFS_SB(sb)->bitmap_lock);
+ mutex_unlock(&HFS_SB(sb)->bitmap_lock);
hfs_bitmap_dirty(sb);
return 0;
{
struct hfs_mdb *mdb = HFS_SB(sb)->mdb;
HFS_I(tree->inode)->flags = 0;
- init_MUTEX(&HFS_I(tree->inode)->extents_lock);
+ mutex_init(&HFS_I(tree->inode)->extents_lock);
switch (id) {
case HFS_EXT_CNID:
hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize,
goto done;
}
- down(&HFS_I(inode)->extents_lock);
+ mutex_lock(&HFS_I(inode)->extents_lock);
res = hfs_ext_read_extent(inode, ablock);
if (!res)
dblock = hfs_ext_find_block(HFS_I(inode)->cached_extents,
ablock - HFS_I(inode)->cached_start);
else {
- up(&HFS_I(inode)->extents_lock);
+ mutex_unlock(&HFS_I(inode)->extents_lock);
return -EIO;
}
- up(&HFS_I(inode)->extents_lock);
+ mutex_unlock(&HFS_I(inode)->extents_lock);
done:
map_bh(bh_result, sb, HFS_SB(sb)->fs_start +
u32 start, len, goal;
int res;
- down(&HFS_I(inode)->extents_lock);
+ mutex_lock(&HFS_I(inode)->extents_lock);
if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks)
goal = hfs_ext_lastblock(HFS_I(inode)->first_extents);
else {
goto insert_extent;
}
out:
- up(&HFS_I(inode)->extents_lock);
+ mutex_unlock(&HFS_I(inode)->extents_lock);
if (!res) {
HFS_I(inode)->alloc_blocks += len;
mark_inode_dirty(inode);
if (blk_cnt == alloc_cnt)
goto out;
- down(&HFS_I(inode)->extents_lock);
+ mutex_lock(&HFS_I(inode)->extents_lock);
hfs_find_init(HFS_SB(sb)->ext_tree, &fd);
while (1) {
if (alloc_cnt == HFS_I(inode)->first_blocks) {
hfs_brec_remove(&fd);
}
hfs_find_exit(&fd);
- up(&HFS_I(inode)->extents_lock);
+ mutex_unlock(&HFS_I(inode)->extents_lock);
HFS_I(inode)->alloc_blocks = blk_cnt;
out:
#include <linux/slab.h>
#include <linux/types.h>
+#include <linux/mutex.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>
struct list_head open_dir_list;
struct inode *rsrc_inode;
- struct semaphore extents_lock;
+ struct mutex extents_lock;
u16 alloc_blocks, clump_blocks;
sector_t fs_blocks;
struct nls_table *nls_io, *nls_disk;
- struct semaphore bitmap_lock;
+ struct mutex bitmap_lock;
unsigned long flags;
if (!inode)
return NULL;
- init_MUTEX(&HFS_I(inode)->extents_lock);
+ mutex_init(&HFS_I(inode)->extents_lock);
INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
inode->i_ino = HFS_SB(sb)->next_id++;
HFS_I(inode)->flags = 0;
HFS_I(inode)->rsrc_inode = NULL;
- init_MUTEX(&HFS_I(inode)->extents_lock);
+ mutex_init(&HFS_I(inode)->extents_lock);
INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
/* Initialize the inode */
sb->s_op = &hfs_super_operations;
sb->s_flags |= MS_NODIRATIME;
- init_MUTEX(&sbi->bitmap_lock);
+ mutex_init(&sbi->bitmap_lock);
res = hfs_mdb_get(sb);
if (res) {
goto done;
}
- down(&HFSPLUS_I(inode).extents_lock);
+ mutex_lock(&HFSPLUS_I(inode).extents_lock);
res = hfsplus_ext_read_extent(inode, ablock);
if (!res) {
dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock -
HFSPLUS_I(inode).cached_start);
} else {
- up(&HFSPLUS_I(inode).extents_lock);
+ mutex_unlock(&HFSPLUS_I(inode).extents_lock);
return -EIO;
}
- up(&HFSPLUS_I(inode).extents_lock);
+ mutex_unlock(&HFSPLUS_I(inode).extents_lock);
done:
dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock);
return -ENOSPC;
}
- down(&HFSPLUS_I(inode).extents_lock);
+ mutex_lock(&HFSPLUS_I(inode).extents_lock);
if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks)
goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents);
else {
goto insert_extent;
}
out:
- up(&HFSPLUS_I(inode).extents_lock);
+ mutex_unlock(&HFSPLUS_I(inode).extents_lock);
if (!res) {
HFSPLUS_I(inode).alloc_blocks += len;
mark_inode_dirty(inode);
if (blk_cnt == alloc_cnt)
goto out;
- down(&HFSPLUS_I(inode).extents_lock);
+ mutex_lock(&HFSPLUS_I(inode).extents_lock);
hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd);
while (1) {
if (alloc_cnt == HFSPLUS_I(inode).first_blocks) {
hfs_brec_remove(&fd);
}
hfs_find_exit(&fd);
- up(&HFSPLUS_I(inode).extents_lock);
+ mutex_unlock(&HFSPLUS_I(inode).extents_lock);
HFSPLUS_I(inode).alloc_blocks = blk_cnt;
out:
#define _LINUX_HFSPLUS_FS_H
#include <linux/fs.h>
+#include <linux/mutex.h>
#include <linux/buffer_head.h>
#include "hfsplus_raw.h"
struct hfsplus_inode_info {
- struct semaphore extents_lock;
+ struct mutex extents_lock;
u32 clump_blocks, alloc_blocks;
sector_t fs_blocks;
/* Allocation extents from catalog record or volume header */
inode->i_ino = dir->i_ino;
INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
- init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+ mutex_init(&HFSPLUS_I(inode).extents_lock);
HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC;
hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
inode->i_nlink = 1;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
- init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+ mutex_init(&HFSPLUS_I(inode).extents_lock);
atomic_set(&HFSPLUS_I(inode).opencnt, 0);
HFSPLUS_I(inode).flags = 0;
memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec));
return inode;
INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
- init_MUTEX(&HFSPLUS_I(inode).extents_lock);
+ mutex_init(&HFSPLUS_I(inode).extents_lock);
HFSPLUS_I(inode).flags = 0;
HFSPLUS_I(inode).rsrc_inode = NULL;
atomic_set(&HFSPLUS_I(inode).opencnt, 0);
while (rs.len > 2) { /* There may be one byte for padding somewhere */
rr = (struct rock_ridge *)rs.chr;
+ /*
+ * Ignore rock ridge info if rr->len is out of range, but
+ * don't return -EIO because that would make the file
+ * invisible.
+ */
if (rr->len < 3)
goto out; /* Something got screwed up here */
sig = isonum_721(rs.chr);
goto eio;
rs.chr += rr->len;
rs.len -= rr->len;
+ /*
+ * As above, just ignore the rock ridge info if rr->len
+ * is bogus.
+ */
if (rs.len < 0)
- goto eio; /* corrupted isofs */
+ goto out; /* Something got screwed up here */
switch (sig) {
case SIG('R', 'R'):
repeat:
while (rs.len > 2) { /* There may be one byte for padding somewhere */
rr = (struct rock_ridge *)rs.chr;
+ /*
+ * Ignore rock ridge info if rr->len is out of range, but
+ * don't return -EIO because that would make the file
+ * invisible.
+ */
if (rr->len < 3)
goto out; /* Something got screwed up here */
sig = isonum_721(rs.chr);
goto eio;
rs.chr += rr->len;
rs.len -= rr->len;
+ /*
+ * As above, just ignore the rock ridge info if rr->len
+ * is bogus.
+ */
if (rs.len < 0)
- goto eio; /* corrupted isofs */
+ goto out; /* Something got screwed up here */
switch (sig) {
#ifndef CONFIG_ZISOFS /* No flag for SF or ZF */
/*
* When an ext3-ordered file is truncated, it is possible that many pages are
- * not sucessfully freed, because they are attached to a committing transaction.
+ * not successfully freed, because they are attached to a committing transaction.
* After the transaction commits, these pages are left on the LRU, with no
* ->mapping, and with attached buffers. These pages are trivially reclaimable
* by the VM, but their apparent absence upsets the VM accounting, and it makes
* So here, we have a buffer which has just come off the forget list. Look to
* see if we can strip all buffers from the backing page.
*
- * Called under lock_journal(), and possibly under journal_datalist_lock. The
- * caller provided us with a ref against the buffer, and we drop that here.
+ * Called under journal->j_list_lock. The caller provided us with a ref
+ * against the buffer, and we drop that here.
*/
static void release_buffer_page(struct buffer_head *bh)
{
__brelse(bh);
}
+/*
+ * Decrement reference counter for data buffer. If it has been marked
+ * 'BH_Freed', release it and the page to which it belongs if possible.
+ */
+static void release_data_buffer(struct buffer_head *bh)
+{
+ if (buffer_freed(bh)) {
+ clear_buffer_freed(bh);
+ release_buffer_page(bh);
+ } else
+ put_bh(bh);
+}
+
/*
* Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
* held. For ranking reasons we must trylock. If we lose, schedule away and
/*
* Submit all the data buffers to disk
*/
-static void journal_submit_data_buffers(journal_t *journal,
+static int journal_submit_data_buffers(journal_t *journal,
transaction_t *commit_transaction)
{
struct journal_head *jh;
int locked;
int bufs = 0;
struct buffer_head **wbuf = journal->j_wbuf;
+ int err = 0;
/*
* Whenever we unlock the journal and sleep, things can get added
if (locked)
unlock_buffer(bh);
BUFFER_TRACE(bh, "already cleaned up");
- put_bh(bh);
+ release_data_buffer(bh);
continue;
}
if (locked && test_clear_buffer_dirty(bh)) {
put_bh(bh);
} else {
BUFFER_TRACE(bh, "writeout complete: unfile");
+ if (unlikely(!buffer_uptodate(bh)))
+ err = -EIO;
__journal_unfile_buffer(jh);
jbd_unlock_bh_state(bh);
if (locked)
unlock_buffer(bh);
journal_remove_journal_head(bh);
- /* Once for our safety reference, once for
+ /* One for our safety reference, other for
* journal_remove_journal_head() */
put_bh(bh);
- put_bh(bh);
+ release_data_buffer(bh);
}
if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
}
spin_unlock(&journal->j_list_lock);
journal_do_submit_data(wbuf, bufs);
+
+ return err;
}
/*
* Now start flushing things to disk, in the order they appear
* on the transaction lists. Data blocks go first.
*/
- err = 0;
- journal_submit_data_buffers(journal, commit_transaction);
+ err = journal_submit_data_buffers(journal, commit_transaction);
/*
* Wait for all previously submitted IO to complete.
if (buffer_locked(bh)) {
spin_unlock(&journal->j_list_lock);
wait_on_buffer(bh);
- if (unlikely(!buffer_uptodate(bh)))
- err = -EIO;
spin_lock(&journal->j_list_lock);
}
+ if (unlikely(!buffer_uptodate(bh))) {
+ if (TestSetPageLocked(bh->b_page)) {
+ spin_unlock(&journal->j_list_lock);
+ lock_page(bh->b_page);
+ spin_lock(&journal->j_list_lock);
+ }
+ if (bh->b_page->mapping)
+ set_bit(AS_EIO, &bh->b_page->mapping->flags);
+
+ unlock_page(bh->b_page);
+ SetPageError(bh->b_page);
+ err = -EIO;
+ }
if (!inverted_lock(journal, bh)) {
put_bh(bh);
spin_lock(&journal->j_list_lock);
} else {
jbd_unlock_bh_state(bh);
}
- put_bh(bh);
+ release_data_buffer(bh);
cond_resched_lock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock);
- if (err)
- journal_abort(journal, err);
+ if (err) {
+ char b[BDEVNAME_SIZE];
- journal_write_revoke_records(journal, commit_transaction);
+ printk(KERN_WARNING
+ "JBD: Detected IO errors while flushing file data "
+ "on %s\n", bdevname(journal->j_fs_dev, b));
+ err = 0;
+ }
- jbd_debug(3, "JBD: commit phase 2\n");
+ journal_write_revoke_records(journal, commit_transaction);
/*
* If we found any dirty or locked buffers, then we should have
EXPORT_SYMBOL(journal_create);
EXPORT_SYMBOL(journal_load);
EXPORT_SYMBOL(journal_destroy);
-EXPORT_SYMBOL(journal_update_superblock);
EXPORT_SYMBOL(journal_abort);
EXPORT_SYMBOL(journal_errno);
EXPORT_SYMBOL(journal_ack_err);
static void journal_destroy_journal_head_cache(void)
{
- J_ASSERT(journal_head_cache != NULL);
- kmem_cache_destroy(journal_head_cache);
- journal_head_cache = NULL;
+ if (journal_head_cache) {
+ kmem_cache_destroy(journal_head_cache);
+ journal_head_cache = NULL;
+ }
}
/*
return NULL;
}
+void journal_destroy_revoke_caches(void)
+{
+ if (revoke_record_cache) {
+ kmem_cache_destroy(revoke_record_cache);
+ revoke_record_cache = NULL;
+ }
+ if (revoke_table_cache) {
+ kmem_cache_destroy(revoke_table_cache);
+ revoke_table_cache = NULL;
+ }
+}
+
int __init journal_init_revoke_caches(void)
{
+ J_ASSERT(!revoke_record_cache);
+ J_ASSERT(!revoke_table_cache);
+
revoke_record_cache = kmem_cache_create("revoke_record",
sizeof(struct jbd_revoke_record_s),
0,
SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
NULL);
if (!revoke_record_cache)
- return -ENOMEM;
+ goto record_cache_failure;
revoke_table_cache = kmem_cache_create("revoke_table",
sizeof(struct jbd_revoke_table_s),
0, SLAB_TEMPORARY, NULL);
- if (!revoke_table_cache) {
- kmem_cache_destroy(revoke_record_cache);
- revoke_record_cache = NULL;
- return -ENOMEM;
- }
+ if (!revoke_table_cache)
+ goto table_cache_failure;
+
return 0;
-}
-void journal_destroy_revoke_caches(void)
-{
- kmem_cache_destroy(revoke_record_cache);
- revoke_record_cache = NULL;
- kmem_cache_destroy(revoke_table_cache);
- revoke_table_cache = NULL;
+table_cache_failure:
+ journal_destroy_revoke_caches();
+record_cache_failure:
+ return -ENOMEM;
}
-/* Initialise the revoke table for a given journal to a given size. */
-
-int journal_init_revoke(journal_t *journal, int hash_size)
+static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
{
- int shift, tmp;
+ int shift = 0;
+ int tmp = hash_size;
+ struct jbd_revoke_table_s *table;
- J_ASSERT (journal->j_revoke_table[0] == NULL);
+ table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+ if (!table)
+ goto out;
- shift = 0;
- tmp = hash_size;
while((tmp >>= 1UL) != 0UL)
shift++;
- journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
- if (!journal->j_revoke_table[0])
- return -ENOMEM;
- journal->j_revoke = journal->j_revoke_table[0];
-
- /* Check that the hash_size is a power of two */
- J_ASSERT(is_power_of_2(hash_size));
-
- journal->j_revoke->hash_size = hash_size;
-
- journal->j_revoke->hash_shift = shift;
-
- journal->j_revoke->hash_table =
+ table->hash_size = hash_size;
+ table->hash_shift = shift;
+ table->hash_table =
kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
- if (!journal->j_revoke->hash_table) {
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
- journal->j_revoke = NULL;
- return -ENOMEM;
+ if (!table->hash_table) {
+ kmem_cache_free(revoke_table_cache, table);
+ table = NULL;
+ goto out;
}
for (tmp = 0; tmp < hash_size; tmp++)
- INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+ INIT_LIST_HEAD(&table->hash_table[tmp]);
- journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
- if (!journal->j_revoke_table[1]) {
- kfree(journal->j_revoke_table[0]->hash_table);
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
- return -ENOMEM;
+out:
+ return table;
+}
+
+static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table)
+{
+ int i;
+ struct list_head *hash_list;
+
+ for (i = 0; i < table->hash_size; i++) {
+ hash_list = &table->hash_table[i];
+ J_ASSERT(list_empty(hash_list));
}
- journal->j_revoke = journal->j_revoke_table[1];
+ kfree(table->hash_table);
+ kmem_cache_free(revoke_table_cache, table);
+}
- /* Check that the hash_size is a power of two */
+/* Initialise the revoke table for a given journal to a given size. */
+int journal_init_revoke(journal_t *journal, int hash_size)
+{
+ J_ASSERT(journal->j_revoke_table[0] == NULL);
J_ASSERT(is_power_of_2(hash_size));
- journal->j_revoke->hash_size = hash_size;
+ journal->j_revoke_table[0] = journal_init_revoke_table(hash_size);
+ if (!journal->j_revoke_table[0])
+ goto fail0;
- journal->j_revoke->hash_shift = shift;
+ journal->j_revoke_table[1] = journal_init_revoke_table(hash_size);
+ if (!journal->j_revoke_table[1])
+ goto fail1;
- journal->j_revoke->hash_table =
- kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
- if (!journal->j_revoke->hash_table) {
- kfree(journal->j_revoke_table[0]->hash_table);
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
- kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
- journal->j_revoke = NULL;
- return -ENOMEM;
- }
-
- for (tmp = 0; tmp < hash_size; tmp++)
- INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+ journal->j_revoke = journal->j_revoke_table[1];
spin_lock_init(&journal->j_revoke_lock);
return 0;
-}
-/* Destoy a journal's revoke table. The table must already be empty! */
+fail1:
+ journal_destroy_revoke_table(journal->j_revoke_table[0]);
+fail0:
+ return -ENOMEM;
+}
+/* Destroy a journal's revoke table. The table must already be empty! */
void journal_destroy_revoke(journal_t *journal)
{
- struct jbd_revoke_table_s *table;
- struct list_head *hash_list;
- int i;
-
- table = journal->j_revoke_table[0];
- if (!table)
- return;
-
- for (i=0; i<table->hash_size; i++) {
- hash_list = &table->hash_table[i];
- J_ASSERT (list_empty(hash_list));
- }
-
- kfree(table->hash_table);
- kmem_cache_free(revoke_table_cache, table);
- journal->j_revoke = NULL;
-
- table = journal->j_revoke_table[1];
- if (!table)
- return;
-
- for (i=0; i<table->hash_size; i++) {
- hash_list = &table->hash_table[i];
- J_ASSERT (list_empty(hash_list));
- }
-
- kfree(table->hash_table);
- kmem_cache_free(revoke_table_cache, table);
journal->j_revoke = NULL;
+ if (journal->j_revoke_table[0])
+ journal_destroy_revoke_table(journal->j_revoke_table[0]);
+ if (journal->j_revoke_table[1])
+ journal_destroy_revoke_table(journal->j_revoke_table[1]);
}
return;
}
+/*
+ * journal_try_to_free_buffers() could race with journal_commit_transaction()
+ * The latter might still hold the a count on buffers when inspecting
+ * them on t_syncdata_list or t_locked_list.
+ *
+ * journal_try_to_free_buffers() will call this function to
+ * wait for the current transaction to finish syncing data buffers, before
+ * tryinf to free that buffer.
+ *
+ * Called with journal->j_state_lock held.
+ */
+static void journal_wait_for_transaction_sync_data(journal_t *journal)
+{
+ transaction_t *transaction = NULL;
+ tid_t tid;
+
+ spin_lock(&journal->j_state_lock);
+ transaction = journal->j_committing_transaction;
+
+ if (!transaction) {
+ spin_unlock(&journal->j_state_lock);
+ return;
+ }
+
+ tid = transaction->t_tid;
+ spin_unlock(&journal->j_state_lock);
+ log_wait_commit(journal, tid);
+}
/**
* int journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
* @page: to try and free
- * @unused_gfp_mask: unused
+ * @gfp_mask: we use the mask to detect how hard should we try to release
+ * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
+ * release the buffers.
*
*
* For all the buffers on this page,
* journal_try_to_free_buffer() is changing its state. But that
* cannot happen because we never reallocate freed data as metadata
* while the data is part of a transaction. Yes?
+ *
+ * Return 0 on failure, 1 on success
*/
int journal_try_to_free_buffers(journal_t *journal,
- struct page *page, gfp_t unused_gfp_mask)
+ struct page *page, gfp_t gfp_mask)
{
struct buffer_head *head;
struct buffer_head *bh;
if (buffer_jbd(bh))
goto busy;
} while ((bh = bh->b_this_page) != head);
+
ret = try_to_free_buffers(page);
+
+ /*
+ * There are a number of places where journal_try_to_free_buffers()
+ * could race with journal_commit_transaction(), the later still
+ * holds the reference to the buffers to free while processing them.
+ * try_to_free_buffers() failed to free those buffers. Some of the
+ * caller of releasepage() request page buffers to be dropped, otherwise
+ * treat the fail-to-free as errors (such as generic_file_direct_IO())
+ *
+ * So, if the caller of try_to_release_page() wants the synchronous
+ * behaviour(i.e make sure buffers are dropped upon return),
+ * let's wait for the current transaction to finish flush of
+ * dirty data buffers, then try to free those buffers again,
+ * with the journal locked.
+ */
+ if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
+ journal_wait_for_transaction_sync_data(journal);
+ ret = try_to_free_buffers(page);
+ }
+
busy:
return ret;
}
#include <linux/parser.h>
#include <linux/completion.h>
#include <linux/vfs.h>
+#include <linux/quotaops.h>
#include <linux/mount.h>
#include <linux/moduleparam.h>
#include <linux/kthread.h>
}
if (status < 0)
goto out_unlock;
- status = nlm_stat_to_errno(resp->status);
+ /*
+ * EAGAIN doesn't make sense for sleeping locks, and in some
+ * cases NLM_LCK_DENIED is returned for a permanent error. So
+ * turn it into an ENOLCK.
+ */
+ if (resp->status == nlm_lck_denied && (fl_flags & FL_SLEEP))
+ status = -ENOLCK;
+ else
+ status = nlm_stat_to_errno(resp->status);
out_unblock:
nlmclnt_finish_block(block);
out:
goto out;
case -EAGAIN:
ret = nlm_lck_denied;
- break;
- case -EINPROGRESS:
+ goto out;
+ case FILE_LOCK_DEFERRED:
if (wait)
break;
/* Filesystem lock operation is in progress
goto out;
}
- ret = nlm_lck_denied;
- if (!wait)
- goto out;
-
ret = nlm_lck_blocked;
/* Append to list of blocked */
}
error = vfs_test_lock(file->f_file, &lock->fl);
- if (error == -EINPROGRESS) {
+ if (error == FILE_LOCK_DEFERRED) {
ret = nlmsvc_defer_lock_rqst(rqstp, block);
goto out;
}
switch (error) {
case 0:
break;
- case -EAGAIN:
- case -EINPROGRESS:
+ case FILE_LOCK_DEFERRED:
dprintk("lockd: lock still blocked error %d\n", error);
nlmsvc_insert_block(block, NLM_NEVER);
nlmsvc_release_block(block);
if (!flock_locks_conflict(request, fl))
continue;
error = -EAGAIN;
- if (request->fl_flags & FL_SLEEP)
- locks_insert_block(fl, request);
+ if (!(request->fl_flags & FL_SLEEP))
+ goto out;
+ error = FILE_LOCK_DEFERRED;
+ locks_insert_block(fl, request);
goto out;
}
if (request->fl_flags & FL_ACCESS)
error = -EDEADLK;
if (posix_locks_deadlock(request, fl))
goto out;
- error = -EAGAIN;
+ error = FILE_LOCK_DEFERRED;
locks_insert_block(fl, request);
goto out;
}
might_sleep ();
for (;;) {
error = posix_lock_file(filp, fl, NULL);
- if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
+ if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
if (!error)
for (;;) {
error = __posix_lock_file(inode, &fl, NULL);
- if (error != -EAGAIN)
- break;
- if (!(fl.fl_flags & FL_SLEEP))
+ if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
if (!error) {
might_sleep();
for (;;) {
error = flock_lock_file(filp, fl);
- if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
+ if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
if (!error)
* fl_grant is set. Callers expecting ->lock() to return asynchronously
* will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
* the request is for a blocking lock. When ->lock() does return asynchronously,
- * it must return -EINPROGRESS, and call ->fl_grant() when the lock
+ * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock
* request completes.
* If the request is for non-blocking lock the file system should return
- * -EINPROGRESS then try to get the lock and call the callback routine with
- * the result. If the request timed out the callback routine will return a
+ * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine
+ * with the result. If the request timed out the callback routine will return a
* nonzero return code and the file system should release the lock. The file
* system is also responsible to keep a corresponding posix lock when it
* grants a lock so the VFS can find out which locks are locally held and do
* the correct lock cleanup when required.
* The underlying filesystem must not drop the kernel lock or call
- * ->fl_grant() before returning to the caller with a -EINPROGRESS
+ * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED
* return code.
*/
int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
}
EXPORT_SYMBOL_GPL(vfs_lock_file);
+static int do_lock_file_wait(struct file *filp, unsigned int cmd,
+ struct file_lock *fl)
+{
+ int error;
+
+ error = security_file_lock(filp, fl->fl_type);
+ if (error)
+ return error;
+
+ for (;;) {
+ error = vfs_lock_file(filp, cmd, fl, NULL);
+ if (error != FILE_LOCK_DEFERRED)
+ break;
+ error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
+ if (!error)
+ continue;
+
+ locks_delete_block(fl);
+ break;
+ }
+
+ return error;
+}
+
/* Apply the lock described by l to an open file descriptor.
* This implements both the F_SETLK and F_SETLKW commands of fcntl().
*/
goto out;
}
- error = security_file_lock(filp, file_lock->fl_type);
- if (error)
- goto out;
-
- if (filp->f_op && filp->f_op->lock != NULL)
- error = filp->f_op->lock(filp, cmd, file_lock);
- else {
- for (;;) {
- error = posix_lock_file(filp, file_lock, NULL);
- if (error != -EAGAIN || cmd == F_SETLK)
- break;
- error = wait_event_interruptible(file_lock->fl_wait,
- !file_lock->fl_next);
- if (!error)
- continue;
-
- locks_delete_block(file_lock);
- break;
- }
- }
+ error = do_lock_file_wait(filp, cmd, file_lock);
/*
* Attempt to detect a close/fcntl race and recover by
goto out;
}
- error = security_file_lock(filp, file_lock->fl_type);
- if (error)
- goto out;
-
- if (filp->f_op && filp->f_op->lock != NULL)
- error = filp->f_op->lock(filp, cmd, file_lock);
- else {
- for (;;) {
- error = posix_lock_file(filp, file_lock, NULL);
- if (error != -EAGAIN || cmd == F_SETLK64)
- break;
- error = wait_event_interruptible(file_lock->fl_wait,
- !file_lock->fl_next);
- if (!error)
- continue;
-
- locks_delete_block(file_lock);
- break;
- }
- }
+ error = do_lock_file_wait(filp, cmd, file_lock);
/*
* Attempt to detect a close/fcntl race and recover by
if (!s->s_root)
goto out_iput;
- if (!NO_TRUNCATE)
- s->s_root->d_op = &minix_dentry_operations;
-
if (!(s->s_flags & MS_RDONLY)) {
if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
ms->s_state &= ~MINIX_VALID_FS;
#include <linux/pagemap.h>
#include <linux/minix_fs.h>
-/*
- * change the define below to 0 if you want names > info->s_namelen chars to be
- * truncated. Else they will be disallowed (ENAMETOOLONG).
- */
-#define NO_TRUNCATE 1
#define INODE_VERSION(inode) minix_sb(inode->i_sb)->s_version
#define MINIX_V1 0x0001 /* original minix fs */
#define MINIX_V2 0x0002 /* minix V2 fs */
extern const struct inode_operations minix_dir_inode_operations;
extern const struct file_operations minix_file_operations;
extern const struct file_operations minix_dir_operations;
-extern struct dentry_operations minix_dentry_operations;
static inline struct minix_sb_info *minix_sb(struct super_block *sb)
{
return err;
}
-static int minix_hash(struct dentry *dentry, struct qstr *qstr)
-{
- unsigned long hash;
- int i;
- const unsigned char *name;
-
- i = minix_sb(dentry->d_inode->i_sb)->s_namelen;
- if (i >= qstr->len)
- return 0;
- /* Truncate the name in place, avoids having to define a compare
- function. */
- qstr->len = i;
- name = qstr->name;
- hash = init_name_hash();
- while (i--)
- hash = partial_name_hash(*name++, hash);
- qstr->hash = end_name_hash(hash);
- return 0;
-}
-
-struct dentry_operations minix_dentry_operations = {
- .d_hash = minix_hash,
-};
-
static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
{
struct inode * inode = NULL;
/* Characters that are undesirable in an MS-DOS file name */
static unsigned char bad_chars[] = "*?<>|\"";
-static unsigned char bad_if_strict_pc[] = "+=,; ";
-/* GEMDOS is less restrictive */
-static unsigned char bad_if_strict_atari[] = " ";
-
-#define bad_if_strict(opts) \
- ((opts)->atari ? bad_if_strict_atari : bad_if_strict_pc)
+static unsigned char bad_if_strict[] = "+=,; ";
/***** Formats an MS-DOS file name. Rejects invalid names. */
static int msdos_format_name(const unsigned char *name, int len,
/* Get rid of dot - test for it elsewhere */
name++;
len--;
- } else if (!opts->atari)
+ } else
return -EINVAL;
}
/*
- * disallow names that _really_ start with a dot for MS-DOS,
- * GEMDOS does not care
+ * disallow names that _really_ start with a dot
*/
- space = !opts->atari;
+ space = 1;
c = 0;
for (walk = res; len && walk - res < 8; walk++) {
c = *name++;
len--;
if (opts->name_check != 'r' && strchr(bad_chars, c))
return -EINVAL;
- if (opts->name_check == 's' && strchr(bad_if_strict(opts), c))
+ if (opts->name_check == 's' && strchr(bad_if_strict, c))
return -EINVAL;
if (c >= 'A' && c <= 'Z' && opts->name_check == 's')
return -EINVAL;
if (opts->name_check != 'r' && strchr(bad_chars, c))
return -EINVAL;
if (opts->name_check == 's' &&
- strchr(bad_if_strict(opts), c))
+ strchr(bad_if_strict, c))
return -EINVAL;
if (c < ' ' || c == ':' || c == '\\')
return -EINVAL;
int is_dir, int is_hid, int cluster,
struct timespec *ts, struct fat_slot_info *sinfo)
{
+ struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
struct msdos_dir_entry de;
__le16 time, date;
int err;
if (is_hid)
de.attr |= ATTR_HIDDEN;
de.lcase = 0;
- fat_date_unix2dos(ts->tv_sec, &time, &date);
+ fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
de.cdate = de.adate = 0;
de.ctime = 0;
de.ctime_cs = 0;
#define NFSDDBG_FACILITY NFSDDBG_LOCKD
+#ifdef CONFIG_LOCKD_V4
+#define nlm_stale_fh nlm4_stale_fh
+#define nlm_failed nlm4_failed
+#else
+#define nlm_stale_fh nlm_lck_denied_nolocks
+#define nlm_failed nlm_lck_denied_nolocks
+#endif
/*
* Note: we hold the dentry use count while the file is open.
*/
return 0;
case nfserr_dropit:
return nlm_drop_reply;
-#ifdef CONFIG_LOCKD_V4
case nfserr_stale:
- return nlm4_stale_fh;
-#endif
+ return nlm_stale_fh;
default:
- return nlm_lck_denied;
+ return nlm_failed;
}
}
static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
whole_disk_show, NULL);
-void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
+int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
{
struct hd_struct *p;
int err;
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
- return;
+ return -ENOMEM;
if (!init_part_stats(p)) {
- kfree(p);
- return;
+ err = -ENOMEM;
+ goto out0;
}
p->start_sect = start;
p->nr_sects = len;
/* delay uevent until 'holders' subdir is created */
p->dev.uevent_suppress = 1;
- device_add(&p->dev);
+ err = device_add(&p->dev);
+ if (err)
+ goto out1;
partition_sysfs_add_subdir(p);
p->dev.uevent_suppress = 0;
- if (flags & ADDPART_FLAG_WHOLEDISK)
+ if (flags & ADDPART_FLAG_WHOLEDISK) {
err = device_create_file(&p->dev, &dev_attr_whole_disk);
+ if (err)
+ goto out2;
+ }
/* suppress uevent if the disk supresses it */
if (!disk->dev.uevent_suppress)
kobject_uevent(&p->dev.kobj, KOBJ_ADD);
+
+ return 0;
+
+out2:
+ device_del(&p->dev);
+out1:
+ put_device(&p->dev);
+ free_part_stats(p);
+out0:
+ kfree(p);
+ return err;
}
/* Not exported, helper to add_disk(). */
if (!size)
continue;
if (from + size > get_capacity(disk)) {
- printk(" %s: p%d exceeds device capacity\n",
+ printk(KERN_ERR " %s: p%d exceeds device capacity\n",
disk->disk_name, p);
+ continue;
+ }
+ res = add_partition(disk, p, from, size, state->parts[p].flags);
+ if (res) {
+ printk(KERN_ERR " %s: p%d could not be added: %d\n",
+ disk->disk_name, p, -res);
+ continue;
}
- add_partition(disk, p, from, size, state->parts[p].flags);
#ifdef CONFIG_BLK_DEV_MD
if (state->parts[p].flags & ADDPART_FLAG_RAID)
md_autodetect_dev(bdev->bd_dev+p);
#include "check.h"
#include "efi.h"
-#undef EFI_DEBUG
-#ifdef EFI_DEBUG
-#define Dprintk(x...) printk(KERN_DEBUG x)
-#else
-#define Dprintk(x...)
-#endif
-
/* This allows a kernel command line option 'gpt' to override
* the test for invalid PMBR. Not __initdata because reloading
* the partition tables happens after init too.
/* Check the GUID Partition Table signature */
if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) {
- Dprintk("GUID Partition Table Header signature is wrong:"
- "%lld != %lld\n",
- (unsigned long long)le64_to_cpu((*gpt)->signature),
- (unsigned long long)GPT_HEADER_SIGNATURE);
+ pr_debug("GUID Partition Table Header signature is wrong:"
+ "%lld != %lld\n",
+ (unsigned long long)le64_to_cpu((*gpt)->signature),
+ (unsigned long long)GPT_HEADER_SIGNATURE);
goto fail;
}
crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size));
if (crc != origcrc) {
- Dprintk
- ("GUID Partition Table Header CRC is wrong: %x != %x\n",
- crc, origcrc);
+ pr_debug("GUID Partition Table Header CRC is wrong: %x != %x\n",
+ crc, origcrc);
goto fail;
}
(*gpt)->header_crc32 = cpu_to_le32(origcrc);
/* Check that the my_lba entry points to the LBA that contains
* the GUID Partition Table */
if (le64_to_cpu((*gpt)->my_lba) != lba) {
- Dprintk("GPT my_lba incorrect: %lld != %lld\n",
- (unsigned long long)le64_to_cpu((*gpt)->my_lba),
- (unsigned long long)lba);
+ pr_debug("GPT my_lba incorrect: %lld != %lld\n",
+ (unsigned long long)le64_to_cpu((*gpt)->my_lba),
+ (unsigned long long)lba);
goto fail;
}
*/
lastlba = last_lba(bdev);
if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
- Dprintk("GPT: first_usable_lba incorrect: %lld > %lld\n",
- (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
- (unsigned long long)lastlba);
+ pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
+ (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
+ (unsigned long long)lastlba);
goto fail;
}
if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) {
- Dprintk("GPT: last_usable_lba incorrect: %lld > %lld\n",
- (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
- (unsigned long long)lastlba);
+ pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n",
+ (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
+ (unsigned long long)lastlba);
goto fail;
}
le32_to_cpu((*gpt)->sizeof_partition_entry));
if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
- Dprintk("GUID Partitition Entry Array CRC check failed.\n");
+ pr_debug("GUID Partitition Entry Array CRC check failed.\n");
goto fail_ptes;
}
return 0;
}
- Dprintk("GUID Partition Table is valid! Yea!\n");
+ pr_debug("GUID Partition Table is valid! Yea!\n");
for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
if (!is_pte_valid(&ptes[i], last_lba(bdev)))
bool is_vista = false;
BUG_ON(!data || !ph);
- if (MAGIC_PRIVHEAD != BE64(data)) {
+ if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) {
ldm_error("Cannot find PRIVHEAD structure. LDM database is"
" corrupt. Aborting.");
return false;
}
- ph->ver_major = BE16(data + 0x000C);
- ph->ver_minor = BE16(data + 0x000E);
- ph->logical_disk_start = BE64(data + 0x011B);
- ph->logical_disk_size = BE64(data + 0x0123);
- ph->config_start = BE64(data + 0x012B);
- ph->config_size = BE64(data + 0x0133);
+ ph->ver_major = get_unaligned_be16(data + 0x000C);
+ ph->ver_minor = get_unaligned_be16(data + 0x000E);
+ ph->logical_disk_start = get_unaligned_be64(data + 0x011B);
+ ph->logical_disk_size = get_unaligned_be64(data + 0x0123);
+ ph->config_start = get_unaligned_be64(data + 0x012B);
+ ph->config_size = get_unaligned_be64(data + 0x0133);
/* Version 2.11 is Win2k/XP and version 2.12 is Vista. */
if (ph->ver_major == 2 && ph->ver_minor == 12)
is_vista = true;
{
BUG_ON (!data || !toc);
- if (MAGIC_TOCBLOCK != BE64 (data)) {
+ if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) {
ldm_crit ("Cannot find TOCBLOCK, database may be corrupt.");
return false;
}
strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name));
toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0;
- toc->bitmap1_start = BE64 (data + 0x2E);
- toc->bitmap1_size = BE64 (data + 0x36);
+ toc->bitmap1_start = get_unaligned_be64(data + 0x2E);
+ toc->bitmap1_size = get_unaligned_be64(data + 0x36);
if (strncmp (toc->bitmap1_name, TOC_BITMAP1,
sizeof (toc->bitmap1_name)) != 0) {
}
strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name));
toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0;
- toc->bitmap2_start = BE64 (data + 0x50);
- toc->bitmap2_size = BE64 (data + 0x58);
+ toc->bitmap2_start = get_unaligned_be64(data + 0x50);
+ toc->bitmap2_size = get_unaligned_be64(data + 0x58);
if (strncmp (toc->bitmap2_name, TOC_BITMAP2,
sizeof (toc->bitmap2_name)) != 0) {
ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.",
{
BUG_ON (!data || !vm);
- if (MAGIC_VMDB != BE32 (data)) {
+ if (MAGIC_VMDB != get_unaligned_be32(data)) {
ldm_crit ("Cannot find the VMDB, database may be corrupt.");
return false;
}
- vm->ver_major = BE16 (data + 0x12);
- vm->ver_minor = BE16 (data + 0x14);
+ vm->ver_major = get_unaligned_be16(data + 0x12);
+ vm->ver_minor = get_unaligned_be16(data + 0x14);
if ((vm->ver_major != 4) || (vm->ver_minor != 10)) {
ldm_error ("Expected VMDB version %d.%d, got %d.%d. "
"Aborting.", 4, 10, vm->ver_major, vm->ver_minor);
return false;
}
- vm->vblk_size = BE32 (data + 0x08);
- vm->vblk_offset = BE32 (data + 0x0C);
- vm->last_vblk_seq = BE32 (data + 0x04);
+ vm->vblk_size = get_unaligned_be32(data + 0x08);
+ vm->vblk_offset = get_unaligned_be32(data + 0x0C);
+ vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
ldm_debug ("Parsed VMDB successfully.");
return true;
goto out; /* Already logged */
/* Are there uncommitted transactions? */
- if (BE16(data + 0x10) != 0x01) {
+ if (get_unaligned_be16(data + 0x10) != 0x01) {
ldm_crit ("Database is not in a consistent state. Aborting.");
goto out;
}
return false;
len += VBLK_SIZE_CMP3;
- if (len != BE32 (buffer + 0x14))
+ if (len != get_unaligned_be32(buffer + 0x14))
return false;
comp = &vb->vblk.comp;
return false;
len += VBLK_SIZE_DGR3;
- if (len != BE32 (buffer + 0x14))
+ if (len != get_unaligned_be32(buffer + 0x14))
return false;
dgrp = &vb->vblk.dgrp;
return false;
len += VBLK_SIZE_DGR4;
- if (len != BE32 (buffer + 0x14))
+ if (len != get_unaligned_be32(buffer + 0x14))
return false;
dgrp = &vb->vblk.dgrp;
return false;
len += VBLK_SIZE_DSK3;
- if (len != BE32 (buffer + 0x14))
+ if (len != get_unaligned_be32(buffer + 0x14))
return false;
disk = &vb->vblk.disk;
return false;
len += VBLK_SIZE_DSK4;
- if (len != BE32 (buffer + 0x14))
+ if (len != get_unaligned_be32(buffer + 0x14))
return false;
disk = &vb->vblk.disk;
return false;
}
len += VBLK_SIZE_PRT3;
- if (len > BE32(buffer + 0x14)) {
+ if (len > get_unaligned_be32(buffer + 0x14)) {
ldm_error("len %d > BE32(buffer + 0x14) %d", len,
- BE32(buffer + 0x14));
+ get_unaligned_be32(buffer + 0x14));
return false;
}
part = &vb->vblk.part;
- part->start = BE64(buffer + 0x24 + r_name);
- part->volume_offset = BE64(buffer + 0x2C + r_name);
+ part->start = get_unaligned_be64(buffer + 0x24 + r_name);
+ part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name);
part->size = ldm_get_vnum(buffer + 0x34 + r_name);
part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size);
part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent);
return false;
}
len += VBLK_SIZE_VOL5;
- if (len > BE32(buffer + 0x14)) {
+ if (len > get_unaligned_be32(buffer + 0x14)) {
ldm_error("len %d > BE32(buffer + 0x14) %d", len,
- BE32(buffer + 0x14));
+ get_unaligned_be32(buffer + 0x14));
return false;
}
volu = &vb->vblk.volu;
BUG_ON (!data || !frags);
- group = BE32 (data + 0x08);
- rec = BE16 (data + 0x0C);
- num = BE16 (data + 0x0E);
+ group = get_unaligned_be32(data + 0x08);
+ rec = get_unaligned_be16(data + 0x0C);
+ num = get_unaligned_be16(data + 0x0E);
if ((num < 1) || (num > 4)) {
ldm_error ("A VBLK claims to have %d parts.", num);
return false;
}
for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */
- if (MAGIC_VBLK != BE32 (data)) {
+ if (MAGIC_VBLK != get_unaligned_be32(data)) {
ldm_error ("Expected to find a VBLK.");
goto out;
}
- recs = BE16 (data + 0x0E); /* Number of records */
+ recs = get_unaligned_be16(data + 0x0E); /* Number of records */
if (recs == 1) {
if (!ldm_ldmdb_add (data, size, ldb))
goto out; /* Already logged */
#define TOC_BITMAP1 "config" /* Names of the two defined */
#define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */
-/* Most numbers we deal with are big-endian and won't be aligned. */
-#define BE16(x) ((u16)be16_to_cpu(get_unaligned((__be16*)(x))))
-#define BE32(x) ((u32)be32_to_cpu(get_unaligned((__be32*)(x))))
-#define BE64(x) ((u64)be64_to_cpu(get_unaligned((__be64*)(x))))
-
/* Borrowed from msdos.c */
#define SYS_IND(p) (get_unaligned(&(p)->sys_ind))
--- /dev/null
+config PROC_FS
+ bool "/proc file system support" if EMBEDDED
+ default y
+ help
+ This is a virtual file system providing information about the status
+ of the system. "Virtual" means that it doesn't take up any space on
+ your hard disk: the files are created on the fly by the kernel when
+ you try to access them. Also, you cannot read the files with older
+ version of the program less: you need to use more or cat.
+
+ It's totally cool; for example, "cat /proc/interrupts" gives
+ information about what the different IRQs are used for at the moment
+ (there is a small number of Interrupt ReQuest lines in your computer
+ that are used by the attached devices to gain the CPU's attention --
+ often a source of trouble if two devices are mistakenly configured
+ to use the same IRQ). The program procinfo to display some
+ information about your system gathered from the /proc file system.
+
+ Before you can use the /proc file system, it has to be mounted,
+ meaning it has to be given a location in the directory hierarchy.
+ That location should be /proc. A command such as "mount -t proc proc
+ /proc" or the equivalent line in /etc/fstab does the job.
+
+ The /proc file system is explained in the file
+ <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
+ ("man 5 proc").
+
+ This option will enlarge your kernel by about 67 KB. Several
+ programs depend on this, so everyone should say Y here.
+
+config PROC_KCORE
+ bool "/proc/kcore support" if !ARM
+ depends on PROC_FS && MMU
+
+config PROC_VMCORE
+ bool "/proc/vmcore support (EXPERIMENTAL)"
+ depends on PROC_FS && CRASH_DUMP
+ default y
+ help
+ Exports the dump image of crashed kernel in ELF format.
+
+config PROC_SYSCTL
+ bool "Sysctl support (/proc/sys)" if EMBEDDED
+ depends on PROC_FS
+ select SYSCTL
+ default y
+ ---help---
+ The sysctl interface provides a means of dynamically changing
+ certain kernel parameters and variables on the fly without requiring
+ a recompile of the kernel or reboot of the system. The primary
+ interface is through /proc/sys. If you say Y here a tree of
+ modifiable sysctl entries will be generated beneath the
+ /proc/sys directory. They are explained in the files
+ in <file:Documentation/sysctl/>. Note that enabling this
+ option will enlarge the kernel by at least 8 KB.
+
+ As it is generally a good thing, you should say Y here unless
+ building a kernel for install/rescue disks or your system is very
+ limited in memory.
}
#ifdef CONFIG_TASK_IO_ACCOUNTING
-static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
-{
+static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
+{
+ u64 rchar, wchar, syscr, syscw;
+ struct task_io_accounting ioac;
+
+ if (!whole) {
+ rchar = task->rchar;
+ wchar = task->wchar;
+ syscr = task->syscr;
+ syscw = task->syscw;
+ memcpy(&ioac, &task->ioac, sizeof(ioac));
+ } else {
+ unsigned long flags;
+ struct task_struct *t = task;
+ rchar = wchar = syscr = syscw = 0;
+ memset(&ioac, 0, sizeof(ioac));
+
+ rcu_read_lock();
+ do {
+ rchar += t->rchar;
+ wchar += t->wchar;
+ syscr += t->syscr;
+ syscw += t->syscw;
+
+ ioac.read_bytes += t->ioac.read_bytes;
+ ioac.write_bytes += t->ioac.write_bytes;
+ ioac.cancelled_write_bytes +=
+ t->ioac.cancelled_write_bytes;
+ t = next_thread(t);
+ } while (t != task);
+ rcu_read_unlock();
+
+ if (lock_task_sighand(task, &flags)) {
+ struct signal_struct *sig = task->signal;
+
+ rchar += sig->rchar;
+ wchar += sig->wchar;
+ syscr += sig->syscr;
+ syscw += sig->syscw;
+
+ ioac.read_bytes += sig->ioac.read_bytes;
+ ioac.write_bytes += sig->ioac.write_bytes;
+ ioac.cancelled_write_bytes +=
+ sig->ioac.cancelled_write_bytes;
+
+ unlock_task_sighand(task, &flags);
+ }
+ }
+
return sprintf(buffer,
-#ifdef CONFIG_TASK_XACCT
"rchar: %llu\n"
"wchar: %llu\n"
"syscr: %llu\n"
"syscw: %llu\n"
-#endif
"read_bytes: %llu\n"
"write_bytes: %llu\n"
"cancelled_write_bytes: %llu\n",
-#ifdef CONFIG_TASK_XACCT
- (unsigned long long)task->rchar,
- (unsigned long long)task->wchar,
- (unsigned long long)task->syscr,
- (unsigned long long)task->syscw,
-#endif
- (unsigned long long)task->ioac.read_bytes,
- (unsigned long long)task->ioac.write_bytes,
- (unsigned long long)task->ioac.cancelled_write_bytes);
+ (unsigned long long)rchar,
+ (unsigned long long)wchar,
+ (unsigned long long)syscr,
+ (unsigned long long)syscw,
+ (unsigned long long)ioac.read_bytes,
+ (unsigned long long)ioac.write_bytes,
+ (unsigned long long)ioac.cancelled_write_bytes);
+}
+
+static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
+{
+ return do_io_accounting(task, buffer, 0);
}
-#endif
+
+static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
+{
+ return do_io_accounting(task, buffer, 1);
+}
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
/*
* Thread groups
REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
- INF("io", S_IRUGO, pid_io_accounting),
+ INF("io", S_IRUGO, tgid_io_accounting),
#endif
};
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
#endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+ INF("io", S_IRUGO, tid_io_accounting),
+#endif
};
static int proc_tid_base_readdir(struct file * filp,
ent->pde_users = 0;
spin_lock_init(&ent->pde_unload_lock);
ent->pde_unload_completion = NULL;
+ INIT_LIST_HEAD(&ent->pde_openers);
out:
return ent;
}
spin_unlock(&de->pde_unload_lock);
continue_removing:
+ spin_lock(&de->pde_unload_lock);
+ while (!list_empty(&de->pde_openers)) {
+ struct pde_opener *pdeo;
+
+ pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
+ list_del(&pdeo->lh);
+ spin_unlock(&de->pde_unload_lock);
+ pdeo->release(pdeo->inode, pdeo->file);
+ kfree(pdeo);
+ spin_lock(&de->pde_unload_lock);
+ }
+ spin_unlock(&de->pde_unload_lock);
+
if (S_ISDIR(de->mode))
parent->nlink--;
de->nlink = 0;
return 0;
}
-static int proc_remount(struct super_block *sb, int *flags, char *data)
-{
- *flags |= MS_NODIRATIME;
- return 0;
-}
-
static const struct super_operations proc_sops = {
.alloc_inode = proc_alloc_inode,
.destroy_inode = proc_destroy_inode,
.drop_inode = generic_delete_inode,
.delete_inode = proc_delete_inode,
.statfs = simple_statfs,
- .remount_fs = proc_remount,
};
-static void pde_users_dec(struct proc_dir_entry *pde)
+static void __pde_users_dec(struct proc_dir_entry *pde)
{
- spin_lock(&pde->pde_unload_lock);
pde->pde_users--;
if (pde->pde_unload_completion && pde->pde_users == 0)
complete(pde->pde_unload_completion);
+}
+
+static void pde_users_dec(struct proc_dir_entry *pde)
+{
+ spin_lock(&pde->pde_unload_lock);
+ __pde_users_dec(pde);
spin_unlock(&pde->pde_unload_lock);
}
struct proc_dir_entry *pde = PDE(inode);
int rv = 0;
int (*open)(struct inode *, struct file *);
+ int (*release)(struct inode *, struct file *);
+ struct pde_opener *pdeo;
+
+ /*
+ * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
+ * sequence. ->release won't be called because ->proc_fops will be
+ * cleared. Depending on complexity of ->release, consequences vary.
+ *
+ * We can't wait for mercy when close will be done for real, it's
+ * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
+ * by hand in remove_proc_entry(). For this, save opener's credentials
+ * for later.
+ */
+ pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
+ if (!pdeo)
+ return -ENOMEM;
spin_lock(&pde->pde_unload_lock);
if (!pde->proc_fops) {
spin_unlock(&pde->pde_unload_lock);
+ kfree(pdeo);
return rv;
}
pde->pde_users++;
open = pde->proc_fops->open;
+ release = pde->proc_fops->release;
spin_unlock(&pde->pde_unload_lock);
if (open)
rv = open(inode, file);
- pde_users_dec(pde);
+ spin_lock(&pde->pde_unload_lock);
+ if (rv == 0 && release) {
+ /* To know what to release. */
+ pdeo->inode = inode;
+ pdeo->file = file;
+ /* Strictly for "too late" ->release in proc_reg_release(). */
+ pdeo->release = release;
+ list_add(&pdeo->lh, &pde->pde_openers);
+ } else
+ kfree(pdeo);
+ __pde_users_dec(pde);
+ spin_unlock(&pde->pde_unload_lock);
return rv;
}
+static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
+ struct inode *inode, struct file *file)
+{
+ struct pde_opener *pdeo;
+
+ list_for_each_entry(pdeo, &pde->pde_openers, lh) {
+ if (pdeo->inode == inode && pdeo->file == file)
+ return pdeo;
+ }
+ return NULL;
+}
+
static int proc_reg_release(struct inode *inode, struct file *file)
{
struct proc_dir_entry *pde = PDE(inode);
int rv = 0;
int (*release)(struct inode *, struct file *);
+ struct pde_opener *pdeo;
spin_lock(&pde->pde_unload_lock);
+ pdeo = find_pde_opener(pde, inode, file);
if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
+ /*
+ * Can't simply exit, __fput() will think that everything is OK,
+ * and move on to freeing struct file. remove_proc_entry() will
+ * find slacker in opener's list and will try to do non-trivial
+ * things with struct file. Therefore, remove opener from list.
+ *
+ * But if opener is removed from list, who will ->release it?
+ */
+ if (pdeo) {
+ list_del(&pdeo->lh);
+ spin_unlock(&pde->pde_unload_lock);
+ rv = pdeo->release(inode, file);
+ kfree(pdeo);
+ } else
+ spin_unlock(&pde->pde_unload_lock);
return rv;
}
pde->pde_users++;
release = pde->proc_fops->release;
+ if (pdeo) {
+ list_del(&pdeo->lh);
+ kfree(pdeo);
+ }
spin_unlock(&pde->pde_unload_lock);
if (release)
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
extern const struct file_operations proc_net_operations;
+extern const struct file_operations proc_kmsg_operations;
extern const struct inode_operations proc_net_inode_operations;
void free_proc_entry(struct proc_dir_entry *de);
struct dentry *dentry);
int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
filldir_t filldir);
+
+struct pde_opener {
+ struct inode *inode;
+ struct file *file;
+ int (*release)(struct inode *, struct file *);
+ struct list_head lh;
+};
#define CORE_STR "CORE"
+#ifndef ELF_CORE_EFLAGS
+#define ELF_CORE_EFLAGS 0
+#endif
+
static int open_kcore(struct inode * inode, struct file * filp)
{
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
elf->e_entry = 0;
elf->e_phoff = sizeof(struct elfhdr);
elf->e_shoff = 0;
-#if defined(CONFIG_H8300)
- elf->e_flags = ELF_FLAGS;
-#else
- elf->e_flags = 0;
-#endif
+ elf->e_flags = ELF_CORE_EFLAGS;
elf->e_ehsize = sizeof(struct elfhdr);
elf->e_phentsize= sizeof(struct elf_phdr);
elf->e_phnum = nphdr;
#include <asm/uaccess.h>
#include <asm/io.h>
+#include "internal.h"
+
extern wait_queue_head_t log_wait;
extern int do_syslog(int type, char __user *bug, int count);
void sync_dquots(struct super_block *sb, int type)
{
- int cnt, dirty;
+ int cnt;
if (sb) {
if (sb->s_qcop->quota_sync)
restart:
list_for_each_entry(sb, &super_blocks, s_list) {
/* This test just improves performance so it needn't be reliable... */
- for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
- if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
- && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
- dirty = 1;
- if (!dirty)
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (type != -1 && type != cnt)
+ continue;
+ if (!sb_has_quota_enabled(sb, cnt))
+ continue;
+ if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
+ list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
+ continue;
+ break;
+ }
+ if (cnt == MAXQUOTAS)
continue;
sb->s_count++;
spin_unlock(&sb_lock);
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/quota.h>
+#include <linux/quotaops.h>
#include <linux/dqblk_v1.h>
#include <linux/quotaio_v1.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/quotaops.h>
#include <asm/byteorder.h>
** from within kupdate, it will ignore the immediate flag
*/
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
#include <linux/time.h>
#include <linux/semaphore.h>
-
#include <linux/vmalloc.h>
#include <linux/reiserfs_fs.h>
-
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
+#include <linux/uaccess.h>
+
+#include <asm/system.h>
/* gets a struct reiserfs_journal_list * from a list head */
#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
static inline void lock_journal(struct super_block *p_s_sb)
{
PROC_INFO_INC(p_s_sb, journal.lock_journal);
- down(&SB_JOURNAL(p_s_sb)->j_lock);
+ mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex);
}
/* unlock the current transaction */
static inline void unlock_journal(struct super_block *p_s_sb)
{
- up(&SB_JOURNAL(p_s_sb)->j_lock);
+ mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex);
}
static inline void get_journal_list(struct reiserfs_journal_list *jl)
}
/* make sure nobody is trying to flush this one at the same time */
- down(&jl->j_commit_lock);
+ mutex_lock(&jl->j_commit_mutex);
if (!journal_list_still_alive(s, trans_id)) {
- up(&jl->j_commit_lock);
+ mutex_unlock(&jl->j_commit_mutex);
goto put_jl;
}
BUG_ON(jl->j_trans_id == 0);
if (flushall) {
atomic_set(&(jl->j_older_commits_done), 1);
}
- up(&jl->j_commit_lock);
+ mutex_unlock(&jl->j_commit_mutex);
goto put_jl;
}
if (flushall) {
atomic_set(&(jl->j_older_commits_done), 1);
}
- up(&jl->j_commit_lock);
+ mutex_unlock(&jl->j_commit_mutex);
put_jl:
put_journal_list(s, jl);
/* if flushall == 0, the lock is already held */
if (flushall) {
- down(&journal->j_flush_sem);
- } else if (!down_trylock(&journal->j_flush_sem)) {
+ mutex_lock(&journal->j_flush_mutex);
+ } else if (mutex_trylock(&journal->j_flush_mutex)) {
BUG();
}
jl->j_state = 0;
put_journal_list(s, jl);
if (flushall)
- up(&journal->j_flush_sem);
+ mutex_unlock(&journal->j_flush_mutex);
put_fs_excl();
return err;
}
struct reiserfs_journal *journal = SB_JOURNAL(s);
chunk.nr = 0;
- down(&journal->j_flush_sem);
+ mutex_lock(&journal->j_flush_mutex);
if (!journal_list_still_alive(s, orig_trans_id)) {
goto done;
}
- /* we've got j_flush_sem held, nobody is going to delete any
+ /* we've got j_flush_mutex held, nobody is going to delete any
* of these lists out from underneath us
*/
while ((num_trans && transactions_flushed < num_trans) ||
}
done:
- up(&journal->j_flush_sem);
+ mutex_unlock(&journal->j_flush_mutex);
return ret;
}
INIT_LIST_HEAD(&jl->j_working_list);
INIT_LIST_HEAD(&jl->j_tail_bh_list);
INIT_LIST_HEAD(&jl->j_bh_list);
- sema_init(&jl->j_commit_lock, 1);
+ mutex_init(&jl->j_commit_mutex);
SB_JOURNAL(s)->j_num_lists++;
get_journal_list(jl);
return jl;
journal->j_last = NULL;
journal->j_first = NULL;
init_waitqueue_head(&(journal->j_join_wait));
- sema_init(&journal->j_lock, 1);
- sema_init(&journal->j_flush_sem, 1);
+ mutex_init(&journal->j_mutex);
+ mutex_init(&journal->j_flush_mutex);
journal->j_trans_id = 10;
journal->j_mount_id = 10;
* the new transaction is fully setup, and we've already flushed the
* ordered bh list
*/
- down(&jl->j_commit_lock);
+ mutex_lock(&jl->j_commit_mutex);
/* save the transaction id in case we need to commit it later */
commit_trans_id = jl->j_trans_id;
lock_kernel();
}
BUG_ON(!list_empty(&jl->j_tail_bh_list));
- up(&jl->j_commit_lock);
+ mutex_unlock(&jl->j_commit_mutex);
/* honor the flush wishes from the caller, simple commits can
** be done outside the journal lock, they are done below
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
+#include <linux/quotaops.h>
#include <linux/vfs.h>
#include <linux/mnt_namespace.h>
#include <linux/mount.h>
int ret = reiserfs_quota_on_mount(s, i);
if (ret < 0)
reiserfs_warning(s,
- "reiserfs: cannot turn on journalled quota: error %d",
+ "reiserfs: cannot turn on journaled quota: error %d",
ret);
}
}
mount options were selected. */
unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */
char **jdev_name,
- unsigned int *commit_max_age)
+ unsigned int *commit_max_age,
+ char **qf_names,
+ unsigned int *qfmt)
{
int c;
char *arg = NULL;
if (c == 'u' || c == 'g') {
int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
- if (sb_any_quota_enabled(s)) {
+ if ((sb_any_quota_enabled(s) ||
+ sb_any_quota_suspended(s)) &&
+ (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
reiserfs_warning(s,
- "reiserfs_parse_options: cannot change journalled quota options when quota turned on.");
+ "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
return 0;
}
if (*arg) { /* Some filename specified? */
"reiserfs_parse_options: quotafile must be on filesystem root.");
return 0;
}
- REISERFS_SB(s)->s_qf_names[qtype] =
+ qf_names[qtype] =
kmalloc(strlen(arg) + 1, GFP_KERNEL);
- if (!REISERFS_SB(s)->s_qf_names[qtype]) {
+ if (!qf_names[qtype]) {
reiserfs_warning(s,
"reiserfs_parse_options: not enough memory for storing quotafile name.");
return 0;
}
- strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg);
+ strcpy(qf_names[qtype], arg);
*mount_options |= 1 << REISERFS_QUOTA;
} else {
- kfree(REISERFS_SB(s)->s_qf_names[qtype]);
- REISERFS_SB(s)->s_qf_names[qtype] = NULL;
+ if (qf_names[qtype] !=
+ REISERFS_SB(s)->s_qf_names[qtype])
+ kfree(qf_names[qtype]);
+ qf_names[qtype] = NULL;
}
}
if (c == 'f') {
if (!strcmp(arg, "vfsold"))
- REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD;
+ *qfmt = QFMT_VFS_OLD;
else if (!strcmp(arg, "vfsv0"))
- REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0;
+ *qfmt = QFMT_VFS_V0;
else {
reiserfs_warning(s,
"reiserfs_parse_options: unknown quota format specified.");
return 0;
}
+ if ((sb_any_quota_enabled(s) ||
+ sb_any_quota_suspended(s)) &&
+ *qfmt != REISERFS_SB(s)->s_jquota_fmt) {
+ reiserfs_warning(s,
+ "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
+ return 0;
+ }
}
#else
if (c == 'u' || c == 'g' || c == 'f') {
reiserfs_warning(s,
- "reiserfs_parse_options: journalled quota options not supported.");
+ "reiserfs_parse_options: journaled quota options not supported.");
return 0;
}
#endif
}
#ifdef CONFIG_QUOTA
- if (!REISERFS_SB(s)->s_jquota_fmt
- && (REISERFS_SB(s)->s_qf_names[USRQUOTA]
- || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) {
+ if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt
+ && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) {
reiserfs_warning(s,
- "reiserfs_parse_options: journalled quota format not specified.");
+ "reiserfs_parse_options: journaled quota format not specified.");
return 0;
}
/* This checking is not precise wrt the quota type but for our purposes it is sufficient */
}
}
+#ifdef CONFIG_QUOTA
+static void handle_quota_files(struct super_block *s, char **qf_names,
+ unsigned int *qfmt)
+{
+ int i;
+
+ for (i = 0; i < MAXQUOTAS; i++) {
+ if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
+ kfree(REISERFS_SB(s)->s_qf_names[i]);
+ REISERFS_SB(s)->s_qf_names[i] = qf_names[i];
+ }
+ REISERFS_SB(s)->s_jquota_fmt = *qfmt;
+}
+#endif
+
static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
{
struct reiserfs_super_block *rs;
struct reiserfs_journal *journal = SB_JOURNAL(s);
char *new_opts = kstrdup(arg, GFP_KERNEL);
int err;
+ char *qf_names[MAXQUOTAS];
+ unsigned int qfmt = 0;
#ifdef CONFIG_QUOTA
int i;
+
+ memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
#endif
rs = SB_DISK_SUPER_BLOCK(s);
if (!reiserfs_parse_options
- (s, arg, &mount_options, &blocks, NULL, &commit_max_age)) {
+ (s, arg, &mount_options, &blocks, NULL, &commit_max_age,
+ qf_names, &qfmt)) {
#ifdef CONFIG_QUOTA
- for (i = 0; i < MAXQUOTAS; i++) {
- kfree(REISERFS_SB(s)->s_qf_names[i]);
- REISERFS_SB(s)->s_qf_names[i] = NULL;
- }
+ for (i = 0; i < MAXQUOTAS; i++)
+ if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
+ kfree(qf_names[i]);
#endif
err = -EINVAL;
goto out_err;
}
+#ifdef CONFIG_QUOTA
+ handle_quota_files(s, qf_names, &qfmt);
+#endif
handle_attrs(s);
char *jdev_name;
struct reiserfs_sb_info *sbi;
int errval = -EINVAL;
+ char *qf_names[MAXQUOTAS] = {};
+ unsigned int qfmt = 0;
save_mount_options(s, data);
jdev_name = NULL;
if (reiserfs_parse_options
(s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
- &commit_max_age) == 0) {
+ &commit_max_age, qf_names, &qfmt) == 0) {
goto error;
}
+#ifdef CONFIG_QUOTA
+ handle_quota_files(s, qf_names, &qfmt);
+#endif
if (blocks) {
SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option "
return (0);
- error:
+error:
if (jinit_done) { /* kill the commit thread, free journal ram */
journal_release_error(NULL, s);
}
#ifdef CONFIG_QUOTA
{
int j;
- for (j = 0; j < MAXQUOTAS; j++) {
- kfree(sbi->s_qf_names[j]);
- sbi->s_qf_names[j] = NULL;
- }
+ for (j = 0; j < MAXQUOTAS; j++)
+ kfree(qf_names[j]);
}
#endif
kfree(sbi);
static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
{
- /* Are we journalling quotas? */
+ /* Are we journaling quotas? */
if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
dquot_mark_dquot_dirty(dquot);
int err;
struct nameidata nd;
struct inode *inode;
+ struct reiserfs_transaction_handle th;
if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
return -EINVAL;
}
mark_inode_dirty(inode);
}
- /* Not journalling quota? No more tests needed... */
- if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] &&
- !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) {
- path_put(&nd.path);
- return vfs_quota_on(sb, type, format_id, path, 0);
- }
- /* Quotafile not of fs root? */
- if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
- reiserfs_warning(sb,
+ /* Journaling quota? */
+ if (REISERFS_SB(sb)->s_qf_names[type]) {
+ /* Quotafile not of fs root? */
+ if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
+ reiserfs_warning(sb,
"reiserfs: Quota file not on filesystem root. "
"Journalled quota will not work.");
+ }
+
+ /*
+ * When we journal data on quota file, we have to flush journal to see
+ * all updates to the file when we bypass pagecache...
+ */
+ if (reiserfs_file_data_log(inode)) {
+ /* Just start temporary transaction and finish it */
+ err = journal_begin(&th, sb, 1);
+ if (err)
+ return err;
+ err = journal_end_sync(&th, sb, 1);
+ if (err)
+ return err;
+ }
path_put(&nd.path);
return vfs_quota_on(sb, type, format_id, path, 0);
}
#include <linux/reiserfs_xattr.h>
#include <asm/uaccess.h>
-#define XATTR_SECURITY_PREFIX "security."
-
static int
security_get(struct inode *inode, const char *name, void *buffer, size_t size)
{
#include <linux/reiserfs_xattr.h>
#include <asm/uaccess.h>
-#define XATTR_TRUSTED_PREFIX "trusted."
-
static int
trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
{
# include <linux/reiserfs_acl.h>
#endif
-#define XATTR_USER_PREFIX "user."
-
static int
user_get(struct inode *inode, const char *name, void *buffer, size_t size)
{
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/dirent.h>
#include <linux/smb_fs.h>
#include <linux/pagemap.h>
#include <linux/net.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/dcache.h>
-#include <linux/dirent.h>
#include <linux/nls.h>
#include <linux/smp_lock.h>
#include <linux/net.h>
#include <linux/errno.h>
#include <linux/fs.h>
+#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/stat.h>
memcpy(de->name, msdos_name, MSDOS_NAME);
de->attr = is_dir ? ATTR_DIR : ATTR_ARCH;
de->lcase = lcase;
- fat_date_unix2dos(ts->tv_sec, &time, &date);
+ fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
de->time = de->ctime = time;
de->date = de->cdate = de->adate = date;
de->ctime_cs = 0;
+++ /dev/null
-#ifndef __LINUX_KVM_ALPHA_H
-#define __LINUX_KVM_ALPHA_H
-
-/* alpha does not support KVM */
-
-#endif
#define current_thread_info() __current_thread_info
/* Thread information allocation. */
+#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE (2*PAGE_SIZE)
-#define alloc_thread_info(tsk) \
- ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
#endif /* __ASSEMBLY__ */
+++ /dev/null
-#ifndef __LINUX_KVM_ARM_H
-#define __LINUX_KVM_ARM_H
-
-/* arm does not support KVM */
-
-#endif
return 0;
}
-#endif /* __KERNEL__ */
-
#define pc_pointer(v) \
((v) & ~PCMASK)
#define profile_pc(regs) instruction_pointer(regs)
#endif
-#ifdef __KERNEL__
#define predicate(x) ((x) & 0xf0000000)
#define PREDICATE_ALWAYS 0xe0000000
-#endif
+
+#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}
-/* thread information allocation */
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk) \
- ((struct thread_info *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, \
- THREAD_SIZE_ORDER))
-#else
-#define alloc_thread_info(tsk) \
- ((struct thread_info *)__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
-#endif
-
-#define free_thread_info(info) \
- free_pages((unsigned long)info, THREAD_SIZE_ORDER);
-
#define thread_saved_pc(tsk) \
((unsigned long)(pc_pointer(task_thread_info(tsk)->cpu_context.pc)))
#define thread_saved_fp(tsk) \
+++ /dev/null
-#ifndef __LINUX_KVM_AVR32_H
-#define __LINUX_KVM_AVR32_H
-
-/* avr32 does not support KVM */
-
-#endif
return (struct thread_info *)addr;
}
-/* thread information allocation */
-#define alloc_thread_info(ti) \
- ((struct thread_info *) __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
-#define free_thread_info(ti) free_pages((unsigned long)(ti), 1)
#define get_thread_info(ti) get_task_struct((ti)->task)
#define put_thread_info(ti) put_task_struct((ti)->task)
+++ /dev/null
-#ifndef __LINUX_KVM_BLACKFIN_H
-#define __LINUX_KVM_BLACKFIN_H
-
-/* blackfin does not support KVM */
-
-#endif
#define PTRACE_GETREGS 12
#define PTRACE_SETREGS 13 /* ptrace signal */
-#ifdef CONFIG_BINFMT_ELF_FDPIC
#define PTRACE_GETFDPIC 31
#define PTRACE_GETFDPIC_EXEC 0
#define PTRACE_GETFDPIC_INTERP 1
-#endif
#define PS_S (0x0002)
+#ifdef __KERNEL__
+
/* user_mode returns true if only one bit is set in IPEND, other than the
master interrupt enable. */
#define user_mode(regs) (!(((regs)->ipend & ~0x10) & (((regs)->ipend & ~0x10) - 1)))
#define profile_pc(regs) instruction_pointer(regs)
extern void show_regs(struct pt_regs *);
+#endif /* __KERNEL__ */
+
#endif /* __ASSEMBLY__ */
/*
/*
* Size of kernel stack for each process. This must be a power of 2...
*/
+#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE 8192 /* 2 pages */
#ifndef __ASSEMBLY__
return (struct thread_info *)((long)ti & ~((long)THREAD_SIZE-1));
}
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
- __get_free_pages(GFP_KERNEL, 1))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
#endif /* __ASSEMBLY__ */
/*
-header-y += ptrace.h
header-y += user.h
header-y += svinto.h
header-y += sv_addr_ag.h
unsigned long return_ip; /* ip that _resume will return to */
};
+#ifdef __KERNEL__
+
/* bit 8 is user-mode flag */
#define user_mode(regs) (((regs)->dccr & 0x100) != 0)
#define instruction_pointer(regs) ((regs)->irp)
#define profile_pc(regs) instruction_pointer(regs)
extern void show_regs(struct pt_regs *);
+#endif /* __KERNEL__ */
+
#endif
-header-y += ptrace.h
header-y += user.h
header-y += cryptocop.h
unsigned long return_ip; /* ip that _resume will return to */
};
+#ifdef __KERNEL__
+
#define user_mode(regs) (((regs)->ccs & (1 << (U_CCS_BITNR + CCS_SHIFT))) != 0)
#define instruction_pointer(regs) ((regs)->erp)
extern void show_regs(struct pt_regs *);
#define profile_pc(regs) instruction_pointer(regs)
+#endif /* __KERNEL__ */
+
#endif
+++ /dev/null
-#ifndef __LINUX_KVM_CRIS_H
-#define __LINUX_KVM_CRIS_H
-
-/* cris does not support KVM */
-
-#endif
#include <asm/arch/ptrace.h>
#ifdef __KERNEL__
+
/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
#define PTRACE_GETREGS 12
#define PTRACE_SETREGS 13
-#endif
#define profile_pc(regs) instruction_pointer(regs)
+#endif /* __KERNEL__ */
+
#endif /* _CRIS_PTRACE_H */
#ifdef __KERNEL__
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
#ifndef __ASSEMBLY__
#include <asm/types.h>
#include <asm/processor.h>
header-y += registers.h
unifdef-y += termios.h
-unifdef-y += ptrace.h
+++ /dev/null
-#ifndef __LINUX_KVM_FRV_H
-#define __LINUX_KVM_FRV_H
-
-/* frv does not support KVM */
-
-#endif
#define current_thread_info() ({ __current_thread_info; })
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
/* thread information allocation */
#ifdef CONFIG_DEBUG_STACK_USAGE
#define alloc_thread_info(tsk) \
+ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/kvm.h),)
header-y += kvm.h
+endif
ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/a.out.h),)
unifdef-y += a.out.h
#ifndef __WARN
#ifndef __ASSEMBLY__
extern void warn_on_slowpath(const char *file, const int line);
+extern void warn_slowpath(const char *file, const int line,
+ const char *fmt, ...) __attribute__((format(printf, 3, 4)));
#define WANT_WARN_ON_SLOWPATH
#endif
#define __WARN() warn_on_slowpath(__FILE__, __LINE__)
+#define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg)
+#else
+#define __WARN_printf(arg...) __WARN()
#endif
#ifndef WARN_ON
})
#endif
+#ifndef WARN
+#define WARN(condition, format...) ({ \
+ int __ret_warn_on = !!(condition); \
+ if (unlikely(__ret_warn_on)) \
+ __WARN_printf(format); \
+ unlikely(__ret_warn_on); \
+})
+#endif
+
#else /* !CONFIG_BUG */
#ifndef HAVE_ARCH_BUG
#define BUG()
unlikely(__ret_warn_on); \
})
#endif
+
+#ifndef WARN
+#define WARN(condition, format...) ({ \
+ int __ret_warn_on = !!(condition); \
+ unlikely(__ret_warn_on); \
+})
+#endif
+
#endif
#define WARN_ON_ONCE(condition) ({ \
unlikely(__ret_warn_once); \
})
+#define WARN_ON_RATELIMIT(condition, state) \
+ WARN_ON((condition) && __ratelimit(state))
+
#ifdef CONFIG_SMP
# define WARN_ON_SMP(x) WARN_ON(x)
#else
#include <linux/types.h>
-#ifdef CONFIG_HAVE_GPIO_LIB
+#ifdef CONFIG_GPIOLIB
#include <linux/compiler.h>
/**
* struct gpio_chip - abstract a GPIO controller
* @label: for diagnostics
+ * @dev: optional device providing the GPIOs
+ * @owner: helps prevent removal of modules exporting active GPIOs
* @direction_input: configures signal "offset" as input, or returns error
* @get: returns value for signal "offset"; for output signals this
* returns either the value actually sensed, or zero
*/
struct gpio_chip {
char *label;
+ struct device *dev;
struct module *owner;
int (*direction_input)(struct gpio_chip *chip,
int base;
u16 ngpio;
unsigned can_sleep:1;
+ unsigned exported:1;
};
extern const char *gpiochip_is_requested(struct gpio_chip *chip,
extern int __gpio_cansleep(unsigned gpio);
-#else
+#ifdef CONFIG_GPIO_SYSFS
+
+/*
+ * A sysfs interface can be exported by individual drivers if they want,
+ * but more typically is configured entirely from userspace.
+ */
+extern int gpio_export(unsigned gpio, bool direction_may_change);
+extern void gpio_unexport(unsigned gpio);
+
+#endif /* CONFIG_GPIO_SYSFS */
+
+#else /* !CONFIG_HAVE_GPIO_LIB */
static inline int gpio_is_valid(int number)
{
gpio_set_value(gpio, value);
}
-#endif
+#endif /* !CONFIG_HAVE_GPIO_LIB */
+
+#ifndef CONFIG_GPIO_SYSFS
+
+/* sysfs support is only available with gpiolib, where it's optional */
+
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+ return -ENOSYS;
+}
+
+static inline void gpio_unexport(unsigned gpio)
+{
+}
+#endif /* CONFIG_GPIO_SYSFS */
#endif /* _ASM_GENERIC_GPIO_H */
#ifdef __GNUC__
__extension__ typedef __signed__ long long __s64;
__extension__ typedef unsigned long long __u64;
-#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#else
typedef __signed__ long long __s64;
typedef unsigned long long __u64;
#endif
#define ELF_DATA ELFDATA2MSB
#define ELF_ARCH EM_H8_300
#if defined(__H8300H__)
-#define ELF_FLAGS 0x810000
+#define ELF_CORE_EFLAGS 0x810000
#endif
#if defined(__H8300S__)
-#define ELF_FLAGS 0x820000
+#define ELF_CORE_EFLAGS 0x820000
#endif
#define ELF_PLAT_INIT(_r) _r->er1 = 0
+++ /dev/null
-#ifndef __LINUX_KVM_H8300_H
-#define __LINUX_KVM_H8300_H
-
-/* h8300 does not support KVM */
-
-#endif
/*
* Size of kernel stack for each process. This must be a power of 2...
*/
+#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE 8192 /* 2 pages */
return ti;
}
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
- __get_free_pages(GFP_KERNEL, 1))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
#endif /* __ASSEMBLY__ */
/*
}, \
}
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
#ifndef ASM_OFFSETS_C
/* how to get the thread information struct from C */
#define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
+++ /dev/null
-#ifndef __LINUX_KVM_M32R_H
-#define __LINUX_KVM_M32R_H
-
-/* m32r does not support KVM */
-
-#endif
return ti;
}
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
/* thread information allocation */
#ifdef CONFIG_DEBUG_STACK_USAGE
#define alloc_thread_info(tsk) \
+++ /dev/null
-#ifndef __LINUX_KVM_M68K_H
-#define __LINUX_KVM_M68K_H
-
-/* m68k does not support KVM */
-
-#endif
}
/* THREAD_SIZE should be 8k, so handle differently for 4k and 8k machines */
-#if PAGE_SHIFT == 13 /* 8k machines */
-#define alloc_thread_info(tsk) ((struct thread_info *)__get_free_pages(GFP_KERNEL,0))
-#define free_thread_info(ti) free_pages((unsigned long)(ti),0)
-#else /* otherwise assume 4k pages */
-#define alloc_thread_info(tsk) ((struct thread_info *)__get_free_pages(GFP_KERNEL,1))
-#define free_thread_info(ti) free_pages((unsigned long)(ti),1)
-#endif /* PAGE_SHIFT == 13 */
+#define THREAD_SIZE_ORDER (13 - PAGE_SHIFT)
#define init_thread_info (init_task.thread.info)
#define init_stack (init_thread_union.stack)
+++ /dev/null
-#ifndef __LINUX_KVM_M68KNOMMU_H
-#define __LINUX_KVM_M68KNOMMU_H
-
-/* m68knommu does not support KVM */
-
-#endif
/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
#define PTRACE_GETREGS 12
#define PTRACE_SETREGS 13
-#ifdef CONFIG_FPU
#define PTRACE_GETFPREGS 14
#define PTRACE_SETFPREGS 15
-#endif
#ifdef __KERNEL__
return ti;
}
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
- __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_SIZE_ORDER)
#endif /* __ASSEMBLY__ */
#define PREEMPT_ACTIVE 0x4000000
+++ /dev/null
-#ifndef __LINUX_KVM_MIPS_H
-#define __LINUX_KVM_MIPS_H
-
-/* mips does not support KVM */
-
-#endif
#ifndef __ASM_MACH_GENERIC_GPIO_H
#define __ASM_MACH_GENERIC_GPIO_H
-#ifdef CONFIG_HAVE_GPIO_LIB
+#ifdef CONFIG_GPIOLIB
#define gpio_get_value __gpio_get_value
#define gpio_set_value __gpio_set_value
#define gpio_cansleep __gpio_cansleep
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define THREAD_MASK (THREAD_SIZE - 1UL)
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
#ifdef CONFIG_DEBUG_STACK_USAGE
#define alloc_thread_info(tsk) \
({ \
+++ /dev/null
-#ifndef __LINUX_KVM_MN10300_H
-#define __LINUX_KVM_MN10300_H
-
-/* mn10300 does not support KVM */
-
-#endif
/* options set using PTRACE_SETOPTIONS */
#define PTRACE_O_TRACESYSGOOD 0x00000001
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#if defined(__KERNEL__)
+
+#if !defined(__ASSEMBLY__)
#define user_mode(regs) (((regs)->epsw & EPSW_nSL) == EPSW_nSL)
#define instruction_pointer(regs) ((regs)->pc)
extern void show_regs(struct pt_regs *);
-#endif
+#endif /* !__ASSEMBLY */
#define profile_pc(regs) ((regs)->pc)
+#endif /* __KERNEL__ */
+
#endif /* _ASM_PTRACE_H */
return sp;
}
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
/* thread information allocation */
#ifdef CONFIG_DEBUG_STACK_USAGE
#define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL)
+++ /dev/null
-#ifndef __LINUX_KVM_PARISC_H
-#define __LINUX_KVM_PARISC_H
-
-/* parisc does not support KVM */
-
-#endif
unsigned long ipsw; /* CR22 */
};
-#define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
/*
* The numbers chosen here are somewhat arbitrary but absolutely MUST
* not overlap with any of the number assigned in <linux/ptrace.h>.
* since we have taken branch traps too)
*/
#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */
+
#ifdef __KERNEL__
+#define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS))
+
/* XXX should we use iaoq[1] or iaoq[0] ? */
#define user_mode(regs) (((regs)->iaoq[0] & 3) ? 1 : 0)
#define user_space(regs) (((regs)->iasq[1] != 0) ? 1 : 0)
/* thread information allocation */
-#define THREAD_ORDER 2
+#define THREAD_SIZE_ORDER 2
/* Be sure to hunt all references to this down when you change the size of
* the kernel stack */
-#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
-#define THREAD_SHIFT (PAGE_SHIFT + THREAD_ORDER)
-
-#define alloc_thread_info(tsk) ((struct thread_info *) \
- __get_free_pages(GFP_KERNEL, THREAD_ORDER))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER)
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
+#define THREAD_SHIFT (PAGE_SHIFT + THREAD_SIZE_ORDER)
/* how to get the thread information struct from C */
#define current_thread_info() ((struct thread_info *)mfctl(30))
unifdef-y += nvram.h
unifdef-y += param.h
unifdef-y += posix_types.h
-unifdef-y += ptrace.h
unifdef-y += seccomp.h
unifdef-y += signal.h
unifdef-y += spu_info.h
extern void do_feature_fixups(unsigned long value, void *fixup_start,
void *fixup_end);
+extern const char *powerpc_base_platform;
+
#endif /* __ASSEMBLY__ */
/* CPU kernel features */
#define ELF_PLATFORM (cur_cpu_spec->platform)
+/* While ELF_PLATFORM indicates the ISA supported by the platform, it
+ * may not accurately reflect the underlying behavior of the hardware
+ * (as in the case of running in Power5+ compatibility mode on a
+ * Power6 machine). ELF_BASE_PLATFORM allows ld.so to load libraries
+ * that are tuned for the real hardware.
+ */
+#define ELF_BASE_PLATFORM (powerpc_base_platform)
+
#ifdef __powerpc64__
# define ELF_PLAT_INIT(_r, load_addr) do { \
_r->gpr[2] = load_addr; \
#define FW_FEATURE_PS3_LV1 ASM_CONST(0x0000000000800000)
#define FW_FEATURE_BEAT ASM_CONST(0x0000000001000000)
#define FW_FEATURE_BULK_REMOVE ASM_CONST(0x0000000002000000)
+#define FW_FEATURE_CMO ASM_CONST(0x0000000004000000)
#ifndef __ASSEMBLY__
FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ |
FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE |
- FW_FEATURE_SPLPAR | FW_FEATURE_LPAR,
+ FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | FW_FEATURE_CMO,
FW_FEATURE_PSERIES_ALWAYS = 0,
FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR,
#include <linux/errno.h>
#include <asm-generic/gpio.h>
-#ifdef CONFIG_HAVE_GPIO_LIB
+#ifdef CONFIG_GPIOLIB
/*
* We don't (yet) implement inlined/rapid versions for on-chip gpios.
return -EINVAL;
}
-#endif /* CONFIG_HAVE_GPIO_LIB */
+#endif /* CONFIG_GPIOLIB */
#endif /* __ASM_POWERPC_GPIO_H */
#define H_EXACT (1UL<<(63-24)) /* Use exact PTE or return H_PTEG_FULL */
#define H_R_XLATE (1UL<<(63-25)) /* include a valid logical page num in the pte if the valid bit is set */
#define H_READ_4 (1UL<<(63-26)) /* Return 4 PTEs */
+#define H_PAGE_STATE_CHANGE (1UL<<(63-28))
+#define H_PAGE_UNUSED ((1UL<<(63-29)) | (1UL<<(63-30)))
+#define H_PAGE_SET_UNUSED (H_PAGE_STATE_CHANGE | H_PAGE_UNUSED)
+#define H_PAGE_SET_LOANED (H_PAGE_SET_UNUSED | (1UL<<(63-31)))
+#define H_PAGE_SET_ACTIVE H_PAGE_STATE_CHANGE
#define H_AVPN (1UL<<(63-32)) /* An avpn is provided as a sanity test */
#define H_ANDCOND (1UL<<(63-33))
#define H_ICACHE_INVALIDATE (1UL<<(63-40)) /* icbi, etc. (ignored for IO pages) */
#define H_JOIN 0x298
#define H_VASI_STATE 0x2A4
#define H_ENABLE_CRQ 0x2B0
-#define MAX_HCALL_OPCODE H_ENABLE_CRQ
+#define H_SET_MPP 0x2D0
+#define H_GET_MPP 0x2D4
+#define MAX_HCALL_OPCODE H_GET_MPP
#ifndef __ASSEMBLY__
};
#define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1)
+struct hvcall_mpp_data {
+ unsigned long entitled_mem;
+ unsigned long mapped_mem;
+ unsigned short group_num;
+ unsigned short pool_num;
+ unsigned char mem_weight;
+ unsigned char unallocated_mem_weight;
+ unsigned long unallocated_entitlement; /* value in bytes */
+ unsigned long pool_size;
+ signed long loan_request;
+ unsigned long backing_mem;
+};
+
+int h_get_mpp(struct hvcall_mpp_data *);
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HVCALL_H */
// NOTE: This value will ALWAYS be zero for dedicated processors and
// will NEVER be zero for shared processors (ie, initialized to a 1).
volatile u32 yield_count; // PLIC increments each dispatchx00-x03
- u8 reserved6[124]; // Reserved x04-x7F
+ u32 reserved6;
+ volatile u64 cmo_faults; // CMO page fault count x08-x0F
+ volatile u64 cmo_fault_time; // CMO page fault time x10-x17
+ u8 reserved7[104]; // Reserved x18-x7F
//=============================================================================
// CACHE_LINE_4-5 0x0180 - 0x027F Contains PMC interrupt data
* destroyed as well */
void (*hpte_clear_all)(void);
- void (*tce_build)(struct iommu_table * tbl,
+ int (*tce_build)(struct iommu_table *tbl,
long index,
long npages,
unsigned long uaddr,
#define MPC52xx_PSC_RXTX_FIFO_ALARM 0x0002
#define MPC52xx_PSC_RXTX_FIFO_EMPTY 0x0001
-/* PSC interrupt mask bits */
+/* PSC interrupt status/mask bits */
#define MPC52xx_PSC_IMR_TXRDY 0x0100
#define MPC52xx_PSC_IMR_RXRDY 0x0200
#define MPC52xx_PSC_IMR_DB 0x0400
+#define MPC52xx_PSC_IMR_TXEMP 0x0800
+#define MPC52xx_PSC_IMR_ORERR 0x1000
#define MPC52xx_PSC_IMR_IPC 0x8000
/* PSC input port change bit */
#define MPC52xx_PSC_RFNUM_MASK 0x01ff
+#define MPC52xx_PSC_SICR_DTS1 (1 << 29)
+#define MPC52xx_PSC_SICR_SHDR (1 << 28)
+#define MPC52xx_PSC_SICR_SIM_MASK (0xf << 24)
+#define MPC52xx_PSC_SICR_SIM_UART (0x0 << 24)
+#define MPC52xx_PSC_SICR_SIM_UART_DCD (0x8 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_8 (0x1 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_16 (0x2 << 24)
+#define MPC52xx_PSC_SICR_SIM_AC97 (0x3 << 24)
+#define MPC52xx_PSC_SICR_SIM_SIR (0x8 << 24)
+#define MPC52xx_PSC_SICR_SIM_SIR_DCD (0xc << 24)
+#define MPC52xx_PSC_SICR_SIM_MIR (0x5 << 24)
+#define MPC52xx_PSC_SICR_SIM_FIR (0x6 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_24 (0x7 << 24)
+#define MPC52xx_PSC_SICR_SIM_CODEC_32 (0xf << 24)
+#define MPC52xx_PSC_SICR_GENCLK (1 << 23)
+#define MPC52xx_PSC_SICR_I2S (1 << 22)
+#define MPC52xx_PSC_SICR_CLKPOL (1 << 21)
+#define MPC52xx_PSC_SICR_SYNCPOL (1 << 20)
+#define MPC52xx_PSC_SICR_CELLSLAVE (1 << 19)
+#define MPC52xx_PSC_SICR_CELL2XCLK (1 << 18)
+#define MPC52xx_PSC_SICR_ESAI (1 << 17)
+#define MPC52xx_PSC_SICR_ENAC97 (1 << 16)
+#define MPC52xx_PSC_SICR_SPI (1 << 15)
+#define MPC52xx_PSC_SICR_MSTR (1 << 14)
+#define MPC52xx_PSC_SICR_CPOL (1 << 13)
+#define MPC52xx_PSC_SICR_CPHA (1 << 12)
+#define MPC52xx_PSC_SICR_USEEOF (1 << 11)
+#define MPC52xx_PSC_SICR_DISABLEEOF (1 << 10)
/* Structure of the hardware registers */
struct mpc52xx_psc {
u8 reserved5[3];
u8 ctlr; /* PSC + 0x1c */
u8 reserved6[3];
- u16 ccr; /* PSC + 0x20 */
- u8 reserved7[14];
+ /* BitClkDiv field of CCR is byte swapped in
+ * the hardware for mpc5200/b compatibility */
+ u32 ccr; /* PSC + 0x20 */
+ u32 ac97_slots; /* PSC + 0x24 */
+ u32 ac97_cmd; /* PSC + 0x28 */
+ u32 ac97_data; /* PSC + 0x2c */
u8 ivr; /* PSC + 0x30 */
u8 reserved8[3];
u8 ip; /* PSC + 0x34 */
remap_pfn_range(vma, vaddr, pfn, size, prot)
#include <asm-generic/pgtable.h>
+
+
+/*
+ * This gets called at the end of handling a page fault, when
+ * the kernel has put a new PTE into the page table for the process.
+ * We use it to ensure coherency between the i-cache and d-cache
+ * for the page which has just been mapped in.
+ * On machines which use an MMU hash table, we use this to put a
+ * corresponding HPTE into the hash table ahead of time, instead of
+ * waiting for the inevitable extra hash-table miss exception.
+ */
+extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
+
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
unsigned long p3, unsigned long p4, unsigned long p5,
unsigned long p6, struct pt_regs *regs);
asmlinkage long sys_pipe(int __user *fildes);
+asmlinkage long sys_pipe2(int __user *fildes, int flags);
asmlinkage long sys_rt_sigaction(int sig,
const struct sigaction __user *act,
struct sigaction __user *oact, size_t sigsetsize);
SYSCALL(subpage_prot)
COMPAT_SYS_SPU(timerfd_settime)
COMPAT_SYS_SPU(timerfd_gettime)
+COMPAT_SYS_SPU(signalfd4)
+SYSCALL_SPU(eventfd2)
+SYSCALL_SPU(epoll_create1)
+SYSCALL_SPU(dup3)
+SYSCALL_SPU(pipe2)
+SYSCALL(inotify_init1)
#endif
extern int set_dabr(unsigned long dabr);
+extern void do_dabr(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code);
extern void print_backtrace(unsigned long *);
extern void show_regs(struct pt_regs * regs);
extern void flush_instruction_cache(void);
#if THREAD_SHIFT >= PAGE_SHIFT
-#define THREAD_ORDER (THREAD_SHIFT - PAGE_SHIFT)
-
-#ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk) \
- ((struct thread_info *)__get_free_pages(GFP_KERNEL | \
- __GFP_ZERO, THREAD_ORDER))
-#else
-#define alloc_thread_info(tsk) \
- ((struct thread_info *)__get_free_pages(GFP_KERNEL, THREAD_ORDER))
-#endif
-#define free_thread_info(ti) free_pages((unsigned long)ti, THREAD_ORDER)
+#define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT)
#else /* THREAD_SHIFT < PAGE_SHIFT */
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
extern struct thread_info *alloc_thread_info(struct task_struct *tsk);
extern void free_thread_info(struct thread_info *ti);
#endif
-/*
- * This gets called at the end of handling a page fault, when
- * the kernel has put a new PTE into the page table for the process.
- * We use it to ensure coherency between the i-cache and d-cache
- * for the page which has just been mapped in.
- * On machines which use an MMU hash table, we use this to put a
- * corresponding HPTE into the hash table ahead of time, instead of
- * waiting for the inevitable extra hash-table miss exception.
- */
-extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
-
#endif /*__KERNEL__ */
#endif /* _ASM_POWERPC_TLBFLUSH_H */
#define __NR_subpage_prot 310
#define __NR_timerfd_settime 311
#define __NR_timerfd_gettime 312
+#define __NR_signalfd4 313
+#define __NR_eventfd2 314
+#define __NR_epoll_create1 315
+#define __NR_dup3 316
+#define __NR_pipe2 317
+#define __NR_inotify_init1 318
#ifdef __KERNEL__
-#define __NR_syscalls 313
+#define __NR_syscalls 319
#define __NR__exit __NR_exit
#define NR_syscalls __NR_syscalls
#define VIO_IRQ_DISABLE 0UL
#define VIO_IRQ_ENABLE 1UL
+/*
+ * VIO CMO minimum entitlement for all devices and spare entitlement
+ */
+#define VIO_CMO_MIN_ENT 1562624
+
struct iommu_table;
-/*
- * The vio_dev structure is used to describe virtual I/O devices.
+/**
+ * vio_dev - This structure is used to describe virtual I/O devices.
+ *
+ * @desired: set from return of driver's get_desired_dma() function
+ * @entitled: bytes of IO data that has been reserved for this device.
+ * @allocated: bytes of IO data currently in use by the device.
+ * @allocs_failed: number of DMA failures due to insufficient entitlement.
*/
struct vio_dev {
const char *name;
const char *type;
uint32_t unit_address;
unsigned int irq;
+ struct {
+ size_t desired;
+ size_t entitled;
+ size_t allocated;
+ atomic_t allocs_failed;
+ } cmo;
struct device dev;
};
const struct vio_device_id *id_table;
int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
int (*remove)(struct vio_dev *dev);
+ /* A driver must have a get_desired_dma() function to
+ * be loaded in a CMO environment if it uses DMA.
+ */
+ unsigned long (*get_desired_dma)(struct vio_dev *dev);
struct device_driver driver;
};
extern int vio_register_driver(struct vio_driver *drv);
extern void vio_unregister_driver(struct vio_driver *drv);
+extern int vio_cmo_entitlement_update(size_t);
+extern void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired);
+
extern void __devinit vio_unregister_device(struct vio_dev *dev);
struct device_node;
header-y += ucontext.h
header-y += vtoc.h
header-y += zcrypt.h
-header-y += kvm.h
header-y += chsc.h
unifdef-y += cmb.h
#define KVM_S390_VIRTIO_RESET 1
#define KVM_S390_VIRTIO_SET_STATUS 2
+#ifdef __KERNEL__
+/* early virtio console setup */
+#ifdef CONFIG_VIRTIO_CONSOLE
+extern void s390_virtio_console_init(void);
+#else
+static inline void s390_virtio_console_init(void)
+{
+}
+#endif /* CONFIG_VIRTIO_CONSOLE */
+#endif /* __KERNEL__ */
#endif
return (struct thread_info *)((*(unsigned long *) __LC_KERNEL_STACK)-THREAD_SIZE);
}
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) \
- __get_free_pages(GFP_KERNEL,THREAD_ORDER))
-#define free_thread_info(ti) free_pages((unsigned long) (ti),THREAD_ORDER)
+#define THREAD_SIZE_ORDER THREAD_ORDER
#endif
+++ /dev/null
-#ifndef __LINUX_KVM_SH_H
-#define __LINUX_KVM_SH_H
-
-/* sh does not support KVM */
-
-#endif
* Copyright (C) 1999, 2000 Niibe Yutaka
*
*/
-#if defined(__SH5__) || defined(CONFIG_SUPERH64)
+#if defined(__SH5__)
struct pt_regs {
unsigned long long pc;
unsigned long long sr;
return ti;
}
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
/* thread information allocation */
#ifdef CONFIG_DEBUG_STACK_USAGE
#define alloc_thread_info(ti) kzalloc(THREAD_SIZE, GFP_KERNEL)
+++ /dev/null
-#ifndef __LINUX_KVM_SPARC_H
-#define __LINUX_KVM_SPARC_H
-
-/* sparc does not support KVM */
-
-#endif
#define THREAD_INFO_ORDER 1
#endif
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
BTFIXUPDEF_CALL(struct thread_info *, alloc_thread_info, void)
#define alloc_thread_info(tsk) BTFIXUP_CALL(alloc_thread_info)()
#define __THREAD_INFO_ORDER 0
#endif /* PAGE_SHIFT == 13 */
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
#ifdef CONFIG_DEBUG_STACK_USAGE
#define alloc_thread_info(tsk) \
({ \
+++ /dev/null
-#include <asm-sparc/kvm.h>
+++ /dev/null
-#ifndef __LINUX_KVM_UM_H
-#define __LINUX_KVM_UM_H
-
-/* um does not support KVM */
-
-#endif
return ti;
}
-#ifdef CONFIG_DEBUG_STACK_USAGE
-
-#define alloc_thread_info(tsk) \
- ((struct thread_info *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, \
- CONFIG_KERNEL_STACK_ORDER))
-#else
-
-/* thread information allocation */
-#define alloc_thread_info(tsk) \
- ((struct thread_info *) __get_free_pages(GFP_KERNEL, \
- CONFIG_KERNEL_STACK_ORDER))
-#endif
-
-#define free_thread_info(ti) \
- free_pages((unsigned long)(ti),CONFIG_KERNEL_STACK_ORDER)
+#define THREAD_SIZE_ORDER CONFIG_KERNEL_STACK_ORDER
#endif
header-y += boot.h
header-y += bootparam.h
header-y += debugreg.h
-header-y += kvm.h
header-y += ldt.h
header-y += msr-index.h
header-y += prctl.h
unifdef-y += mtrr.h
unifdef-y += posix_types_32.h
unifdef-y += posix_types_64.h
-unifdef-y += ptrace.h
unifdef-y += unistd_32.h
unifdef-y += unistd_64.h
unifdef-y += vm86.h
+/*
+ * Generic GPIO API implementation for x86.
+ *
+ * Derived from the generic GPIO API for powerpc:
+ *
+ * Copyright (c) 2007-2008 MontaVista Software, Inc.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
#ifndef _ASM_I386_GPIO_H
#define _ASM_I386_GPIO_H
+#ifdef CONFIG_X86_RDC321X
#include <gpio.h>
+#else /* CONFIG_X86_RDC321X */
+
+#include <asm-generic/gpio.h>
+
+#ifdef CONFIG_GPIOLIB
+
+/*
+ * Just call gpiolib.
+ */
+static inline int gpio_get_value(unsigned int gpio)
+{
+ return __gpio_get_value(gpio);
+}
+
+static inline void gpio_set_value(unsigned int gpio, int value)
+{
+ __gpio_set_value(gpio, value);
+}
+
+static inline int gpio_cansleep(unsigned int gpio)
+{
+ return __gpio_cansleep(gpio);
+}
+
+/*
+ * Not implemented, yet.
+ */
+static inline int gpio_to_irq(unsigned int gpio)
+{
+ return -ENOSYS;
+}
+
+static inline int irq_to_gpio(unsigned int irq)
+{
+ return -EINVAL;
+}
+
+#endif /* CONFIG_GPIOLIB */
+
+#endif /* CONFIG_X86_RDC321X */
#endif /* _ASM_I386_GPIO_H */
#define THREAD_FLAGS GFP_KERNEL
#endif
+#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+
#define alloc_thread_info(tsk) \
((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER))
+++ /dev/null
-#ifndef __LINUX_KVM_XTENSA_H
-#define __LINUX_KVM_XTENSA_H
-
-/* xtensa does not support KVM */
-
-#endif
#define PTRACE_GETXTREGS 18
#define PTRACE_SETXTREGS 19
-#ifndef __ASSEMBLY__
-
#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
/*
* This struct defines the way the registers are stored on the
* kernel stack during a system call or other kernel entry.
# ifndef CONFIG_SMP
# define profile_pc(regs) instruction_pointer(regs)
# endif
-#endif /* __KERNEL__ */
#else /* __ASSEMBLY__ */
-#ifdef __KERNEL__
# include <asm/asm-offsets.h>
#define PT_REGS_OFFSET (KERNEL_STACK_SIZE - PT_USER_SIZE)
-#endif
#endif /* !__ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
#endif /* _XTENSA_PTRACE_H */
return ti;
}
-/* thread information allocation */
-#define alloc_thread_info(tsk) ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
-#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
-
#else /* !__ASSEMBLY__ */
/* how to get the thread information struct from ASM */
#define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */
#define THREAD_SIZE 8192 //(2*PAGE_SIZE)
+#define THREAD_SIZE_ORDER 1
#endif /* __KERNEL__ */
#endif /* _XTENSA_THREAD_INFO */
unifdef-y += cuda.h
unifdef-y += cyclades.h
unifdef-y += dccp.h
-unifdef-y += dirent.h
unifdef-y += dlm.h
unifdef-y += dlm_plock.h
unifdef-y += edd.h
unifdef-y += kernelcapi.h
unifdef-y += kernel.h
unifdef-y += keyboard.h
+ifneq ($(wildcard $(srctree)/include/asm-$(SRCARCH)/kvm.h),)
unifdef-y += kvm.h
+endif
unifdef-y += llc.h
unifdef-y += loop.h
unifdef-y += lp.h
struct vfsmount;
struct super_block;
struct pacct_struct;
+struct pid_namespace;
extern void acct_auto_close_mnt(struct vfsmount *m);
extern void acct_auto_close(struct super_block *sb);
extern void acct_init_pacct(struct pacct_struct *pacct);
extern void acct_collect(long exitcode, int group_dead);
extern void acct_process(void);
+extern void acct_exit_ns(struct pid_namespace *);
#else
#define acct_auto_close_mnt(x) do { } while (0)
#define acct_auto_close(x) do { } while (0)
#define acct_init_pacct(x) do { } while (0)
#define acct_collect(x,y) do { } while (0)
#define acct_process() do { } while (0)
+#define acct_exit_ns(ns) do { } while (0)
#endif
/*
#define AT_SECURE 23 /* secure mode boolean */
+#define AT_BASE_PLATFORM 24 /* string identifying real platform, may
+ * differ from AT_PLATFORM. */
+
#define AT_EXECFN 31 /* filename of program */
+
#ifdef __KERNEL__
-#define AT_VECTOR_SIZE_BASE 17 /* NEW_AUX_ENT entries in auxiliary table */
+#define AT_VECTOR_SIZE_BASE 18 /* NEW_AUX_ENT entries in auxiliary table */
/* number of "#define AT_.*" above, minus {AT_NULL, AT_IGNORE, AT_NOTELF} */
#endif
__alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low_pages(x) \
__alloc_bootmem_low(x, PAGE_SIZE, 0)
-#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
-
-extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
- int flags);
-
#define alloc_bootmem_node(pgdat, x) \
__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_pages_node(pgdat, x) \
__alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low_pages_node(pgdat, x) \
__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
+#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
+
+extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
+ int flags);
extern void *alloc_bootmem_section(unsigned long size,
unsigned long section_nr);
#define __le32_to_cpus(x) __swab32s((x))
#define __cpu_to_le16s(x) __swab16s((x))
#define __le16_to_cpus(x) __swab16s((x))
-#define __cpu_to_be64s(x) do {} while (0)
-#define __be64_to_cpus(x) do {} while (0)
-#define __cpu_to_be32s(x) do {} while (0)
-#define __be32_to_cpus(x) do {} while (0)
-#define __cpu_to_be16s(x) do {} while (0)
-#define __be16_to_cpus(x) do {} while (0)
+#define __cpu_to_be64s(x) do { (void)(x); } while (0)
+#define __be64_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_be32s(x) do { (void)(x); } while (0)
+#define __be32_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_be16s(x) do { (void)(x); } while (0)
+#define __be16_to_cpus(x) do { (void)(x); } while (0)
#ifdef __KERNEL__
#include <linux/byteorder/generic.h>
{
return __swab16p((__u16 *)p);
}
-#define __cpu_to_le64s(x) do {} while (0)
-#define __le64_to_cpus(x) do {} while (0)
-#define __cpu_to_le32s(x) do {} while (0)
-#define __le32_to_cpus(x) do {} while (0)
-#define __cpu_to_le16s(x) do {} while (0)
-#define __le16_to_cpus(x) do {} while (0)
+#define __cpu_to_le64s(x) do { (void)(x); } while (0)
+#define __le64_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_le32s(x) do { (void)(x); } while (0)
+#define __le32_to_cpus(x) do { (void)(x); } while (0)
+#define __cpu_to_le16s(x) do { (void)(x); } while (0)
+#define __le16_to_cpus(x) do { (void)(x); } while (0)
#define __cpu_to_be64s(x) __swab64s((x))
#define __be64_to_cpus(x) __swab64s((x))
#define __cpu_to_be32s(x) __swab32s((x))
struct cgroupfs_root;
struct cgroup_subsys;
struct inode;
+struct cgroup;
extern int cgroup_init_early(void);
extern int cgroup_init(void);
extern void cgroup_init_smp(void);
extern void cgroup_lock(void);
+extern bool cgroup_lock_live_group(struct cgroup *cgrp);
extern void cgroup_unlock(void);
extern void cgroup_fork(struct task_struct *p);
extern void cgroup_fork_callbacks(struct task_struct *p);
* subsystem, followed by a period */
char name[MAX_CFTYPE_NAME];
int private;
- int (*open) (struct inode *inode, struct file *file);
- ssize_t (*read) (struct cgroup *cgrp, struct cftype *cft,
- struct file *file,
- char __user *buf, size_t nbytes, loff_t *ppos);
+
+ /*
+ * If non-zero, defines the maximum length of string that can
+ * be passed to write_string; defaults to 64
+ */
+ size_t max_write_len;
+
+ int (*open)(struct inode *inode, struct file *file);
+ ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
+ struct file *file,
+ char __user *buf, size_t nbytes, loff_t *ppos);
/*
* read_u64() is a shortcut for the common case of returning a
* single integer. Use it in place of read()
*/
- u64 (*read_u64) (struct cgroup *cgrp, struct cftype *cft);
+ u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft);
/*
* read_s64() is a signed version of read_u64()
*/
- s64 (*read_s64) (struct cgroup *cgrp, struct cftype *cft);
+ s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft);
/*
* read_map() is used for defining a map of key/value
* pairs. It should call cb->fill(cb, key, value) for each
* entry. The key/value pairs (and their ordering) should not
* change between reboots.
*/
- int (*read_map) (struct cgroup *cont, struct cftype *cft,
- struct cgroup_map_cb *cb);
+ int (*read_map)(struct cgroup *cont, struct cftype *cft,
+ struct cgroup_map_cb *cb);
/*
* read_seq_string() is used for outputting a simple sequence
* using seqfile.
*/
- int (*read_seq_string) (struct cgroup *cont, struct cftype *cft,
- struct seq_file *m);
+ int (*read_seq_string)(struct cgroup *cont, struct cftype *cft,
+ struct seq_file *m);
- ssize_t (*write) (struct cgroup *cgrp, struct cftype *cft,
- struct file *file,
- const char __user *buf, size_t nbytes, loff_t *ppos);
+ ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft,
+ struct file *file,
+ const char __user *buf, size_t nbytes, loff_t *ppos);
/*
* write_u64() is a shortcut for the common case of accepting
* a single integer (as parsed by simple_strtoull) from
* userspace. Use in place of write(); return 0 or error.
*/
- int (*write_u64) (struct cgroup *cgrp, struct cftype *cft, u64 val);
+ int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val);
/*
* write_s64() is a signed version of write_u64()
*/
- int (*write_s64) (struct cgroup *cgrp, struct cftype *cft, s64 val);
+ int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
+ /*
+ * write_string() is passed a nul-terminated kernelspace
+ * buffer of maximum length determined by max_write_len.
+ * Returns 0 or -ve error code.
+ */
+ int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
+ const char *buffer);
/*
* trigger() callback can be used to get some kick from the
* userspace, when the actual string written is not important
*/
int (*trigger)(struct cgroup *cgrp, unsigned int event);
- int (*release) (struct inode *inode, struct file *file);
+ int (*release)(struct inode *inode, struct file *file);
};
struct cgroup_scanner {
return task_subsys_state(task, subsys_id)->cgroup;
}
-int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss);
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss,
+ char *nodename);
/* A cgroup_iter should be treated as an opaque object */
struct cgroup_iter {
typedef u_int32_t vgid_t;
#endif /*_VUID_T_ */
-#ifdef CONFIG_CODA_FS_OLD_API
-struct CodaFid {
- u_int32_t opaque[3];
-};
-
-static __inline__ ino_t coda_f2i(struct CodaFid *fid)
-{
- if ( ! fid )
- return 0;
- if (fid->opaque[1] == 0xfffffffe || fid->opaque[1] == 0xffffffff)
- return ((fid->opaque[0] << 20) | (fid->opaque[2] & 0xfffff));
- else
- return (fid->opaque[2] + (fid->opaque[1]<<10) + (fid->opaque[0]<<20));
-}
-
-struct coda_cred {
- vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, efftve, set, fs uid*/
- vgid_t cr_groupid, cr_egid, cr_sgid, cr_fsgid; /* same for groups */
-};
-
-#else /* not defined(CONFIG_CODA_FS_OLD_API) */
-
struct CodaFid {
u_int32_t opaque[4];
};
#define coda_f2i(fid)\
(fid ? (fid->opaque[3] ^ (fid->opaque[2]<<10) ^ (fid->opaque[1]<<20) ^ fid->opaque[0]) : 0)
-#endif
-
#ifndef _VENUS_VATTR_T_
#define _VENUS_VATTR_T_
/*
#define CIOC_KERNEL_VERSION _IOWR('c', 10, size_t)
-#if 0
-#define CODA_KERNEL_VERSION 0 /* don't care about kernel version number */
-#define CODA_KERNEL_VERSION 1 /* The old venus 4.6 compatible interface */
-#endif
-#ifdef CONFIG_CODA_FS_OLD_API
-#define CODA_KERNEL_VERSION 2 /* venus_lookup got an extra parameter */
-#else
#define CODA_KERNEL_VERSION 3 /* 128-bit file identifiers */
-#endif
/*
* Venus <-> Coda RPC arguments
struct coda_in_hdr {
u_int32_t opcode;
u_int32_t unique; /* Keep multiple outstanding msgs distinct */
-#ifdef CONFIG_CODA_FS_OLD_API
- u_int16_t pid; /* Common to all */
- u_int16_t pgid; /* Common to all */
- u_int16_t sid; /* Common to all */
- struct coda_cred cred; /* Common to all */
-#else
pid_t pid;
pid_t pgid;
vuid_t uid;
-#endif
};
/* Really important that opcode and unique are 1st two fields! */
/* CODA_PURGEUSER is a venus->kernel call */
struct coda_purgeuser_out {
struct coda_out_hdr oh;
-#ifdef CONFIG_CODA_FS_OLD_API
- struct coda_cred cred;
-#else
vuid_t uid;
-#endif
};
/* coda_zapfile: */
*
* Interface between console.c, selection.c and consolemap.c
*/
+#ifndef __LINUX_CONSOLEMAP_H__
+#define __LINUX_CONSOLEMAP_H__
+
#define LAT1_MAP 0
#define GRAF_MAP 1
#define IBMPC_MAP 2
#include <linux/types.h>
+#ifdef CONFIG_CONSOLE_TRANSLATIONS
struct vc_data;
extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode);
extern u32 conv_8bit_to_uni(unsigned char c);
extern int conv_uni_to_8bit(u32 uni);
void console_map_init(void);
+#else
+#define inverse_translate(conp, glyph, uni) ((uint16_t)glyph)
+#define set_translate(m, vc) ((unsigned short *)NULL)
+#define conv_uni_to_pc(conp, ucs) ((int) (ucs > 0xff ? -1: ucs))
+#define conv_8bit_to_uni(c) ((uint32_t)(c))
+#define conv_uni_to_8bit(c) ((int) ((c) & 0xff))
+#define console_map_init(c) do { ; } while (0)
+#endif /* CONFIG_CONSOLE_TRANSLATIONS */
+
+#endif /* __LINUX_CONSOLEMAP_H__ */
#endif
int cpu_up(unsigned int cpu);
-
extern void cpu_hotplug_init(void);
+extern void cpu_maps_update_begin(void);
+extern void cpu_maps_update_done(void);
-#else
+#else /* CONFIG_SMP */
static inline int register_cpu_notifier(struct notifier_block *nb)
{
{
}
+static inline void cpu_maps_update_begin(void)
+{
+}
+
+static inline void cpu_maps_update_done(void)
+{
+}
+
#endif /* CONFIG_SMP */
extern struct sysdev_class cpu_sysdev_class;
-extern void cpu_maps_update_begin(void);
-extern void cpu_maps_update_done(void);
#ifdef CONFIG_HOTPLUG_CPU
/* Stop CPUs going up and down. */
#define vmcore_elf_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x))
+static inline int is_kdump_kernel(void)
+{
+ return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0;
+}
+#else /* !CONFIG_CRASH_DUMP */
+static inline int is_kdump_kernel(void) { return 0; }
#endif /* CONFIG_CRASH_DUMP */
+
+extern unsigned long saved_max_pfn;
#endif /* LINUX_CRASHDUMP_H */
extern void __delayacct_blkio_end(void);
extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
extern __u64 __delayacct_blkio_ticks(struct task_struct *);
+extern void __delayacct_freepages_start(void);
+extern void __delayacct_freepages_end(void);
static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
{
return 0;
}
+static inline void delayacct_freepages_start(void)
+{
+ if (current->delays)
+ __delayacct_freepages_start();
+}
+
+static inline void delayacct_freepages_end(void)
+{
+ if (current->delays)
+ __delayacct_freepages_end();
+}
+
#else
static inline void delayacct_set_flag(int flag)
{}
{ return 0; }
static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
{ return 0; }
+static inline void delayacct_freepages_start(void)
+{}
+static inline void delayacct_freepages_end(void)
+{}
+
#endif /* CONFIG_TASK_DELAY_ACCT */
#endif
#ifndef _LINUX_DIRENT_H
#define _LINUX_DIRENT_H
-struct dirent {
- long d_ino;
- __kernel_off_t d_off;
- unsigned short d_reclen;
- char d_name[256]; /* We must not include limits.h! */
-};
-
-struct dirent64 {
- __u64 d_ino;
- __s64 d_off;
- unsigned short d_reclen;
- unsigned char d_type;
- char d_name[256];
-};
-
-#ifdef __KERNEL__
-
struct linux_dirent64 {
u64 d_ino;
s64 d_off;
char d_name[0];
};
-#endif /* __KERNEL__ */
-
-
#endif
#ifdef __hurd__
#define i_translator osd1.hurd1.h_i_translator
-#define i_frag osd2.hurd2.h_i_frag;
-#define i_fsize osd2.hurd2.h_i_fsize;
+#define i_frag osd2.hurd2.h_i_frag
+#define i_fsize osd2.hurd2.h_i_fsize
#define i_uid_high osd2.hurd2.h_i_uid_high
#define i_gid_high osd2.hurd2.h_i_gid_high
#define i_author osd2.hurd2.h_i_author
extern void ext3_dirty_inode(struct inode *);
extern int ext3_change_inode_journal_flag(struct inode *, int);
extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
+extern int ext3_can_truncate(struct inode *inode);
extern void ext3_truncate (struct inode *);
extern void ext3_set_inode_flags(struct inode *);
extern void ext3_get_inode_flags(struct ext3_inode_info *);
+++ /dev/null
-#ifndef _LINUX_FD1772REG_H
-#define _LINUX_FD1772REG_H
-
-/*
-** WD1772 stuff - originally from the M68K Linux
- * Modified for Archimedes by Dave Gilbert (gilbertd@cs.man.ac.uk)
- */
-
-/* register codes */
-
-#define FDC1772SELREG_STP (0x80) /* command/status register */
-#define FDC1772SELREG_TRA (0x82) /* track register */
-#define FDC1772SELREG_SEC (0x84) /* sector register */
-#define FDC1772SELREG_DTA (0x86) /* data register */
-
-/* register names for FDC1772_READ/WRITE macros */
-
-#define FDC1772REG_CMD 0
-#define FDC1772REG_STATUS 0
-#define FDC1772REG_TRACK 2
-#define FDC1772REG_SECTOR 4
-#define FDC1772REG_DATA 6
-
-/* command opcodes */
-
-#define FDC1772CMD_RESTORE (0x00) /* - */
-#define FDC1772CMD_SEEK (0x10) /* | */
-#define FDC1772CMD_STEP (0x20) /* | TYP 1 Commands */
-#define FDC1772CMD_STIN (0x40) /* | */
-#define FDC1772CMD_STOT (0x60) /* - */
-#define FDC1772CMD_RDSEC (0x80) /* - TYP 2 Commands */
-#define FDC1772CMD_WRSEC (0xa0) /* - " */
-#define FDC1772CMD_RDADR (0xc0) /* - */
-#define FDC1772CMD_RDTRA (0xe0) /* | TYP 3 Commands */
-#define FDC1772CMD_WRTRA (0xf0) /* - */
-#define FDC1772CMD_FORCI (0xd0) /* - TYP 4 Command */
-
-/* command modifier bits */
-
-#define FDC1772CMDADD_SR6 (0x00) /* step rate settings */
-#define FDC1772CMDADD_SR12 (0x01)
-#define FDC1772CMDADD_SR2 (0x02)
-#define FDC1772CMDADD_SR3 (0x03)
-#define FDC1772CMDADD_V (0x04) /* verify */
-#define FDC1772CMDADD_H (0x08) /* wait for spin-up */
-#define FDC1772CMDADD_U (0x10) /* update track register */
-#define FDC1772CMDADD_M (0x10) /* multiple sector access */
-#define FDC1772CMDADD_E (0x04) /* head settling flag */
-#define FDC1772CMDADD_P (0x02) /* precompensation */
-#define FDC1772CMDADD_A0 (0x01) /* DAM flag */
-
-/* status register bits */
-
-#define FDC1772STAT_MOTORON (0x80) /* motor on */
-#define FDC1772STAT_WPROT (0x40) /* write protected (FDC1772CMD_WR*) */
-#define FDC1772STAT_SPINUP (0x20) /* motor speed stable (Type I) */
-#define FDC1772STAT_DELDAM (0x20) /* sector has deleted DAM (Type II+III) */
-#define FDC1772STAT_RECNF (0x10) /* record not found */
-#define FDC1772STAT_CRC (0x08) /* CRC error */
-#define FDC1772STAT_TR00 (0x04) /* Track 00 flag (Type I) */
-#define FDC1772STAT_LOST (0x04) /* Lost Data (Type II+III) */
-#define FDC1772STAT_IDX (0x02) /* Index status (Type I) */
-#define FDC1772STAT_DRQ (0x02) /* DRQ status (Type II+III) */
-#define FDC1772STAT_BUSY (0x01) /* FDC1772 is busy */
-
-
-/* PSG Port A Bit Nr 0 .. Side Sel .. 0 -> Side 1 1 -> Side 2 */
-#define DSKSIDE (0x01)
-
-#define DSKDRVNONE (0x06)
-#define DSKDRV0 (0x02)
-#define DSKDRV1 (0x04)
-
-/* step rates */
-#define FDC1772STEP_6 0x00
-#define FDC1772STEP_12 0x01
-#define FDC1772STEP_2 0x02
-#define FDC1772STEP_3 0x03
-
-#endif
#define FL_CLOSE 64 /* unlock on close */
#define FL_SLEEP 128 /* A blocking lock */
+/*
+ * Special return value from posix_lock_file() and vfs_lock_file() for
+ * asynchronous locking.
+ */
+#define FILE_LOCK_DEFERRED 1
+
/*
* The POSIX file lock owner is determined by
* the "struct files_struct" in the thread group
/**
* INIT request/reply flags
+ *
+ * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
#define FUSE_FILE_OPS (1 << 2)
#define FUSE_ATOMIC_O_TRUNC (1 << 3)
+#define FUSE_EXPORT_SUPPORT (1 << 4)
#define FUSE_BIG_WRITES (1 << 5)
/**
extern char *disk_name (struct gendisk *hd, int part, char *buf);
extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
-extern void add_partition(struct gendisk *, int, sector_t, sector_t, int);
+extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int);
extern void delete_partition(struct gendisk *, int);
extern void printk_all_partitions(void);
WARN_ON(1);
}
+static inline int gpio_export(unsigned gpio, bool direction_may_change)
+{
+ /* GPIO can never have been requested or set as {in,out}put */
+ WARN_ON(1);
+ return -EINVAL;
+}
+
+static inline void gpio_unexport(unsigned gpio)
+{
+ /* GPIO can never have been exported */
+ WARN_ON(1);
+}
+
static inline int gpio_to_irq(unsigned gpio)
{
/* GPIO can never have been requested or set as input */
--- /dev/null
+#ifndef __LINUX_I2C_MAX732X_H
+#define __LINUX_I2C_MAX732X_H
+
+/* platform data for the MAX732x 8/16-bit I/O expander driver */
+
+struct max732x_platform_data {
+ /* number of the first GPIO */
+ unsigned gpio_base;
+
+ void *context; /* param to setup/teardown */
+
+ int (*setup)(struct i2c_client *client,
+ unsigned gpio, unsigned ngpio,
+ void *context);
+ int (*teardown)(struct i2c_client *client,
+ unsigned gpio, unsigned ngpio,
+ void *context);
+};
+#endif /* __LINUX_I2C_MAX732X_H */
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/init.h>
+#include <linux/rcupdate.h>
#if BITS_PER_LONG == 32
# define IDR_BITS 5
unsigned long bitmap; /* A zero bit means "space here" */
struct idr_layer *ary[1<<IDR_BITS];
int count; /* When zero, we can release it */
+ struct rcu_head rcu_head;
};
struct idr {
}
#define DEFINE_IDR(name) struct idr name = IDR_INIT(name)
+/* Actions to be taken after a call to _idr_sub_alloc */
+#define IDR_NEED_TO_GROW -2
+#define IDR_NOMORE_SPACE -3
+
+#define _idr_rc_to_errno(rc) ((rc) == -1 ? -EAGAIN : -ENOSPC)
+
+/**
+ * idr synchronization (stolen from radix-tree.h)
+ *
+ * idr_find() is able to be called locklessly, using RCU. The caller must
+ * ensure calls to this function are made within rcu_read_lock() regions.
+ * Other readers (lock-free or otherwise) and modifications may be running
+ * concurrently.
+ *
+ * It is still required that the caller manage the synchronization and
+ * lifetimes of the items. So if RCU lock-free lookups are used, typically
+ * this would mean that the items have their own locks, or are amenable to
+ * lock-free access; and that the items are freed by RCU (or only freed after
+ * having been deleted from the idr tree *and* a synchronize_rcu() grace
+ * period).
+ */
+
/*
* This is what we export.
*/
#define security_initcall(fn) module_init(fn)
-/* These macros create a dummy inline: gcc 2.9x does not count alias
- as usage, hence the `unused function' warning when __init functions
- are declared static. We use the dummy __*_module_inline functions
- both to kill the warning and check the type of the init/cleanup
- function. */
-
-/* Each module must use one module_init(), or one no_module_init */
+/* Each module must use one module_init(). */
#define module_init(initfn) \
static inline initcall_t __inittest(void) \
{ return initfn; } \
.state = 0, \
.stack = &init_thread_info, \
.usage = ATOMIC_INIT(2), \
- .flags = 0, \
+ .flags = PF_KTHREAD, \
.lock_depth = -1, \
.prio = MAX_PRIO-20, \
.static_prio = MAX_PRIO-20, \
int msg_ctlmni;
atomic_t msg_bytes;
atomic_t msg_hdrs;
+ int auto_msgmni;
size_t shm_ctlmax;
size_t shm_ctlall;
extern int register_ipcns_notifier(struct ipc_namespace *);
extern int cond_register_ipcns_notifier(struct ipc_namespace *);
-extern int unregister_ipcns_notifier(struct ipc_namespace *);
+extern void unregister_ipcns_notifier(struct ipc_namespace *);
extern int ipcns_notify(unsigned long);
#else /* CONFIG_SYSVIPC */
#ifndef _LINUX_TRACE_IRQFLAGS_H
#define _LINUX_TRACE_IRQFLAGS_H
+#include <linux/typecheck.h>
+
#ifdef CONFIG_TRACE_IRQFLAGS
extern void trace_softirqs_on(unsigned long ip);
extern void trace_softirqs_off(unsigned long ip);
do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
#define local_irq_disable() \
do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
-#define local_irq_save(flags) \
- do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0)
+#define local_irq_save(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ raw_local_irq_save(flags); \
+ trace_hardirqs_off(); \
+ } while (0)
-#define local_irq_restore(flags) \
- do { \
- if (raw_irqs_disabled_flags(flags)) { \
- raw_local_irq_restore(flags); \
- trace_hardirqs_off(); \
- } else { \
- trace_hardirqs_on(); \
- raw_local_irq_restore(flags); \
- } \
+
+#define local_irq_restore(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ if (raw_irqs_disabled_flags(flags)) { \
+ raw_local_irq_restore(flags); \
+ trace_hardirqs_off(); \
+ } else { \
+ trace_hardirqs_on(); \
+ raw_local_irq_restore(flags); \
+ } \
} while (0)
#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */
/*
*/
# define raw_local_irq_disable() local_irq_disable()
# define raw_local_irq_enable() local_irq_enable()
-# define raw_local_irq_save(flags) local_irq_save(flags)
-# define raw_local_irq_restore(flags) local_irq_restore(flags)
+# define raw_local_irq_save(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ local_irq_save(flags); \
+ } while (0)
+# define raw_local_irq_restore(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ local_irq_restore(flags); \
+ } while (0)
#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
raw_safe_halt(); \
} while (0)
-#define local_save_flags(flags) raw_local_save_flags(flags)
+#define local_save_flags(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ raw_local_save_flags(flags); \
+ } while (0)
#define irqs_disabled() \
({ \
raw_irqs_disabled_flags(_flags); \
})
-#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags)
+#define irqs_disabled_flags(flags) \
+({ \
+ typecheck(unsigned long, flags); \
+ raw_irqs_disabled_flags(flags); \
+})
#endif /* CONFIG_X86 */
#endif
#define _LINUX_KALLSYMS_H
#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/stddef.h>
#define KSYM_NAME_LEN 128
print_symbol(fmt, (unsigned long)addr);
}
-#ifndef CONFIG_64BIT
-#define print_ip_sym(ip) \
-do { \
- printk("[<%08lx>]", ip); \
- print_symbol(" %s\n", ip); \
-} while(0)
-#else
-#define print_ip_sym(ip) \
-do { \
- printk("[<%016lx>]", ip); \
- print_symbol(" %s\n", ip); \
-} while(0)
-#endif
+static inline void print_ip_sym(unsigned long ip)
+{
+ printk("[<%p>]", (void *) ip);
+ print_symbol(" %s\n", ip);
+}
#endif /*_LINUX_KALLSYMS_H*/
#include <linux/compiler.h>
#include <linux/bitops.h>
#include <linux/log2.h>
+#include <linux/typecheck.h>
+#include <linux/ratelimit.h>
#include <asm/byteorder.h>
#include <asm/bug.h>
asmlinkage int printk(const char * fmt, ...)
__attribute__ ((format (printf, 1, 2))) __cold;
-extern int printk_ratelimit_jiffies;
-extern int printk_ratelimit_burst;
+extern struct ratelimit_state printk_ratelimit_state;
extern int printk_ratelimit(void);
-extern int __ratelimit(int ratelimit_jiffies, int ratelimit_burst);
-extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst);
extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
unsigned int interval_msec);
#else
__attribute__ ((format (printf, 1, 2)));
static inline int __cold printk(const char *s, ...) { return 0; }
static inline int printk_ratelimit(void) { return 0; }
-static inline int __printk_ratelimit(int ratelimit_jiffies, \
- int ratelimit_burst) { return 0; }
static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \
unsigned int interval_msec) \
{ return false; }
const typeof( ((type *)0)->member ) *__mptr = (ptr); \
(type *)( (char *)__mptr - offsetof(type,member) );})
-/*
- * Check at compile time that something is of a particular type.
- * Always evaluates to 1 so you may use it easily in comparisons.
- */
-#define typecheck(type,x) \
-({ type __dummy; \
- typeof(x) __dummy2; \
- (void)(&__dummy == &__dummy2); \
- 1; \
-})
-
-/*
- * Check at compile time that 'function' is a certain type, or is a pointer
- * to that type (needs to use typedef for the function type.)
- */
-#define typecheck_fn(type,function) \
-({ typeof(type) __tmp = function; \
- (void)__tmp; \
-})
-
struct sysinfo;
extern int do_sysinfo(struct sysinfo *info);
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#include <linux/gfp.h>
#include <linux/stddef.h>
#include <linux/errno.h>
#include <linux/compiler.h>
struct subprocess_info;
/* Allocate a subprocess_info structure */
-struct subprocess_info *call_usermodehelper_setup(char *path,
- char **argv, char **envp);
+struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
+ char **envp, gfp_t gfp_mask);
/* Set various pieces of state into the subprocess_info structure */
void call_usermodehelper_setkeys(struct subprocess_info *info,
call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait)
{
struct subprocess_info *info;
+ gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
- info = call_usermodehelper_setup(path, argv, envp);
+ info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
if (info == NULL)
return -ENOMEM;
return call_usermodehelper_exec(info, wait);
struct key *session_keyring, enum umh_wait wait)
{
struct subprocess_info *info;
+ gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
- info = call_usermodehelper_setup(path, argv, envp);
+ info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
if (info == NULL)
return -ENOMEM;
int nmissed;
size_t data_size;
struct hlist_head free_instances;
- struct hlist_head used_instances;
+ spinlock_t lock;
};
struct kretprobe_instance {
- struct hlist_node uflist; /* either on free list or used list */
struct hlist_node hlist;
struct kretprobe *rp;
kprobe_opcode_t *ret_addr;
}
#endif /* CONFIG_KPROBES_SANITY_TEST */
-extern spinlock_t kretprobe_lock;
extern struct mutex kprobe_mutex;
extern int arch_prepare_kprobe(struct kprobe *p);
extern void arch_arm_kprobe(struct kprobe *p);
/* Get the kprobe at this addr (if any) - called with preemption disabled */
struct kprobe *get_kprobe(void *addr);
+void kretprobe_hash_lock(struct task_struct *tsk,
+ struct hlist_head **head, unsigned long *flags);
+void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags);
struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk);
/* kprobe_running() will just return the current_kprobe on this CPU */
struct task_struct *kthread_create(int (*threadfn)(void *data),
void *data,
- const char namefmt[], ...);
+ const char namefmt[], ...)
+ __attribute__((format(printf, 3, 4)));
/**
* kthread_run - create and wake a thread.
* Insert a new entry after the specified head.
* This is good for implementing stacks.
*/
-#ifndef CONFIG_DEBUG_LIST
static inline void list_add(struct list_head *new, struct list_head *head)
{
__list_add(new, head, head->next);
}
-#else
-extern void list_add(struct list_head *new, struct list_head *head);
-#endif
/**
extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask);
extern void mem_cgroup_uncharge_page(struct page *page);
+extern void mem_cgroup_uncharge_cache_page(struct page *page);
extern void mem_cgroup_move_lists(struct page *page, bool active);
+extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask);
+
extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
struct list_head *dst,
unsigned long *scanned, int order,
#define mm_match_cgroup(mm, cgroup) \
((cgroup) == mem_cgroup_from_task((mm)->owner))
-extern int mem_cgroup_prepare_migration(struct page *page);
+extern int
+mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
extern void mem_cgroup_end_migration(struct page *page);
-extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);
/*
* For memory reclaim.
{
}
+static inline void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+}
+
+static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
+{
+ return 0;
+}
+
static inline void mem_cgroup_move_lists(struct page *page, bool active)
{
}
return 1;
}
-static inline int mem_cgroup_prepare_migration(struct page *page)
+static inline int
+mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
{
return 0;
}
{
}
-static inline void
-mem_cgroup_page_migration(struct page *page, struct page *newpage)
-{
-}
-
static inline int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
{
return 0;
#endif
};
+struct core_thread {
+ struct task_struct *task;
+ struct core_thread *next;
+};
+
+struct core_state {
+ atomic_t nr_threads;
+ struct core_thread dumper;
+ struct completion startup;
+};
+
struct mm_struct {
struct vm_area_struct * mmap; /* list of VMAs */
struct rb_root mm_rb;
atomic_t mm_users; /* How many users with user space? */
atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */
int map_count; /* number of VMAs */
- int core_waiters;
struct rw_semaphore mmap_sem;
spinlock_t page_table_lock; /* Protects page tables and some counters */
unsigned long flags; /* Must use atomic bitops to access the bits */
- /* coredumping support */
- struct completion *core_startup_done, core_done;
+ struct core_state *core_state; /* coredumping support */
/* aio bits */
rwlock_t ioctx_list_lock; /* aio lock */
#define _LINUX_MSDOS_FS_H
#include <linux/magic.h>
+#include <asm/byteorder.h>
/*
* The MS-DOS filesystem constants/structures
*/
-#include <asm/byteorder.h>
#define SECTOR_SIZE 512 /* sector size (bytes) */
#define SECTOR_BITS 9 /* log2(SECTOR_SIZE) */
#define IS_FSINFO(x) (le32_to_cpu((x)->signature1) == FAT_FSINFO_SIG1 \
&& le32_to_cpu((x)->signature2) == FAT_FSINFO_SIG2)
+struct __fat_dirent {
+ long d_ino;
+ __kernel_off_t d_off;
+ unsigned short d_reclen;
+ char d_name[256]; /* We must not include limits.h! */
+};
+
/*
* ioctl commands
*/
-#define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct dirent [2])
-#define VFAT_IOCTL_READDIR_SHORT _IOR('r', 2, struct dirent [2])
+#define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct __fat_dirent[2])
+#define VFAT_IOCTL_READDIR_SHORT _IOR('r', 2, struct __fat_dirent[2])
/* <linux/videotext.h> has used 0x72 ('r') in collision, so skip a few */
#define FAT_IOCTL_GET_ATTRIBUTES _IOR('r', 0x10, __u32)
#define FAT_IOCTL_SET_ATTRIBUTES _IOW('r', 0x11, __u32)
-/*
- * vfat shortname flags
- */
-#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */
-#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */
-#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */
-#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */
-#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */
-
struct fat_boot_sector {
__u8 ignored[3]; /* Boot strap short or near jump */
__u8 system_id[8]; /* Name - can be used to special case
__u8 name11_12[4]; /* last 2 characters in name */
};
-struct fat_slot_info {
- loff_t i_pos; /* on-disk position of directory entry */
- loff_t slot_off; /* offset for slot or de start */
- int nr_slots; /* number of slots + 1(de) in filename */
- struct msdos_dir_entry *de;
- struct buffer_head *bh;
-};
-
#ifdef __KERNEL__
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/mutex.h>
+/*
+ * vfat shortname flags
+ */
+#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */
+#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */
+#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */
+#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */
+#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */
+
struct fat_mount_options {
uid_t fs_uid;
gid_t fs_gid;
utf8:1, /* Use of UTF-8 character set (Default) */
unicode_xlate:1, /* create escape sequences for unhandled Unicode */
numtail:1, /* Does first alias have a numeric '~1' type tail? */
- atari:1, /* Use Atari GEMDOS variation of MS-DOS fs */
flush:1, /* write things quickly */
nocase:1, /* Does this need case conversion? 0=need case conversion*/
- usefree:1; /* Use free_clusters for FAT32 */
+ usefree:1, /* Use free_clusters for FAT32 */
+ tz_utc:1; /* Filesystem timestamps are in UTC */
};
#define FAT_HASH_BITS 8
struct inode vfs_inode;
};
+struct fat_slot_info {
+ loff_t i_pos; /* on-disk position of directory entry */
+ loff_t slot_off; /* offset for slot or de start */
+ int nr_slots; /* number of slots + 1(de) in filename */
+ struct msdos_dir_entry *de;
+ struct buffer_head *bh;
+};
+
static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb)
{
return sb->s_fs_info;
extern void fat_fs_panic(struct super_block *s, const char *fmt, ...);
extern void fat_clusters_flush(struct super_block *sb);
extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
-extern int date_dos2unix(unsigned short time, unsigned short date);
-extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date);
+extern int date_dos2unix(unsigned short time, unsigned short date, int tz_utc);
+extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date,
+ int tz_utc);
extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
int fat_cache_init(void);
* @size: how many physical eraseblocks are reserved for this volume
* @used_bytes: how many bytes of data this volume contains
* @used_ebs: how many physical eraseblocks of this volume actually contain any
- * data
+ * data
* @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME)
* @corrupted: non-zero if the volume is corrupted (static volumes only)
* @upd_marker: non-zero if the volume has update marker set
* @alignment: volume alignment
* @usable_leb_size: how many bytes are available in logical eraseblocks of
- * this volume
+ * this volume
* @name_len: volume name length
* @name: volume name
* @cdev: UBI volume character device major and minor numbers
int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum);
int ubi_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype);
int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum);
+int ubi_sync(int ubi_num);
/*
* This function is the same as the 'ubi_leb_read()' function, but it does not
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
-extern int net_msg_cost;
-extern int net_msg_burst;
+extern struct ratelimit_state net_ratelimit_state;
#endif
#endif /* __KERNEL__ */
#include <linux/types.h>
#include <linux/unistd.h>
-#include <linux/dirent.h>
#include <linux/fs.h>
#include <linux/posix_acl.h>
#include <linux/mount.h>
#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */
#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task,
* not handling interrupts, soon dead */
+#define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug
+ * lock is dropped */
/* Used for CPU hotplug events occuring while tasks are frozen due to a suspend
* operation in progress
}
#ifdef CONFIG_CGROUP_NS
-int ns_cgroup_clone(struct task_struct *tsk);
+int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid);
#else
-static inline int ns_cgroup_clone(struct task_struct *tsk) { return 0; }
+static inline int ns_cgroup_clone(struct task_struct *tsk, struct pid *pid)
+{
+ return 0;
+}
#endif
#endif
#define PCI_DEVICE_ID_INTEL_ICH10_4 0x3a30
#define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60
#define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f
+#define PCI_DEVICE_ID_INTEL_5100_16 0x65f0
+#define PCI_DEVICE_ID_INTEL_5100_21 0x65f5
+#define PCI_DEVICE_ID_INTEL_5100_22 0x65f6
#define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030
#define PCI_DEVICE_ID_INTEL_5400_FBD0 0x4035
#define PCI_DEVICE_ID_INTEL_5400_FBD1 0x4036
*/
struct upid {
- /* Try to keep pid_chain in the same cacheline as nr for find_pid */
+ /* Try to keep pid_chain in the same cacheline as nr for find_vpid */
int nr;
struct pid_namespace *ns;
struct hlist_node pid_chain;
struct pid
{
atomic_t count;
+ unsigned int level;
/* lists of tasks that use this pid */
struct hlist_head tasks[PIDTYPE_MAX];
struct rcu_head rcu;
- unsigned int level;
struct upid numbers[1];
};
* or rcu_read_lock() held.
*
* find_pid_ns() finds the pid in the namespace specified
- * find_pid() find the pid by its global id, i.e. in the init namespace
* find_vpid() finr the pid by its virtual id, i.e. in the current namespace
*
- * see also find_task_by_pid() set in include/linux/sched.h
+ * see also find_task_by_vpid() set in include/linux/sched.h
*/
extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns);
extern struct pid *find_vpid(int nr);
-extern struct pid *find_pid(int nr);
/*
* Lookup a PID in the hash table, and return with it's count elevated.
#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
+struct bsd_acct_struct;
+
struct pid_namespace {
struct kref kref;
struct pidmap pidmap[PIDMAP_ENTRIES];
#ifdef CONFIG_PROC_FS
struct vfsmount *proc_mnt;
#endif
+#ifdef CONFIG_BSD_PROCESS_ACCT
+ struct bsd_acct_struct *bacct;
+#endif
};
extern struct pid_namespace init_pid_ns;
return tsk->nsproxy->pid_ns->child_reaper;
}
+void pidhash_init(void);
+void pidmap_init(void);
+
#endif /* _LINUX_PID_NS_H */
int pde_users; /* number of callers into module in progress */
spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
struct completion *pde_unload_completion;
+ struct list_head pde_openers; /* who did ->open, but not ->release */
};
struct kcore_list {
extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
extern const struct file_operations proc_kcore_operations;
-extern const struct file_operations proc_kmsg_operations;
extern const struct file_operations ppc_htab_operations;
extern int pid_ns_prepare_proc(struct pid_namespace *ns);
#include <asm/errno.h>
-extern int prof_on __read_mostly;
-
#define CPU_PROFILING 1
#define SCHED_PROFILING 2
#define SLEEP_PROFILING 3
struct pt_regs;
struct notifier_block;
+#if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS)
+void create_prof_cpu_mask(struct proc_dir_entry *de);
+#else
+static inline void create_prof_cpu_mask(struct proc_dir_entry *de)
+{
+}
+#endif
+
+enum profile_type {
+ PROFILE_TASK_EXIT,
+ PROFILE_MUNMAP
+};
+
+#ifdef CONFIG_PROFILING
+
+extern int prof_on __read_mostly;
+
/* init basic kernel profiler */
void __init profile_init(void);
-void profile_tick(int);
+void profile_tick(int type);
/*
* Add multiple profiler hits to a given address:
*/
-void profile_hits(int, void *ip, unsigned int nr_hits);
+void profile_hits(int type, void *ip, unsigned int nr_hits);
/*
* Single profiler hit:
profile_hits(type, ip, 1);
}
-#ifdef CONFIG_PROC_FS
-void create_prof_cpu_mask(struct proc_dir_entry *);
-#else
-#define create_prof_cpu_mask(x) do { (void)(x); } while (0)
-#endif
-
-enum profile_type {
- PROFILE_TASK_EXIT,
- PROFILE_MUNMAP
-};
-
-#ifdef CONFIG_PROFILING
-
struct task_struct;
struct mm_struct;
#else
+#define prof_on 0
+
+static inline void profile_init(void)
+{
+ return;
+}
+
+static inline void profile_tick(int type)
+{
+ return;
+}
+
+static inline void profile_hits(int type, void *ip, unsigned int nr_hits)
+{
+ return;
+}
+
+static inline void profile_hit(int type, void *ip)
+{
+ return;
+}
+
static inline int task_handoff_register(struct notifier_block * n)
{
return -ENOSYS;
#define __DQUOT_VERSION__ "dquot_6.5.1"
#define __DQUOT_NUM_VERSION__ 6*10000+5*100+1
-typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
-typedef __u64 qsize_t; /* Type in which we store sizes */
-
/* Size of blocks in which are counted size limits */
#define QUOTABLOCK_BITS 10
#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
#define QUOTA_NL_BHARDWARN 4 /* Block hardlimit reached */
#define QUOTA_NL_BSOFTLONGWARN 5 /* Block grace time expired */
#define QUOTA_NL_BSOFTWARN 6 /* Block softlimit reached */
+#define QUOTA_NL_IHARDBELOW 7 /* Usage got below inode hardlimit */
+#define QUOTA_NL_ISOFTBELOW 8 /* Usage got below inode softlimit */
+#define QUOTA_NL_BHARDBELOW 9 /* Usage got below block hardlimit */
+#define QUOTA_NL_BSOFTBELOW 10 /* Usage got below block softlimit */
enum {
QUOTA_NL_C_UNSPEC,
#include <asm/atomic.h>
+typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
+typedef __u64 qsize_t; /* Type in which we store sizes */
+
extern spinlock_t dq_data_lock;
/* Maximal numbers of writes for quota operation (insert/delete/update)
#define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? */
extern void mark_info_dirty(struct super_block *sb, int type);
-#define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags)
-#define info_any_dquot_dirty(info) (!list_empty(&(info)->dqi_dirty_list))
-#define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info))
-
-#define sb_dqopt(sb) (&(sb)->s_dquot)
-#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type))
+static inline int info_dirty(struct mem_dqinfo *info)
+{
+ return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
+}
struct dqstats {
int lookups;
struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */
};
-#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \
- (sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED))
-
-#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \
- sb_has_quota_enabled(sb, GRPQUOTA))
-
-#define sb_has_quota_suspended(sb, type) \
- ((type) == USRQUOTA ? (sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED) : \
- (sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED))
-
-#define sb_any_quota_suspended(sb) (sb_has_quota_suspended(sb, USRQUOTA) | \
- sb_has_quota_suspended(sb, GRPQUOTA))
-
int register_quota_format(struct quota_format_type *fmt);
void unregister_quota_format(struct quota_format_type *fmt);
#define _LINUX_QUOTAOPS_
#include <linux/smp_lock.h>
-
#include <linux/fs.h>
+static inline struct quota_info *sb_dqopt(struct super_block *sb)
+{
+ return &sb->s_dquot;
+}
+
#if defined(CONFIG_QUOTA)
/*
* declaration of quota_function calls in kernel.
*/
-extern void sync_dquots(struct super_block *sb, int type);
-
-extern int dquot_initialize(struct inode *inode, int type);
-extern int dquot_drop(struct inode *inode);
-
-extern int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
-extern int dquot_alloc_inode(const struct inode *inode, unsigned long number);
-
-extern int dquot_free_space(struct inode *inode, qsize_t number);
-extern int dquot_free_inode(const struct inode *inode, unsigned long number);
-
-extern int dquot_transfer(struct inode *inode, struct iattr *iattr);
-extern int dquot_commit(struct dquot *dquot);
-extern int dquot_acquire(struct dquot *dquot);
-extern int dquot_release(struct dquot *dquot);
-extern int dquot_commit_info(struct super_block *sb, int type);
-extern int dquot_mark_dquot_dirty(struct dquot *dquot);
-
-extern int vfs_quota_on(struct super_block *sb, int type, int format_id,
- char *path, int remount);
-extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
- int format_id, int type);
-extern int vfs_quota_off(struct super_block *sb, int type, int remount);
-extern int vfs_quota_sync(struct super_block *sb, int type);
-extern int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
-extern int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
-extern int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
-extern int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+void sync_dquots(struct super_block *sb, int type);
+
+int dquot_initialize(struct inode *inode, int type);
+int dquot_drop(struct inode *inode);
+
+int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
+int dquot_alloc_inode(const struct inode *inode, unsigned long number);
+
+int dquot_free_space(struct inode *inode, qsize_t number);
+int dquot_free_inode(const struct inode *inode, unsigned long number);
+
+int dquot_transfer(struct inode *inode, struct iattr *iattr);
+int dquot_commit(struct dquot *dquot);
+int dquot_acquire(struct dquot *dquot);
+int dquot_release(struct dquot *dquot);
+int dquot_commit_info(struct super_block *sb, int type);
+int dquot_mark_dquot_dirty(struct dquot *dquot);
+
+int vfs_quota_on(struct super_block *sb, int type, int format_id,
+ char *path, int remount);
+int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
+ int format_id, int type);
+int vfs_quota_off(struct super_block *sb, int type, int remount);
+int vfs_quota_sync(struct super_block *sb, int type);
+int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
+int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
+int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+
+void vfs_dq_drop(struct inode *inode);
+int vfs_dq_transfer(struct inode *inode, struct iattr *iattr);
+int vfs_dq_quota_on_remount(struct super_block *sb);
+
+static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
+{
+ return sb_dqopt(sb)->info + type;
+}
+
+/*
+ * Functions for checking status of quota
+ */
+
+static inline int sb_has_quota_enabled(struct super_block *sb, int type)
+{
+ if (type == USRQUOTA)
+ return sb_dqopt(sb)->flags & DQUOT_USR_ENABLED;
+ return sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED;
+}
+
+static inline int sb_any_quota_enabled(struct super_block *sb)
+{
+ return sb_has_quota_enabled(sb, USRQUOTA) ||
+ sb_has_quota_enabled(sb, GRPQUOTA);
+}
+
+static inline int sb_has_quota_suspended(struct super_block *sb, int type)
+{
+ if (type == USRQUOTA)
+ return sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED;
+ return sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED;
+}
+
+static inline int sb_any_quota_suspended(struct super_block *sb)
+{
+ return sb_has_quota_suspended(sb, USRQUOTA) ||
+ sb_has_quota_suspended(sb, GRPQUOTA);
+}
/*
* Operations supported for diskquotas.
/* It is better to call this function outside of any transaction as it might
* need a lot of space in journal for dquot structure allocation. */
-static inline void DQUOT_INIT(struct inode *inode)
+static inline void vfs_dq_init(struct inode *inode)
{
BUG_ON(!inode->i_sb);
if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode))
inode->i_sb->dq_op->initialize(inode, -1);
}
-/* The same as with DQUOT_INIT */
-static inline void DQUOT_DROP(struct inode *inode)
-{
- /* Here we can get arbitrary inode from clear_inode() so we have
- * to be careful. OTOH we don't need locking as quota operations
- * are allowed to change only at mount time */
- if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
- && inode->i_sb->dq_op->drop) {
- int cnt;
- /* Test before calling to rule out calls from proc and such
- * where we are not allowed to block. Note that this is
- * actually reliable test even without the lock - the caller
- * must assure that nobody can come after the DQUOT_DROP and
- * add quota pointers back anyway */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (inode->i_dquot[cnt] != NODQUOT)
- break;
- if (cnt < MAXQUOTAS)
- inode->i_sb->dq_op->drop(inode);
- }
-}
-
/* The following allocation/freeing/transfer functions *must* be called inside
* a transaction (deadlocks possible otherwise) */
-static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
{
if (sb_any_quota_enabled(inode->i_sb)) {
/* Used space is updated in alloc_space() */
return 0;
}
-static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
{
int ret;
- if (!(ret = DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr)))
+ if (!(ret = vfs_dq_prealloc_space_nodirty(inode, nr)))
mark_inode_dirty(inode);
return ret;
}
-static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
{
if (sb_any_quota_enabled(inode->i_sb)) {
/* Used space is updated in alloc_space() */
return 0;
}
-static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
{
int ret;
- if (!(ret = DQUOT_ALLOC_SPACE_NODIRTY(inode, nr)))
+ if (!(ret = vfs_dq_alloc_space_nodirty(inode, nr)))
mark_inode_dirty(inode);
return ret;
}
-static inline int DQUOT_ALLOC_INODE(struct inode *inode)
+static inline int vfs_dq_alloc_inode(struct inode *inode)
{
if (sb_any_quota_enabled(inode->i_sb)) {
- DQUOT_INIT(inode);
+ vfs_dq_init(inode);
if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
return 1;
}
return 0;
}
-static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
{
if (sb_any_quota_enabled(inode->i_sb))
inode->i_sb->dq_op->free_space(inode, nr);
inode_sub_bytes(inode, nr);
}
-static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
{
- DQUOT_FREE_SPACE_NODIRTY(inode, nr);
+ vfs_dq_free_space_nodirty(inode, nr);
mark_inode_dirty(inode);
}
-static inline void DQUOT_FREE_INODE(struct inode *inode)
+static inline void vfs_dq_free_inode(struct inode *inode)
{
if (sb_any_quota_enabled(inode->i_sb))
inode->i_sb->dq_op->free_inode(inode, 1);
}
-static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr)
-{
- if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
- DQUOT_INIT(inode);
- if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
- return 1;
- }
- return 0;
-}
-
/* The following two functions cannot be called inside a transaction */
-static inline void DQUOT_SYNC(struct super_block *sb)
+static inline void vfs_dq_sync(struct super_block *sb)
{
sync_dquots(sb, -1);
}
-static inline int DQUOT_OFF(struct super_block *sb, int remount)
+static inline int vfs_dq_off(struct super_block *sb, int remount)
{
int ret = -ENOSYS;
return ret;
}
-static inline int DQUOT_ON_REMOUNT(struct super_block *sb)
+#else
+
+static inline int sb_has_quota_enabled(struct super_block *sb, int type)
{
- int cnt;
- int ret = 0, err;
+ return 0;
+}
- if (!sb->s_qcop || !sb->s_qcop->quota_on)
- return -ENOSYS;
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
- if (err < 0 && !ret)
- ret = err;
- }
- return ret;
+static inline int sb_any_quota_enabled(struct super_block *sb)
+{
+ return 0;
}
-#else
+static inline int sb_has_quota_suspended(struct super_block *sb, int type)
+{
+ return 0;
+}
+
+static inline int sb_any_quota_suspended(struct super_block *sb)
+{
+ return 0;
+}
/*
* NO-OP when quota not configured.
#define sb_dquot_ops (NULL)
#define sb_quotactl_ops (NULL)
-static inline void DQUOT_INIT(struct inode *inode)
+static inline void vfs_dq_init(struct inode *inode)
{
}
-static inline void DQUOT_DROP(struct inode *inode)
+static inline void vfs_dq_drop(struct inode *inode)
{
}
-static inline int DQUOT_ALLOC_INODE(struct inode *inode)
+static inline int vfs_dq_alloc_inode(struct inode *inode)
{
return 0;
}
-static inline void DQUOT_FREE_INODE(struct inode *inode)
+static inline void vfs_dq_free_inode(struct inode *inode)
{
}
-static inline void DQUOT_SYNC(struct super_block *sb)
+static inline void vfs_dq_sync(struct super_block *sb)
{
}
-static inline int DQUOT_OFF(struct super_block *sb, int remount)
+static inline int vfs_dq_off(struct super_block *sb, int remount)
{
return 0;
}
-static inline int DQUOT_ON_REMOUNT(struct super_block *sb)
+static inline int vfs_dq_quota_on_remount(struct super_block *sb)
{
return 0;
}
-static inline int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr)
+static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
{
return 0;
}
-static inline int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
{
inode_add_bytes(inode, nr);
return 0;
}
-static inline int DQUOT_PREALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
{
- DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr);
+ vfs_dq_prealloc_space_nodirty(inode, nr);
mark_inode_dirty(inode);
return 0;
}
-static inline int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
{
inode_add_bytes(inode, nr);
return 0;
}
-static inline int DQUOT_ALLOC_SPACE(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
{
- DQUOT_ALLOC_SPACE_NODIRTY(inode, nr);
+ vfs_dq_alloc_space_nodirty(inode, nr);
mark_inode_dirty(inode);
return 0;
}
-static inline void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
{
inode_sub_bytes(inode, nr);
}
-static inline void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
{
- DQUOT_FREE_SPACE_NODIRTY(inode, nr);
+ vfs_dq_free_space_nodirty(inode, nr);
mark_inode_dirty(inode);
}
#endif /* CONFIG_QUOTA */
-static inline int DQUOT_PREALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
{
- return DQUOT_PREALLOC_SPACE_NODIRTY(inode,
+ return vfs_dq_prealloc_space_nodirty(inode,
nr << inode->i_sb->s_blocksize_bits);
}
-static inline int DQUOT_PREALLOC_BLOCK(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_prealloc_block(struct inode *inode, qsize_t nr)
{
- return DQUOT_PREALLOC_SPACE(inode,
+ return vfs_dq_prealloc_space(inode,
nr << inode->i_sb->s_blocksize_bits);
}
-static inline int DQUOT_ALLOC_BLOCK_NODIRTY(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_block_nodirty(struct inode *inode, qsize_t nr)
{
- return DQUOT_ALLOC_SPACE_NODIRTY(inode,
+ return vfs_dq_alloc_space_nodirty(inode,
nr << inode->i_sb->s_blocksize_bits);
}
-static inline int DQUOT_ALLOC_BLOCK(struct inode *inode, qsize_t nr)
+static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr)
{
- return DQUOT_ALLOC_SPACE(inode,
+ return vfs_dq_alloc_space(inode,
nr << inode->i_sb->s_blocksize_bits);
}
-static inline void DQUOT_FREE_BLOCK_NODIRTY(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_block_nodirty(struct inode *inode, qsize_t nr)
{
- DQUOT_FREE_SPACE_NODIRTY(inode, nr << inode->i_sb->s_blocksize_bits);
+ vfs_dq_free_space_nodirty(inode, nr << inode->i_sb->s_blocksize_bits);
}
-static inline void DQUOT_FREE_BLOCK(struct inode *inode, qsize_t nr)
+static inline void vfs_dq_free_block(struct inode *inode, qsize_t nr)
{
- DQUOT_FREE_SPACE(inode, nr << inode->i_sb->s_blocksize_bits);
+ vfs_dq_free_space(inode, nr << inode->i_sb->s_blocksize_bits);
}
+/*
+ * Define uppercase equivalents for compatibility with old function names
+ * Can go away when we think all users have been converted (15/04/2008)
+ */
+#define DQUOT_INIT(inode) vfs_dq_init(inode)
+#define DQUOT_DROP(inode) vfs_dq_drop(inode)
+#define DQUOT_PREALLOC_SPACE_NODIRTY(inode, nr) \
+ vfs_dq_prealloc_space_nodirty(inode, nr)
+#define DQUOT_PREALLOC_SPACE(inode, nr) vfs_dq_prealloc_space(inode, nr)
+#define DQUOT_ALLOC_SPACE_NODIRTY(inode, nr) \
+ vfs_dq_alloc_space_nodirty(inode, nr)
+#define DQUOT_ALLOC_SPACE(inode, nr) vfs_dq_alloc_space(inode, nr)
+#define DQUOT_PREALLOC_BLOCK_NODIRTY(inode, nr) \
+ vfs_dq_prealloc_block_nodirty(inode, nr)
+#define DQUOT_PREALLOC_BLOCK(inode, nr) vfs_dq_prealloc_block(inode, nr)
+#define DQUOT_ALLOC_BLOCK_NODIRTY(inode, nr) \
+ vfs_dq_alloc_block_nodirty(inode, nr)
+#define DQUOT_ALLOC_BLOCK(inode, nr) vfs_dq_alloc_block(inode, nr)
+#define DQUOT_ALLOC_INODE(inode) vfs_dq_alloc_inode(inode)
+#define DQUOT_FREE_SPACE_NODIRTY(inode, nr) \
+ vfs_dq_free_space_nodirty(inode, nr)
+#define DQUOT_FREE_SPACE(inode, nr) vfs_dq_free_space(inode, nr)
+#define DQUOT_FREE_BLOCK_NODIRTY(inode, nr) \
+ vfs_dq_free_block_nodirty(inode, nr)
+#define DQUOT_FREE_BLOCK(inode, nr) vfs_dq_free_block(inode, nr)
+#define DQUOT_FREE_INODE(inode) vfs_dq_free_inode(inode)
+#define DQUOT_TRANSFER(inode, iattr) vfs_dq_transfer(inode, iattr)
+#define DQUOT_SYNC(sb) vfs_dq_sync(sb)
+#define DQUOT_OFF(sb, remount) vfs_dq_off(sb, remount)
+#define DQUOT_ON_REMOUNT(sb) vfs_dq_quota_on_remount(sb)
+
#endif /* _LINUX_QUOTAOPS_ */
--- /dev/null
+#ifndef _LINUX_RATELIMIT_H
+#define _LINUX_RATELIMIT_H
+#include <linux/param.h>
+
+#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ)
+#define DEFAULT_RATELIMIT_BURST 10
+
+struct ratelimit_state {
+ int interval;
+ int burst;
+ int printed;
+ int missed;
+ unsigned long begin;
+};
+
+#define DEFINE_RATELIMIT_STATE(name, interval, burst) \
+ struct ratelimit_state name = {interval, burst,}
+
+extern int __ratelimit(struct ratelimit_state *rs);
+
+static inline int ratelimit(void)
+{
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+ return __ratelimit(&rs);
+}
+#endif
static inline void rcu_enter_nohz(void)
{
+ static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
+
smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
__get_cpu_var(rcu_dyntick_sched).dynticks++;
- WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1);
+ WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs);
}
static inline void rcu_exit_nohz(void)
{
+ static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
+
smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
__get_cpu_var(rcu_dyntick_sched).dynticks++;
- WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1));
+ WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1),
+ &rs);
}
#else /* CONFIG_NO_HZ */
** p is the array of __u32, i is the index into the array, v is the value
** to store there.
*/
-#define get_block_num(p, i) le32_to_cpu(get_unaligned((p) + (i)))
-#define put_block_num(p, i, v) put_unaligned(cpu_to_le32(v), (p) + (i))
+#define get_block_num(p, i) get_unaligned_le32((p) + (i))
+#define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i))
//
// in old version uniqueness field shows key type
atomic_t j_nonzerolen;
atomic_t j_commit_left;
atomic_t j_older_commits_done; /* all commits older than this on disk */
- struct semaphore j_commit_lock;
+ struct mutex j_commit_mutex;
unsigned long j_trans_id;
time_t j_timestamp;
struct reiserfs_list_bitmap *j_list_bitmap;
struct buffer_head *j_header_bh;
time_t j_trans_start_time; /* time this transaction started */
- struct semaphore j_lock;
- struct semaphore j_flush_sem;
+ struct mutex j_mutex;
+ struct mutex j_flush_mutex;
wait_queue_head_t j_join_wait; /* wait for current transaction to finish before starting new one */
atomic_t j_jlock; /* lock for j_join_wait */
int j_list_bitmap_index; /* number of next list bitmap to use */
ssize_t res_counter_read(struct res_counter *counter, int member,
const char __user *buf, size_t nbytes, loff_t *pos,
int (*read_strategy)(unsigned long long val, char *s));
-ssize_t res_counter_write(struct res_counter *counter, int member,
- const char __user *buf, size_t nbytes, loff_t *pos,
- int (*write_strategy)(char *buf, unsigned long long *val));
+
+typedef int (*write_strategy_fn)(const char *buf, unsigned long long *val);
+
+int res_counter_memparse_write_strategy(const char *buf,
+ unsigned long long *res);
+
+int res_counter_write(struct res_counter *counter, int member,
+ const char *buffer, write_strategy_fn write_strategy);
/*
* the field descriptors. one for each member of res_counter
* counter->limit _locked call expects the counter->lock to be taken
*/
-int res_counter_charge_locked(struct res_counter *counter, unsigned long val);
-int res_counter_charge(struct res_counter *counter, unsigned long val);
+int __must_check res_counter_charge_locked(struct res_counter *counter,
+ unsigned long val);
+int __must_check res_counter_charge(struct res_counter *counter,
+ unsigned long val);
/*
* uncharge - tell that some portion of the resource is released
cnt->failcnt = 0;
spin_unlock_irqrestore(&cnt->lock, flags);
}
+
+static inline int res_counter_set_limit(struct res_counter *cnt,
+ unsigned long long limit)
+{
+ unsigned long flags;
+ int ret = -EBUSY;
+
+ spin_lock_irqsave(&cnt->lock, flags);
+ if (cnt->usage < limit) {
+ cnt->limit = limit;
+ ret = 0;
+ }
+ spin_unlock_irqrestore(&cnt->lock, flags);
+ return ret;
+}
+
#endif
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
unsigned long inblock, oublock, cinblock, coublock;
+#ifdef CONFIG_TASK_XACCT
+ u64 rchar, wchar, syscr, syscw;
+#endif
+ struct task_io_accounting ioac;
/*
* Cumulative ns of scheduled CPU time for dead threads in the
/* io operations performed */
u32 swapin_count; /* total count of the number of swapin block */
/* io operations performed */
+
+ struct timespec freepages_start, freepages_end;
+ u64 freepages_delay; /* wait for memory reclaim */
+ u32 freepages_count; /* total count of memory reclaim */
};
#endif /* CONFIG_TASK_DELAY_ACCT */
#if defined(CONFIG_TASK_XACCT)
u64 acct_rss_mem1; /* accumulated rss usage */
u64 acct_vm_mem1; /* accumulated virtual memory usage */
- cputime_t acct_stimexpd;/* stime since last update */
+ cputime_t acct_timexpd; /* stime + utime since last update */
#endif
#ifdef CONFIG_CPUSETS
nodemask_t mems_allowed;
#define PF_KSWAPD 0x00040000 /* I am kswapd */
#define PF_SWAPOFF 0x00080000 /* I am in swapoff */
#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
-#define PF_BORROWED_MM 0x00200000 /* I am a kthread doing use_mm */
+#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
* finds a task by its pid in the specified namespace
* find_task_by_vpid():
* finds a task by its virtual pid
- * find_task_by_pid():
- * finds a task by its global pid
*
- * see also find_pid() etc in include/linux/pid.h
+ * see also find_vpid() etc in include/linux/pid.h
*/
extern struct task_struct *find_task_by_pid_type_ns(int type, int pid,
struct pid_namespace *ns);
-static inline struct task_struct *__deprecated find_task_by_pid(pid_t nr)
-{
- return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns);
-}
extern struct task_struct *find_task_by_vpid(pid_t nr);
extern struct task_struct *find_task_by_pid_ns(pid_t nr,
struct pid_namespace *ns);
extern void force_sig_specific(int, struct task_struct *);
extern int send_sig(int, struct task_struct *, int);
extern void zap_other_threads(struct task_struct *p);
-extern int kill_proc(pid_t, int, int);
extern struct sigqueue *sigqueue_alloc(void);
extern void sigqueue_free(struct sigqueue *);
extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group);
if (!signal_pending(p))
return 0;
- if (state & (__TASK_STOPPED | __TASK_TRACED))
- return 0;
-
return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
}
#ifdef __KERNEL__
#include <asm/atomic.h>
+#include <linux/rcupdate.h>
struct task_struct;
time_t sem_otime; /* last semop time */
time_t sem_ctime; /* last change time */
struct sem *sem_base; /* ptr to first semaphore in array */
- struct sem_queue *sem_pending; /* pending operations to be processed */
- struct sem_queue **sem_pending_last; /* last pending operation */
- struct sem_undo *undo; /* undo requests on this array */
+ struct list_head sem_pending; /* pending operations to be processed */
+ struct list_head list_id; /* undo requests on this array */
unsigned long sem_nsems; /* no. of semaphores in array */
};
/* One queue for each sleeping process in the system. */
struct sem_queue {
- struct sem_queue * next; /* next entry in the queue */
- struct sem_queue ** prev; /* previous entry in the queue, *(q->prev) == q */
- struct task_struct* sleeper; /* this process */
- struct sem_undo * undo; /* undo structure */
+ struct list_head list; /* queue of pending operations */
+ struct task_struct *sleeper; /* this process */
+ struct sem_undo *undo; /* undo structure */
int pid; /* process id of requesting process */
int status; /* completion status of operation */
- struct sem_array * sma; /* semaphore array for operations */
- int id; /* internal sem id */
- struct sembuf * sops; /* array of pending operations */
+ struct sembuf *sops; /* array of pending operations */
int nsops; /* number of operations */
int alter; /* does the operation alter the array? */
};
* when the process exits.
*/
struct sem_undo {
- struct sem_undo * proc_next; /* next entry on this process */
- struct sem_undo * id_next; /* next entry on this semaphore set */
+ struct list_head list_proc; /* per-process list: all undos from one process. */
+ /* rcu protected */
+ struct rcu_head rcu; /* rcu struct for sem_undo() */
+ struct sem_undo_list *ulp; /* sem_undo_list for the process */
+ struct list_head list_id; /* per semaphore array list: all undos for one array */
int semid; /* semaphore set identifier */
short * semadj; /* array of adjustments, one per semaphore */
};
* that may be shared among all a CLONE_SYSVSEM task group.
*/
struct sem_undo_list {
- atomic_t refcnt;
- spinlock_t lock;
- struct sem_undo *proc_list;
+ atomic_t refcnt;
+ spinlock_t lock;
+ struct list_head list_proc;
};
struct sysv_sem {
unsigned long set,
unsigned long clear);
-/* sm501_gpio_set
- *
- * set the state of the given GPIO line
-*/
-
-extern void sm501_gpio_set(struct device *dev,
- unsigned long gpio,
- unsigned int to,
- unsigned int dir);
-
-/* sm501_gpio_get
- *
- * get the state of the given GPIO line
-*/
-
-extern unsigned long sm501_gpio_get(struct device *dev,
- unsigned long gpio);
-
/* Platform data definitions */
struct sm501_platdata_fbsub *fb_pnl;
};
-/* gpio i2c */
+/* gpio i2c
+ *
+ * Note, we have to pass in the bus number, as the number used will be
+ * passed to the i2c-gpio driver's platform_device.id, subsequently used
+ * to register the i2c bus.
+*/
struct sm501_platdata_gpio_i2c {
+ unsigned int bus_num;
unsigned int pin_sda;
unsigned int pin_scl;
+ int udelay;
+ int timeout;
};
/* sm501_initdata
#define SM501_USE_FBACCEL (1<<6)
#define SM501_USE_AC97 (1<<7)
#define SM501_USE_I2S (1<<8)
+#define SM501_USE_GPIO (1<<9)
#define SM501_USE_ALL (0xffffffff)
struct sm501_reg_init gpio_ddr_high;
};
+#define SM501_FLAG_SUSPEND_OFF (1<<4)
+
/* sm501_platdata
*
* This is passed with the platform device to allow the board
struct sm501_init_gpio *init_gpiop;
struct sm501_platdata_fb *fb;
+ int flags;
+ int gpio_base;
+
+ int (*get_power)(struct device *dev);
+ int (*set_power)(struct device *dev, unsigned int on);
+
struct sm501_platdata_gpio_i2c *gpio_i2c;
unsigned int gpio_i2c_nr;
};
}
/* macro names are short for word, double-word, long value (?) */
-#define WVAL(buf,pos) \
- (le16_to_cpu(get_unaligned((__le16 *)((u8 *)(buf) + (pos)))))
-#define DVAL(buf,pos) \
- (le32_to_cpu(get_unaligned((__le32 *)((u8 *)(buf) + (pos)))))
-#define LVAL(buf,pos) \
- (le64_to_cpu(get_unaligned((__le64 *)((u8 *)(buf) + (pos)))))
-#define WSET(buf,pos,val) \
- put_unaligned(cpu_to_le16((u16)(val)), (__le16 *)((u8 *)(buf) + (pos)))
-#define DSET(buf,pos,val) \
- put_unaligned(cpu_to_le32((u32)(val)), (__le32 *)((u8 *)(buf) + (pos)))
-#define LSET(buf,pos,val) \
- put_unaligned(cpu_to_le64((u64)(val)), (__le64 *)((u8 *)(buf) + (pos)))
+#define WVAL(buf, pos) (get_unaligned_le16((u8 *)(buf) + (pos)))
+#define DVAL(buf, pos) (get_unaligned_le32((u8 *)(buf) + (pos)))
+#define LVAL(buf, pos) (get_unaligned_le64((u8 *)(buf) + (pos)))
+
+#define WSET(buf, pos, val) put_unaligned_le16((val), (u8 *)(buf) + (pos))
+#define DSET(buf, pos, val) put_unaligned_le32((val), (u8 *)(buf) + (pos))
+#define LSET(buf, pos, val) put_unaligned_le64((val), (u8 *)(buf) + (pos))
/* where to find the base of the SMB packet proper */
#define smb_base(buf) ((u8 *)(((u8 *)(buf))+4))
-/* FIXME driver should be able to handle all four slaves that
- * can be hooked up to each chipselect, as well as IRQs...
- */
+/* FIXME driver should be able to handle IRQs... */
+
+struct mcp23s08_chip_info {
+ bool is_present; /* true iff populated */
+ u8 pullups; /* BIT(x) means enable pullup x */
+};
struct mcp23s08_platform_data {
- /* four slaves can share one SPI chipselect */
- u8 slave;
+ /* Four slaves (numbered 0..3) can share one SPI chipselect, and
+ * will provide 8..32 GPIOs using 1..4 gpio_chip instances.
+ */
+ struct mcp23s08_chip_info chip[4];
- /* number assigned to the first GPIO */
+ /* "base" is the number of the first GPIO. Dynamic assignment is
+ * not currently supported, and even if there are gaps in chip
+ * addressing the GPIO numbers are sequential .. so for example
+ * if only slaves 0 and 3 are present, their GPIOs range from
+ * base to base+15.
+ */
unsigned base;
- /* pins with pullups */
- u8 pullups;
-
void *context; /* param to setup/teardown */
int (*setup)(struct spi_device *spi,
* linux/spinlock.h: builds the final spin_*() APIs.
*/
+#include <linux/typecheck.h>
#include <linux/preempt.h>
#include <linux/linkage.h>
#include <linux/compiler.h>
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
-#define spin_lock_irqsave(lock, flags) flags = _spin_lock_irqsave(lock)
-#define read_lock_irqsave(lock, flags) flags = _read_lock_irqsave(lock)
-#define write_lock_irqsave(lock, flags) flags = _write_lock_irqsave(lock)
+#define spin_lock_irqsave(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = _spin_lock_irqsave(lock); \
+ } while (0)
+#define read_lock_irqsave(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = _read_lock_irqsave(lock); \
+ } while (0)
+#define write_lock_irqsave(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = _write_lock_irqsave(lock); \
+ } while (0)
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-#define spin_lock_irqsave_nested(lock, flags, subclass) \
- flags = _spin_lock_irqsave_nested(lock, subclass)
+#define spin_lock_irqsave_nested(lock, flags, subclass) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = _spin_lock_irqsave_nested(lock, subclass); \
+ } while (0)
#else
-#define spin_lock_irqsave_nested(lock, flags, subclass) \
- flags = _spin_lock_irqsave(lock)
+#define spin_lock_irqsave_nested(lock, flags, subclass) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = _spin_lock_irqsave(lock); \
+ } while (0)
#endif
#else
-#define spin_lock_irqsave(lock, flags) _spin_lock_irqsave(lock, flags)
-#define read_lock_irqsave(lock, flags) _read_lock_irqsave(lock, flags)
-#define write_lock_irqsave(lock, flags) _write_lock_irqsave(lock, flags)
+#define spin_lock_irqsave(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ _spin_lock_irqsave(lock, flags); \
+ } while (0)
+#define read_lock_irqsave(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ _read_lock_irqsave(lock, flags); \
+ } while (0)
+#define write_lock_irqsave(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ _write_lock_irqsave(lock, flags); \
+ } while (0)
#define spin_lock_irqsave_nested(lock, flags, subclass) \
spin_lock_irqsave(lock, flags)
} while (0)
#endif
-#define spin_unlock_irqrestore(lock, flags) \
- _spin_unlock_irqrestore(lock, flags)
+#define spin_unlock_irqrestore(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ _spin_unlock_irqrestore(lock, flags); \
+ } while (0)
#define spin_unlock_bh(lock) _spin_unlock_bh(lock)
-#define read_unlock_irqrestore(lock, flags) \
- _read_unlock_irqrestore(lock, flags)
+#define read_unlock_irqrestore(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ _read_unlock_irqrestore(lock, flags); \
+ } while (0)
#define read_unlock_bh(lock) _read_unlock_bh(lock)
-#define write_unlock_irqrestore(lock, flags) \
- _write_unlock_irqrestore(lock, flags)
+#define write_unlock_irqrestore(lock, flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ _write_unlock_irqrestore(lock, flags); \
+ } while (0)
#define write_unlock_bh(lock) _write_unlock_bh(lock)
#define spin_trylock_bh(lock) __cond_lock(lock, _spin_trylock_bh(lock))
asmlinkage long sys_connect(int, struct sockaddr __user *, int);
asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *,
- const sigset_t *, size_t, int);
+ const __user sigset_t *, size_t, int);
asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *);
asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *);
asmlinkage long sys_send(int, void __user *, size_t, unsigned);
*/
-#define TASKSTATS_VERSION 6
+#define TASKSTATS_VERSION 7
#define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
* in linux/sched.h */
__u64 ac_utimescaled; /* utime scaled on frequency etc */
__u64 ac_stimescaled; /* stime scaled on frequency etc */
__u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
+
+ /* Delay waiting for memory reclaim */
+ __u64 freepages_count;
+ __u64 freepages_delay_total;
};
--- /dev/null
+#ifndef TYPECHECK_H_INCLUDED
+#define TYPECHECK_H_INCLUDED
+
+/*
+ * Check at compile time that something is of a particular type.
+ * Always evaluates to 1 so you may use it easily in comparisons.
+ */
+#define typecheck(type,x) \
+({ type __dummy; \
+ typeof(x) __dummy2; \
+ (void)(&__dummy == &__dummy2); \
+ 1; \
+})
+
+/*
+ * Check at compile time that 'function' is a certain type, or is a pointer
+ * to that type (needs to use typedef for the function type.)
+ */
+#define typecheck_fn(type,function) \
+({ typeof(type) __tmp = function; \
+ (void)__tmp; \
+})
+
+#endif /* TYPECHECK_H_INCLUDED */
dev_vdbg(&(d)->gadget->dev , fmt , ## args)
#define ERROR(d, fmt, args...) \
dev_err(&(d)->gadget->dev , fmt , ## args)
-#define WARN(d, fmt, args...) \
+#define WARNING(d, fmt, args...) \
dev_warn(&(d)->gadget->dev , fmt , ## args)
#define INFO(d, fmt, args...) \
dev_info(&(d)->gadget->dev , fmt , ## args)
#ifndef _LINUX_VIRTIO_9P_H
#define _LINUX_VIRTIO_9P_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
#include <linux/virtio_config.h>
/* The ID for virtio console */
#ifndef _LINUX_VIRTIO_BALLOON_H
#define _LINUX_VIRTIO_BALLOON_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
#include <linux/virtio_config.h>
/* The ID for virtio_balloon */
#ifndef _LINUX_VIRTIO_BLK_H
#define _LINUX_VIRTIO_BLK_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
#include <linux/virtio_config.h>
/* The ID for virtio_block */
#define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */
#define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */
#define VIRTIO_BLK_F_RO 5 /* Disk is read-only */
+#define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/
struct virtio_blk_config
{
__u8 heads;
__u8 sectors;
} geometry;
+ /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */
+ __u32 blk_size;
} __attribute__((packed));
/* These two define direction. */
#ifndef _LINUX_VIRTIO_CONFIG_H
#define _LINUX_VIRTIO_CONFIG_H
+/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
+ * anyone can use the definitions to implement compatible drivers/servers. */
+
/* Virtio devices use a standardized configuration space to define their
* features and pass configuration information, but each implementation can
* store and access that space differently. */
/* We've given up on this device. */
#define VIRTIO_CONFIG_S_FAILED 0x80
+/* Some virtio feature bits (currently bits 28 through 31) are reserved for the
+ * transport being used (eg. virtio_ring), the rest are per-device feature
+ * bits. */
+#define VIRTIO_TRANSPORT_F_START 28
+#define VIRTIO_TRANSPORT_F_END 32
+
/* Do we get callbacks when the ring is completely used, even if we've
* suppressed them? */
#define VIRTIO_F_NOTIFY_ON_EMPTY 24
* @get_features: get the array of feature bits for this device.
* vdev: the virtio_device
* Returns the first 32 feature bits (all we currently need).
- * @set_features: confirm what device features we'll be using.
+ * @finalize_features: confirm what device features we'll be using.
* vdev: the virtio_device
- * feature: the first 32 feature bits
+ * This gives the final feature bits for the device: it can change
+ * the dev->feature bits if it wants.
*/
struct virtio_config_ops
{
void (*callback)(struct virtqueue *));
void (*del_vq)(struct virtqueue *vq);
u32 (*get_features)(struct virtio_device *vdev);
- void (*set_features)(struct virtio_device *vdev, u32 features);
+ void (*finalize_features)(struct virtio_device *vdev);
};
/* If driver didn't advertise the feature, it will never appear. */
#ifndef _LINUX_VIRTIO_CONSOLE_H
#define _LINUX_VIRTIO_CONSOLE_H
#include <linux/virtio_config.h>
+/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
+ * anyone can use the definitions to implement compatible drivers/servers. */
/* The ID for virtio console */
#define VIRTIO_ID_CONSOLE 3
#ifndef _LINUX_VIRTIO_NET_H
#define _LINUX_VIRTIO_NET_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
#include <linux/virtio_config.h>
/* The ID for virtio_net */
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
*
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
*/
#ifndef _LINUX_VIRTIO_PCI_H
void (*notify)(struct virtqueue *vq),
void (*callback)(struct virtqueue *vq));
void vring_del_virtqueue(struct virtqueue *vq);
+/* Filter out transport-specific feature bits. */
+void vring_transport_features(struct virtio_device *vdev);
irqreturn_t vring_interrupt(int irq, void *_vq);
#endif /* __KERNEL__ */
#ifndef _LINUX_VIRTIO_RNG_H
#define _LINUX_VIRTIO_RNG_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers. */
#include <linux/virtio_config.h>
/* The ID for virtio_rng */
#include <linux/mutex.h>
#include <linux/console_struct.h>
#include <linux/mm.h>
+#include <linux/consolemap.h>
/*
* Presently, a lot of graphics programs do not restore the contents of
struct tty_struct;
int tioclinux(struct tty_struct *tty, unsigned long arg);
+#ifdef CONFIG_CONSOLE_TRANSLATIONS
/* consolemap.c */
struct unimapinit;
void con_protect_unimap(struct vc_data *vc, int rdonly);
int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc);
+#define vc_translate(vc, c) ((vc)->vc_translate[(c) | \
+ (vc)->vc_toggle_meta ? 0x80 : 0])
+#else
+#define con_set_trans_old(arg) (0)
+#define con_get_trans_old(arg) (-EINVAL)
+#define con_set_trans_new(arg) (0)
+#define con_get_trans_new(arg) (-EINVAL)
+#define con_clear_unimap(vc, ui) (0)
+#define con_set_unimap(vc, ct, list) (0)
+#define con_set_default_unimap(vc) (0)
+#define con_copy_unimap(d, s) (0)
+#define con_get_unimap(vc, ct, uct, list) (-EINVAL)
+#define con_free_unimap(vc) do { ; } while (0)
+
+#define vc_translate(vc, c) (c)
+#endif
+
/* vt.c */
int vt_waitactive(int vt);
void change_console(struct vc_data *new_vc);
extern void init_workqueues(void);
int execute_in_process_context(work_func_t fn, struct execute_work *);
+extern int flush_work(struct work_struct *work);
+
extern int cancel_work_sync(struct work_struct *work);
/*
* device should be used. A &struct ubi_rsvol_req object has to be properly
* filled and a pointer to it has to be passed to the IOCTL.
*
+ * UBI volumes re-name
+ * ~~~~~~~~~~~~~~~~~~~
+ *
+ * To re-name several volumes atomically at one go, the %UBI_IOCRNVOL command
+ * of the UBI character device should be used. A &struct ubi_rnvol_req object
+ * has to be properly filled and a pointer to it has to be passed to the IOCTL.
+ *
* UBI volume update
* ~~~~~~~~~~~~~~~~~
*
#define UBI_IOCRMVOL _IOW(UBI_IOC_MAGIC, 1, int32_t)
/* Re-size an UBI volume */
#define UBI_IOCRSVOL _IOW(UBI_IOC_MAGIC, 2, struct ubi_rsvol_req)
+/* Re-name volumes */
+#define UBI_IOCRNVOL _IOW(UBI_IOC_MAGIC, 3, struct ubi_rnvol_req)
/* IOCTL commands of the UBI control character device */
/* Maximum MTD device name length supported by UBI */
#define MAX_UBI_MTD_NAME_LEN 127
+/* Maximum amount of UBI volumes that can be re-named at one go */
+#define UBI_MAX_RNVOL 32
+
/*
* UBI data type hint constants.
*
* it will be 512 in case of a 2KiB page NAND flash with 4 512-byte sub-pages.
*
* But in rare cases, if this optimizes things, the VID header may be placed to
- * a different offset. For example, the boot-loader might do things faster if the
- * VID header sits at the end of the first 2KiB NAND page with 4 sub-pages. As
- * the boot-loader would not normally need to read EC headers (unless it needs
- * UBI in RW mode), it might be faster to calculate ECC. This is weird example,
- * but it real-life example. So, in this example, @vid_hdr_offer would be
- * 2KiB-64 bytes = 1984. Note, that this position is not even 512-bytes
- * aligned, which is OK, as UBI is clever enough to realize this is 4th sub-page
- * of the first page and add needed padding.
+ * a different offset. For example, the boot-loader might do things faster if
+ * the VID header sits at the end of the first 2KiB NAND page with 4 sub-pages.
+ * As the boot-loader would not normally need to read EC headers (unless it
+ * needs UBI in RW mode), it might be faster to calculate ECC. This is weird
+ * example, but it real-life example. So, in this example, @vid_hdr_offer would
+ * be 2KiB-64 bytes = 1984. Note, that this position is not even 512-bytes
+ * aligned, which is OK, as UBI is clever enough to realize this is 4th
+ * sub-page of the first page and add needed padding.
*/
struct ubi_attach_req {
int32_t ubi_num;
int32_t mtd_num;
int32_t vid_hdr_offset;
- uint8_t padding[12];
+ int8_t padding[12];
};
/**
int32_t vol_id;
} __attribute__ ((packed));
+/**
+ * struct ubi_rnvol_req - volumes re-name request.
+ * @count: count of volumes to re-name
+ * @padding1: reserved for future, not used, has to be zeroed
+ * @vol_id: ID of the volume to re-name
+ * @name_len: name length
+ * @padding2: reserved for future, not used, has to be zeroed
+ * @name: new volume name
+ *
+ * UBI allows to re-name up to %32 volumes at one go. The count of volumes to
+ * re-name is specified in the @count field. The ID of the volumes to re-name
+ * and the new names are specified in the @vol_id and @name fields.
+ *
+ * The UBI volume re-name operation is atomic, which means that should power cut
+ * happen, the volumes will have either old name or new name. So the possible
+ * use-cases of this command is atomic upgrade. Indeed, to upgrade, say, volumes
+ * A and B one may create temporary volumes %A1 and %B1 with the new contents,
+ * then atomically re-name A1->A and B1->B, in which case old %A and %B will
+ * be removed.
+ *
+ * If it is not desirable to remove old A and B, the re-name request has to
+ * contain 4 entries: A1->A, A->A1, B1->B, B->B1, in which case old A1 and B1
+ * become A and B, and old A and B will become A1 and B1.
+ *
+ * It is also OK to request: A1->A, A1->X, B1->B, B->Y, in which case old A1
+ * and B1 become A and B, and old A and B become X and Y.
+ *
+ * In other words, in case of re-naming into an existing volume name, the
+ * existing volume is removed, unless it is re-named as well at the same
+ * re-name request.
+ */
+struct ubi_rnvol_req {
+ int32_t count;
+ int8_t padding1[12];
+ struct {
+ int32_t vol_id;
+ int16_t name_len;
+ int8_t padding2[2];
+ char name[UBI_MAX_VOLUME_NAME + 1];
+ } ents[UBI_MAX_RNVOL];
+} __attribute__ ((packed));
+
/**
* struct ubi_leb_change_req - a data structure used in atomic logical
* eraseblock change requests.
struct ubi_leb_change_req {
int32_t lnum;
int32_t bytes;
- uint8_t dtype;
- uint8_t padding[7];
+ int8_t dtype;
+ int8_t padding[7];
} __attribute__ ((packed));
#endif /* __UBI_USER_H__ */
struct ieee80211_radiotap_header *hdr =
(struct ieee80211_radiotap_header *)data;
- return le16_to_cpu(get_unaligned(&hdr->it_len));
+ return get_unaligned_le16(&hdr->it_len);
}
#endif /* IEEE80211_RADIOTAP_H */
#include <linux/device.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/initrd.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
#include "do_mounts.h"
-#define BUILD_CRAMDISK
-
int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */
static int __init prompt_ramdisk(char *str)
goto done;
if (nblocks == 0) {
-#ifdef BUILD_CRAMDISK
if (crd_load(in_fd, out_fd) == 0)
goto successful_load;
-#else
- printk(KERN_NOTICE
- "RAMDISK: Kernel does not support compressed "
- "RAM disk images\n");
-#endif
goto done;
}
return rd_load_image("/dev/root");
}
-#ifdef BUILD_CRAMDISK
-
/*
* gzip declarations
*/
static int __init fill_inbuf(void);
static void __init flush_window(void);
-static void __init *malloc(size_t size);
-static void __init free(void *where);
static void __init error(char *m);
-static void __init gzip_mark(void **);
-static void __init gzip_release(void **);
-
-#include "../lib/inflate.c"
-static void __init *malloc(size_t size)
-{
- return kmalloc(size, GFP_KERNEL);
-}
-
-static void __init free(void *where)
-{
- kfree(where);
-}
-
-static void __init gzip_mark(void **ptr)
-{
-}
-
-static void __init gzip_release(void **ptr)
-{
-}
+#define NO_INFLATE_MALLOC
+#include "../lib/inflate.c"
/* ===========================================================================
* Fill the input buffer. This is called only when the buffer is empty
kfree(window);
return result;
}
-
-#endif /* BUILD_CRAMDISK */
message = x;
}
-static void __init *malloc(size_t size)
-{
- return kmalloc(size, GFP_KERNEL);
-}
-
-static void __init free(void *where)
-{
- kfree(where);
-}
-
/* link hash */
#define N_ALIGN(len) ((((len) + 1) & ~3) + 2)
static void __init flush_window(void);
static void __init error(char *m);
-static void __init gzip_mark(void **);
-static void __init gzip_release(void **);
-#include "../lib/inflate.c"
+#define NO_INFLATE_MALLOC
-static void __init gzip_mark(void **ptr)
-{
-}
-
-static void __init gzip_release(void **ptr)
-{
-}
+#include "../lib/inflate.c"
/* ===========================================================================
* Write the output window window[0..outcnt-1] and update crc and bytes_out.
extern void fork_init(unsigned long);
extern void mca_init(void);
extern void sbus_init(void);
-extern void pidhash_init(void);
-extern void pidmap_init(void);
extern void prio_tree_init(void);
extern void radix_tree_init(void);
extern void free_initmem(void);
#include <linux/utsrelease.h>
#include <linux/version.h>
+#ifndef CONFIG_KALLSYMS
#define version(a) Version_ ## a
#define version_string(a) version(a)
+extern int version_string(LINUX_VERSION_CODE);
int version_string(LINUX_VERSION_CODE);
+#endif
struct uts_namespace init_uts_ns = {
.kref = {
}
/*
- * Routine that is called when a tunable has successfully been changed by
- * hand and it has a callback routine registered on the ipc namespace notifier
- * chain: we don't want such tunables to be recomputed anymore upon memory
- * add/remove or ipc namespace creation/removal.
- * They can come back to a recomputable state by being set to a <0 value.
+ * Routine that is called when the file "auto_msgmni" has successfully been
+ * written.
+ * Two values are allowed:
+ * 0: unregister msgmni's callback routine from the ipc namespace notifier
+ * chain. This means that msgmni won't be recomputed anymore upon memory
+ * add/remove or ipc namespace creation/removal.
+ * 1: register back the callback routine.
*/
-static void tunable_set_callback(int val)
+static void ipc_auto_callback(int val)
{
- if (val >= 0)
+ if (!val)
unregister_ipcns_notifier(current->nsproxy->ipc_ns);
else {
/*
rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos);
if (write && !rc && lenp_bef == *lenp)
- tunable_set_callback(*((int *)(ipc_table.data)));
+ /*
+ * Tunable has successfully been changed by hand. Disable its
+ * automatic adjustment. This simply requires unregistering
+ * the notifiers that trigger recalculation.
+ */
+ unregister_ipcns_notifier(current->nsproxy->ipc_ns);
return rc;
}
lenp, ppos);
}
+static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
+ struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table ipc_table;
+ size_t lenp_bef = *lenp;
+ int oldval;
+ int rc;
+
+ memcpy(&ipc_table, table, sizeof(ipc_table));
+ ipc_table.data = get_ipc(table);
+ oldval = *((int *)(ipc_table.data));
+
+ rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos);
+
+ if (write && !rc && lenp_bef == *lenp) {
+ int newval = *((int *)(ipc_table.data));
+ /*
+ * The file "auto_msgmni" has correctly been set.
+ * React by (un)registering the corresponding tunable, if the
+ * value has changed.
+ */
+ if (newval != oldval)
+ ipc_auto_callback(newval);
+ }
+
+ return rc;
+}
+
#else
#define proc_ipc_doulongvec_minmax NULL
#define proc_ipc_dointvec NULL
#define proc_ipc_callback_dointvec NULL
+#define proc_ipcauto_dointvec_minmax NULL
#endif
#ifdef CONFIG_SYSCTL_SYSCALL
rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval,
newlen);
- if (newval && newlen && rc > 0) {
+ if (newval && newlen && rc > 0)
/*
* Tunable has successfully been changed from userland
*/
- int *data = get_ipc(table);
-
- tunable_set_callback(*data);
- }
+ unregister_ipcns_notifier(current->nsproxy->ipc_ns);
return rc;
}
#define sysctl_ipc_registered_data NULL
#endif
+static int zero;
+static int one = 1;
+
static struct ctl_table ipc_kern_table[] = {
{
.ctl_name = KERN_SHMMAX,
.proc_handler = proc_ipc_dointvec,
.strategy = sysctl_ipc_data,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "auto_msgmni",
+ .data = &init_ipc_ns.auto_msgmni,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_ipcauto_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{}
};
int register_ipcns_notifier(struct ipc_namespace *ns)
{
+ int rc;
+
memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
ns->ipcns_nb.notifier_call = ipcns_callback;
ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
- return blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb);
+ rc = blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb);
+ if (!rc)
+ ns->auto_msgmni = 1;
+ return rc;
}
int cond_register_ipcns_notifier(struct ipc_namespace *ns)
{
+ int rc;
+
memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
ns->ipcns_nb.notifier_call = ipcns_callback;
ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
- return blocking_notifier_chain_cond_register(&ipcns_chain,
+ rc = blocking_notifier_chain_cond_register(&ipcns_chain,
&ns->ipcns_nb);
+ if (!rc)
+ ns->auto_msgmni = 1;
+ return rc;
}
-int unregister_ipcns_notifier(struct ipc_namespace *ns)
+void unregister_ipcns_notifier(struct ipc_namespace *ns)
{
- return blocking_notifier_chain_unregister(&ipcns_chain,
- &ns->ipcns_nb);
+ blocking_notifier_chain_unregister(&ipcns_chain, &ns->ipcns_nb);
+ ns->auto_msgmni = 0;
}
int ipcns_notify(unsigned long val)
* through std routines)
*/
static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
- size_t count, loff_t * off)
+ size_t count, loff_t *off)
{
struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode);
char buffer[FILENT_SIZE];
- size_t slen;
- loff_t o;
-
- if (!count)
- return 0;
+ ssize_t ret;
spin_lock(&info->lock);
snprintf(buffer, sizeof(buffer),
pid_vnr(info->notify_owner));
spin_unlock(&info->lock);
buffer[sizeof(buffer)-1] = '\0';
- slen = strlen(buffer)+1;
-
- o = *off;
- if (o > slen)
- return 0;
-
- if (o + count > slen)
- count = slen - o;
- if (copy_to_user(u_data, buffer + o, count))
- return -EFAULT;
+ ret = simple_read_from_buffer(u_data, count, off, buffer,
+ strlen(buffer));
+ if (ret <= 0)
+ return ret;
- *off = o + count;
filp->f_path.dentry->d_inode->i_atime = filp->f_path.dentry->d_inode->i_ctime = CURRENT_TIME;
- return count;
+ return ret;
}
static int mqueue_flush_file(struct file *filp, fl_owner_t id)
ns->used_sems += nsems;
sma->sem_base = (struct sem *) &sma[1];
- /* sma->sem_pending = NULL; */
- sma->sem_pending_last = &sma->sem_pending;
- /* sma->undo = NULL; */
+ INIT_LIST_HEAD(&sma->sem_pending);
+ INIT_LIST_HEAD(&sma->list_id);
sma->sem_nsems = nsems;
sma->sem_ctime = get_seconds();
sem_unlock(sma);
return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
}
-/* Manage the doubly linked list sma->sem_pending as a FIFO:
- * insert new queue elements at the tail sma->sem_pending_last.
- */
-static inline void append_to_queue (struct sem_array * sma,
- struct sem_queue * q)
-{
- *(q->prev = sma->sem_pending_last) = q;
- *(sma->sem_pending_last = &q->next) = NULL;
-}
-
-static inline void prepend_to_queue (struct sem_array * sma,
- struct sem_queue * q)
-{
- q->next = sma->sem_pending;
- *(q->prev = &sma->sem_pending) = q;
- if (q->next)
- q->next->prev = &q->next;
- else /* sma->sem_pending_last == &sma->sem_pending */
- sma->sem_pending_last = &q->next;
-}
-
-static inline void remove_from_queue (struct sem_array * sma,
- struct sem_queue * q)
-{
- *(q->prev) = q->next;
- if (q->next)
- q->next->prev = q->prev;
- else /* sma->sem_pending_last == &q->next */
- sma->sem_pending_last = q->prev;
- q->prev = NULL; /* mark as removed */
-}
-
/*
* Determine whether a sequence of semaphore operations would succeed
* all at once. Return 0 if yes, 1 if need to sleep, else return error code.
int error;
struct sem_queue * q;
- q = sma->sem_pending;
- while(q) {
+ q = list_entry(sma->sem_pending.next, struct sem_queue, list);
+ while (&q->list != &sma->sem_pending) {
error = try_atomic_semop(sma, q->sops, q->nsops,
q->undo, q->pid);
/* Does q->sleeper still need to sleep? */
if (error <= 0) {
struct sem_queue *n;
- remove_from_queue(sma,q);
- q->status = IN_WAKEUP;
+
/*
* Continue scanning. The next operation
* that must be checked depends on the type of the
* for semaphore values to become 0.
* - if the operation didn't modify the array,
* then just continue.
+ * The order of list_del() and reading ->next
+ * is crucial: In the former case, the list_del()
+ * must be done first [because we might be the
+ * first entry in ->sem_pending], in the latter
+ * case the list_del() must be done last
+ * [because the list is invalid after the list_del()]
*/
- if (q->alter)
- n = sma->sem_pending;
- else
- n = q->next;
+ if (q->alter) {
+ list_del(&q->list);
+ n = list_entry(sma->sem_pending.next,
+ struct sem_queue, list);
+ } else {
+ n = list_entry(q->list.next, struct sem_queue,
+ list);
+ list_del(&q->list);
+ }
+
+ /* wake up the waiting thread */
+ q->status = IN_WAKEUP;
+
wake_up_process(q->sleeper);
/* hands-off: q will disappear immediately after
* writing q->status.
q->status = error;
q = n;
} else {
- q = q->next;
+ q = list_entry(q->list.next, struct sem_queue, list);
}
}
}
struct sem_queue * q;
semncnt = 0;
- for (q = sma->sem_pending; q; q = q->next) {
+ list_for_each_entry(q, &sma->sem_pending, list) {
struct sembuf * sops = q->sops;
int nsops = q->nsops;
int i;
}
return semncnt;
}
+
static int count_semzcnt (struct sem_array * sma, ushort semnum)
{
int semzcnt;
struct sem_queue * q;
semzcnt = 0;
- for (q = sma->sem_pending; q; q = q->next) {
+ list_for_each_entry(q, &sma->sem_pending, list) {
struct sembuf * sops = q->sops;
int nsops = q->nsops;
int i;
return semzcnt;
}
+void free_un(struct rcu_head *head)
+{
+ struct sem_undo *un = container_of(head, struct sem_undo, rcu);
+ kfree(un);
+}
+
/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
* as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
* remains locked on exit.
*/
static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
- struct sem_undo *un;
- struct sem_queue *q;
+ struct sem_undo *un, *tu;
+ struct sem_queue *q, *tq;
struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
- /* Invalidate the existing undo structures for this semaphore set.
- * (They will be freed without any further action in exit_sem()
- * or during the next semop.)
- */
- for (un = sma->undo; un; un = un->id_next)
+ /* Free the existing undo structures for this semaphore set. */
+ assert_spin_locked(&sma->sem_perm.lock);
+ list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
+ list_del(&un->list_id);
+ spin_lock(&un->ulp->lock);
un->semid = -1;
+ list_del_rcu(&un->list_proc);
+ spin_unlock(&un->ulp->lock);
+ call_rcu(&un->rcu, free_un);
+ }
/* Wake up all pending processes and let them fail with EIDRM. */
- q = sma->sem_pending;
- while(q) {
- struct sem_queue *n;
- /* lazy remove_from_queue: we are killing the whole queue */
- q->prev = NULL;
- n = q->next;
+ list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
+ list_del(&q->list);
+
q->status = IN_WAKEUP;
wake_up_process(q->sleeper); /* doesn't sleep */
smp_wmb();
q->status = -EIDRM; /* hands-off q */
- q = n;
}
/* Remove the semaphore set from the IDR */
for (i = 0; i < nsems; i++)
sma->sem_base[i].semval = sem_io[i];
- for (un = sma->undo; un; un = un->id_next)
+
+ assert_spin_locked(&sma->sem_perm.lock);
+ list_for_each_entry(un, &sma->list_id, list_id) {
for (i = 0; i < nsems; i++)
un->semadj[i] = 0;
+ }
sma->sem_ctime = get_seconds();
/* maybe some queued-up processes were waiting for this */
update_queue(sma);
{
int val = arg.val;
struct sem_undo *un;
+
err = -ERANGE;
if (val > SEMVMX || val < 0)
goto out_unlock;
- for (un = sma->undo; un; un = un->id_next)
+ assert_spin_locked(&sma->sem_perm.lock);
+ list_for_each_entry(un, &sma->list_id, list_id)
un->semadj[semnum] = 0;
+
curr->semval = val;
curr->sempid = task_tgid_vnr(current);
sma->sem_ctime = get_seconds();
return -ENOMEM;
spin_lock_init(&undo_list->lock);
atomic_set(&undo_list->refcnt, 1);
+ INIT_LIST_HEAD(&undo_list->list_proc);
+
current->sysvsem.undo_list = undo_list;
}
*undo_listp = undo_list;
static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
{
- struct sem_undo **last, *un;
+ struct sem_undo *walk;
- last = &ulp->proc_list;
- un = *last;
- while(un != NULL) {
- if(un->semid==semid)
- break;
- if(un->semid==-1) {
- *last=un->proc_next;
- kfree(un);
- } else {
- last=&un->proc_next;
- }
- un=*last;
+ list_for_each_entry_rcu(walk, &ulp->list_proc, list_proc) {
+ if (walk->semid == semid)
+ return walk;
}
- return un;
+ return NULL;
}
-static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
+/**
+ * find_alloc_undo - Lookup (and if not present create) undo array
+ * @ns: namespace
+ * @semid: semaphore array id
+ *
+ * The function looks up (and if not present creates) the undo structure.
+ * The size of the undo structure depends on the size of the semaphore
+ * array, thus the alloc path is not that straightforward.
+ * Lifetime-rules: sem_undo is rcu-protected, on success, the function
+ * performs a rcu_read_lock().
+ */
+static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
{
struct sem_array *sma;
struct sem_undo_list *ulp;
if (error)
return ERR_PTR(error);
+ rcu_read_lock();
spin_lock(&ulp->lock);
un = lookup_undo(ulp, semid);
spin_unlock(&ulp->lock);
if (likely(un!=NULL))
goto out;
+ rcu_read_unlock();
/* no undo structure around - allocate one. */
+ /* step 1: figure out the size of the semaphore array */
sma = sem_lock_check(ns, semid);
if (IS_ERR(sma))
return ERR_PTR(PTR_ERR(sma));
nsems = sma->sem_nsems;
sem_getref_and_unlock(sma);
+ /* step 2: allocate new undo structure */
new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
if (!new) {
sem_putref(sma);
return ERR_PTR(-ENOMEM);
}
- new->semadj = (short *) &new[1];
- new->semid = semid;
- spin_lock(&ulp->lock);
- un = lookup_undo(ulp, semid);
- if (un) {
- spin_unlock(&ulp->lock);
- kfree(new);
- sem_putref(sma);
- goto out;
- }
+ /* step 3: Acquire the lock on semaphore array */
sem_lock_and_putref(sma);
if (sma->sem_perm.deleted) {
sem_unlock(sma);
- spin_unlock(&ulp->lock);
kfree(new);
un = ERR_PTR(-EIDRM);
goto out;
}
- new->proc_next = ulp->proc_list;
- ulp->proc_list = new;
- new->id_next = sma->undo;
- sma->undo = new;
- sem_unlock(sma);
+ spin_lock(&ulp->lock);
+
+ /*
+ * step 4: check for races: did someone else allocate the undo struct?
+ */
+ un = lookup_undo(ulp, semid);
+ if (un) {
+ kfree(new);
+ goto success;
+ }
+ /* step 5: initialize & link new undo structure */
+ new->semadj = (short *) &new[1];
+ new->ulp = ulp;
+ new->semid = semid;
+ assert_spin_locked(&ulp->lock);
+ list_add_rcu(&new->list_proc, &ulp->list_proc);
+ assert_spin_locked(&sma->sem_perm.lock);
+ list_add(&new->list_id, &sma->list_id);
un = new;
+
+success:
spin_unlock(&ulp->lock);
+ rcu_read_lock();
+ sem_unlock(sma);
out:
return un;
}
alter = 1;
}
-retry_undos:
if (undos) {
- un = find_undo(ns, semid);
+ un = find_alloc_undo(ns, semid);
if (IS_ERR(un)) {
error = PTR_ERR(un);
goto out_free;
sma = sem_lock_check(ns, semid);
if (IS_ERR(sma)) {
+ if (un)
+ rcu_read_unlock();
error = PTR_ERR(sma);
goto out_free;
}
/*
- * semid identifiers are not unique - find_undo may have
+ * semid identifiers are not unique - find_alloc_undo may have
* allocated an undo structure, it was invalidated by an RMID
- * and now a new array with received the same id. Check and retry.
+ * and now a new array with received the same id. Check and fail.
+ * This case can be detected checking un->semid. The existance of
+ * "un" itself is guaranteed by rcu.
*/
- if (un && un->semid == -1) {
- sem_unlock(sma);
- goto retry_undos;
+ error = -EIDRM;
+ if (un) {
+ if (un->semid == -1) {
+ rcu_read_unlock();
+ goto out_unlock_free;
+ } else {
+ /*
+ * rcu lock can be released, "un" cannot disappear:
+ * - sem_lock is acquired, thus IPC_RMID is
+ * impossible.
+ * - exit_sem is impossible, it always operates on
+ * current (or a dead task).
+ */
+
+ rcu_read_unlock();
+ }
}
+
error = -EFBIG;
if (max >= sma->sem_nsems)
goto out_unlock_free;
* task into the pending queue and go to sleep.
*/
- queue.sma = sma;
queue.sops = sops;
queue.nsops = nsops;
queue.undo = un;
queue.pid = task_tgid_vnr(current);
- queue.id = semid;
queue.alter = alter;
if (alter)
- append_to_queue(sma ,&queue);
+ list_add_tail(&queue.list, &sma->sem_pending);
else
- prepend_to_queue(sma ,&queue);
+ list_add(&queue.list, &sma->sem_pending);
queue.status = -EINTR;
queue.sleeper = current;
sma = sem_lock(ns, semid);
if (IS_ERR(sma)) {
- BUG_ON(queue.prev != NULL);
error = -EIDRM;
goto out_free;
}
*/
if (timeout && jiffies_left == 0)
error = -EAGAIN;
- remove_from_queue(sma,&queue);
+ list_del(&queue.list);
goto out_unlock_free;
out_unlock_free:
*/
void exit_sem(struct task_struct *tsk)
{
- struct sem_undo_list *undo_list;
- struct sem_undo *u, **up;
- struct ipc_namespace *ns;
+ struct sem_undo_list *ulp;
- undo_list = tsk->sysvsem.undo_list;
- if (!undo_list)
+ ulp = tsk->sysvsem.undo_list;
+ if (!ulp)
return;
tsk->sysvsem.undo_list = NULL;
- if (!atomic_dec_and_test(&undo_list->refcnt))
+ if (!atomic_dec_and_test(&ulp->refcnt))
return;
- ns = tsk->nsproxy->ipc_ns;
- /* There's no need to hold the semundo list lock, as current
- * is the last task exiting for this undo list.
- */
- for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) {
+ for (;;) {
struct sem_array *sma;
- int nsems, i;
- struct sem_undo *un, **unp;
+ struct sem_undo *un;
int semid;
-
- semid = u->semid;
+ int i;
- if(semid == -1)
- continue;
- sma = sem_lock(ns, semid);
+ rcu_read_lock();
+ un = list_entry(rcu_dereference(ulp->list_proc.next),
+ struct sem_undo, list_proc);
+ if (&un->list_proc == &ulp->list_proc)
+ semid = -1;
+ else
+ semid = un->semid;
+ rcu_read_unlock();
+
+ if (semid == -1)
+ break;
+
+ sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid);
+
+ /* exit_sem raced with IPC_RMID, nothing to do */
if (IS_ERR(sma))
continue;
- if (u->semid == -1)
- goto next_entry;
+ un = lookup_undo(ulp, semid);
+ if (un == NULL) {
+ /* exit_sem raced with IPC_RMID+semget() that created
+ * exactly the same semid. Nothing to do.
+ */
+ sem_unlock(sma);
+ continue;
+ }
- BUG_ON(sem_checkid(sma, u->semid));
+ /* remove un from the linked lists */
+ assert_spin_locked(&sma->sem_perm.lock);
+ list_del(&un->list_id);
- /* remove u from the sma->undo list */
- for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
- if (u == un)
- goto found;
- }
- printk ("exit_sem undo list error id=%d\n", u->semid);
- goto next_entry;
-found:
- *unp = un->id_next;
- /* perform adjustments registered in u */
- nsems = sma->sem_nsems;
- for (i = 0; i < nsems; i++) {
+ spin_lock(&ulp->lock);
+ list_del_rcu(&un->list_proc);
+ spin_unlock(&ulp->lock);
+
+ /* perform adjustments registered in un */
+ for (i = 0; i < sma->sem_nsems; i++) {
struct sem * semaphore = &sma->sem_base[i];
- if (u->semadj[i]) {
- semaphore->semval += u->semadj[i];
+ if (un->semadj[i]) {
+ semaphore->semval += un->semadj[i];
/*
* Range checks of the new semaphore value,
* not defined by sus:
sma->sem_otime = get_seconds();
/* maybe some queued-up processes were waiting for this */
update_queue(sma);
-next_entry:
sem_unlock(sma);
+
+ call_rcu(&un->rcu, free_un);
}
- kfree(undo_list);
+ kfree(ulp);
}
#ifdef CONFIG_PROC_FS
IPC_SHM_IDS, sysvipc_shm_proc_show);
}
-/*
- * shm_lock_(check_)down routines are called in the paths where the rw_mutex
- * is held to protect access to the idr tree.
- */
-static inline struct shmid_kernel *shm_lock_down(struct ipc_namespace *ns,
- int id)
-{
- struct kern_ipc_perm *ipcp = ipc_lock_down(&shm_ids(ns), id);
-
- if (IS_ERR(ipcp))
- return (struct shmid_kernel *)ipcp;
-
- return container_of(ipcp, struct shmid_kernel, shm_perm);
-}
-
/*
* shm_lock_(check_) routines are called in the paths where the rw_mutex
- * is not held.
+ * is not necessarily held.
*/
static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
{
down_write(&shm_ids(ns).rw_mutex);
/* remove from the list of attaches of the shm segment */
- shp = shm_lock_down(ns, sfd->id);
+ shp = shm_lock(ns, sfd->id);
BUG_ON(IS_ERR(shp));
shp->shm_lprid = task_tgid_vnr(current);
shp->shm_dtim = get_seconds();
out_nattch:
down_write(&shm_ids(ns).rw_mutex);
- shp = shm_lock_down(ns, shmid);
+ shp = shm_lock(ns, shmid);
BUG_ON(IS_ERR(shp));
shp->shm_nattch--;
if(shp->shm_nattch == 0 &&
* Look for an id in the ipc ids idr and lock the associated ipc object.
*
* The ipc object is locked on exit.
- *
- * This is the routine that should be called when the rw_mutex is not already
- * held, i.e. idr tree not protected: it protects the idr tree in read mode
- * during the idr_find().
*/
struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
struct kern_ipc_perm *out;
int lid = ipcid_to_idx(id);
- down_read(&ids->rw_mutex);
-
rcu_read_lock();
out = idr_find(&ids->ipcs_idr, lid);
if (out == NULL) {
rcu_read_unlock();
- up_read(&ids->rw_mutex);
return ERR_PTR(-EINVAL);
}
- up_read(&ids->rw_mutex);
-
spin_lock(&out->lock);
/* ipc_rmid() may have already freed the ID while ipc_lock
return out;
}
-/**
- * ipc_lock_down - Lock an ipc structure with rw_sem held
- * @ids: IPC identifier set
- * @id: ipc id to look for
- *
- * Look for an id in the ipc ids idr and lock the associated ipc object.
- *
- * The ipc object is locked on exit.
- *
- * This is the routine that should be called when the rw_mutex is already
- * held, i.e. idr tree protected.
- */
-
-struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *ids, int id)
-{
- struct kern_ipc_perm *out;
- int lid = ipcid_to_idx(id);
-
- rcu_read_lock();
- out = idr_find(&ids->ipcs_idr, lid);
- if (out == NULL) {
- rcu_read_unlock();
- return ERR_PTR(-EINVAL);
- }
-
- spin_lock(&out->lock);
-
- /*
- * No need to verify that the structure is still valid since the
- * rw_mutex is held.
- */
- return out;
-}
-
-struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids, int id)
-{
- struct kern_ipc_perm *out;
-
- out = ipc_lock_down(ids, id);
- if (IS_ERR(out))
- return out;
-
- if (ipc_checkid(out, id)) {
- ipc_unlock(out);
- return ERR_PTR(-EIDRM);
- }
-
- return out;
-}
-
struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id)
{
struct kern_ipc_perm *out;
int err;
down_write(&ids->rw_mutex);
- ipcp = ipc_lock_check_down(ids, id);
+ ipcp = ipc_lock_check(ids, id);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
goto out_up;
void ipc_rcu_getref(void *ptr);
void ipc_rcu_putref(void *ptr);
-/*
- * ipc_lock_down: called with rw_mutex held
- * ipc_lock: called without that lock held
- */
-struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *, int);
struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
rcu_read_unlock();
}
-struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids, int id);
struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id);
int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
struct ipc_ops *ops, struct ipc_params *params);
# Makefile for the linux kernel.
#
-obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
+obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
cpu.o exit.o itimer.o time.o softirq.o resource.o \
sysctl.o capability.o ptrace.o timer.o user.o \
signal.o sys.o kmod.o workqueue.o pid.o \
CFLAGS_REMOVE_sched.o = -mno-spe -pg
endif
+obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
/*
* External references and all of the globals.
*/
-static void do_acct_process(struct pid_namespace *ns, struct file *);
+static void do_acct_process(struct bsd_acct_struct *acct,
+ struct pid_namespace *ns, struct file *);
/*
* This structure is used so that all the data protected by lock
* can be placed in the same cache line as the lock. This primes
* the cache line to have the data after getting the lock.
*/
-struct acct_glbs {
- spinlock_t lock;
+struct bsd_acct_struct {
volatile int active;
volatile int needcheck;
struct file *file;
struct pid_namespace *ns;
struct timer_list timer;
+ struct list_head list;
};
-static struct acct_glbs acct_globals __cacheline_aligned =
- {__SPIN_LOCK_UNLOCKED(acct_globals.lock)};
+static DEFINE_SPINLOCK(acct_lock);
+static LIST_HEAD(acct_list);
/*
* Called whenever the timer says to check the free space.
*/
-static void acct_timeout(unsigned long unused)
+static void acct_timeout(unsigned long x)
{
- acct_globals.needcheck = 1;
+ struct bsd_acct_struct *acct = (struct bsd_acct_struct *)x;
+ acct->needcheck = 1;
}
/*
* Check the amount of free space and suspend/resume accordingly.
*/
-static int check_free_space(struct file *file)
+static int check_free_space(struct bsd_acct_struct *acct, struct file *file)
{
struct kstatfs sbuf;
int res;
sector_t resume;
sector_t suspend;
- spin_lock(&acct_globals.lock);
- res = acct_globals.active;
- if (!file || !acct_globals.needcheck)
+ spin_lock(&acct_lock);
+ res = acct->active;
+ if (!file || !acct->needcheck)
goto out;
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
/* May block */
if (vfs_statfs(file->f_path.dentry, &sbuf))
act = 0;
/*
- * If some joker switched acct_globals.file under us we'ld better be
+ * If some joker switched acct->file under us we'ld better be
* silent and _not_ touch anything.
*/
- spin_lock(&acct_globals.lock);
- if (file != acct_globals.file) {
+ spin_lock(&acct_lock);
+ if (file != acct->file) {
if (act)
res = act>0;
goto out;
}
- if (acct_globals.active) {
+ if (acct->active) {
if (act < 0) {
- acct_globals.active = 0;
+ acct->active = 0;
printk(KERN_INFO "Process accounting paused\n");
}
} else {
if (act > 0) {
- acct_globals.active = 1;
+ acct->active = 1;
printk(KERN_INFO "Process accounting resumed\n");
}
}
- del_timer(&acct_globals.timer);
- acct_globals.needcheck = 0;
- acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
- add_timer(&acct_globals.timer);
- res = acct_globals.active;
+ del_timer(&acct->timer);
+ acct->needcheck = 0;
+ acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+ add_timer(&acct->timer);
+ res = acct->active;
out:
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
return res;
}
* Close the old accounting file (if currently open) and then replace
* it with file (if non-NULL).
*
- * NOTE: acct_globals.lock MUST be held on entry and exit.
+ * NOTE: acct_lock MUST be held on entry and exit.
*/
-static void acct_file_reopen(struct file *file)
+static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
+ struct pid_namespace *ns)
{
struct file *old_acct = NULL;
struct pid_namespace *old_ns = NULL;
- if (acct_globals.file) {
- old_acct = acct_globals.file;
- old_ns = acct_globals.ns;
- del_timer(&acct_globals.timer);
- acct_globals.active = 0;
- acct_globals.needcheck = 0;
- acct_globals.file = NULL;
+ if (acct->file) {
+ old_acct = acct->file;
+ old_ns = acct->ns;
+ del_timer(&acct->timer);
+ acct->active = 0;
+ acct->needcheck = 0;
+ acct->file = NULL;
+ acct->ns = NULL;
+ list_del(&acct->list);
}
if (file) {
- acct_globals.file = file;
- acct_globals.ns = get_pid_ns(task_active_pid_ns(current));
- acct_globals.needcheck = 0;
- acct_globals.active = 1;
+ acct->file = file;
+ acct->ns = ns;
+ acct->needcheck = 0;
+ acct->active = 1;
+ list_add(&acct->list, &acct_list);
/* It's been deleted if it was used before so this is safe */
- init_timer(&acct_globals.timer);
- acct_globals.timer.function = acct_timeout;
- acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
- add_timer(&acct_globals.timer);
+ setup_timer(&acct->timer, acct_timeout, (unsigned long)acct);
+ acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ;
+ add_timer(&acct->timer);
}
if (old_acct) {
mnt_unpin(old_acct->f_path.mnt);
- spin_unlock(&acct_globals.lock);
- do_acct_process(old_ns, old_acct);
+ spin_unlock(&acct_lock);
+ do_acct_process(acct, old_ns, old_acct);
filp_close(old_acct, NULL);
- put_pid_ns(old_ns);
- spin_lock(&acct_globals.lock);
+ spin_lock(&acct_lock);
}
}
{
struct file *file;
int error;
+ struct pid_namespace *ns;
+ struct bsd_acct_struct *acct = NULL;
/* Difference from BSD - they don't do O_APPEND */
file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
return -EIO;
}
+ ns = task_active_pid_ns(current);
+ if (ns->bacct == NULL) {
+ acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
+ if (acct == NULL) {
+ filp_close(file, NULL);
+ return -ENOMEM;
+ }
+ }
+
error = security_acct(file);
if (error) {
+ kfree(acct);
filp_close(file, NULL);
return error;
}
- spin_lock(&acct_globals.lock);
+ spin_lock(&acct_lock);
+ if (ns->bacct == NULL) {
+ ns->bacct = acct;
+ acct = NULL;
+ }
+
mnt_pin(file->f_path.mnt);
- acct_file_reopen(file);
- spin_unlock(&acct_globals.lock);
+ acct_file_reopen(ns->bacct, file, ns);
+ spin_unlock(&acct_lock);
mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
+ kfree(acct);
return 0;
}
error = acct_on(tmp);
putname(tmp);
} else {
+ struct bsd_acct_struct *acct;
+
+ acct = task_active_pid_ns(current)->bacct;
+ if (acct == NULL)
+ return 0;
+
error = security_acct(NULL);
if (!error) {
- spin_lock(&acct_globals.lock);
- acct_file_reopen(NULL);
- spin_unlock(&acct_globals.lock);
+ spin_lock(&acct_lock);
+ acct_file_reopen(acct, NULL, NULL);
+ spin_unlock(&acct_lock);
}
}
return error;
*/
void acct_auto_close_mnt(struct vfsmount *m)
{
- spin_lock(&acct_globals.lock);
- if (acct_globals.file && acct_globals.file->f_path.mnt == m)
- acct_file_reopen(NULL);
- spin_unlock(&acct_globals.lock);
+ struct bsd_acct_struct *acct;
+
+ spin_lock(&acct_lock);
+restart:
+ list_for_each_entry(acct, &acct_list, list)
+ if (acct->file && acct->file->f_path.mnt == m) {
+ acct_file_reopen(acct, NULL, NULL);
+ goto restart;
+ }
+ spin_unlock(&acct_lock);
}
/**
*/
void acct_auto_close(struct super_block *sb)
{
- spin_lock(&acct_globals.lock);
- if (acct_globals.file &&
- acct_globals.file->f_path.mnt->mnt_sb == sb) {
- acct_file_reopen(NULL);
+ struct bsd_acct_struct *acct;
+
+ spin_lock(&acct_lock);
+restart:
+ list_for_each_entry(acct, &acct_list, list)
+ if (acct->file && acct->file->f_path.mnt->mnt_sb == sb) {
+ acct_file_reopen(acct, NULL, NULL);
+ goto restart;
+ }
+ spin_unlock(&acct_lock);
+}
+
+void acct_exit_ns(struct pid_namespace *ns)
+{
+ struct bsd_acct_struct *acct;
+
+ spin_lock(&acct_lock);
+ acct = ns->bacct;
+ if (acct != NULL) {
+ if (acct->file != NULL)
+ acct_file_reopen(acct, NULL, NULL);
+
+ kfree(acct);
}
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
}
/*
/*
* do_acct_process does all actual work. Caller holds the reference to file.
*/
-static void do_acct_process(struct pid_namespace *ns, struct file *file)
+static void do_acct_process(struct bsd_acct_struct *acct,
+ struct pid_namespace *ns, struct file *file)
{
struct pacct_struct *pacct = ¤t->signal->pacct;
acct_t ac;
* First check to see if there is enough free_space to continue
* the process accounting system.
*/
- if (!check_free_space(file))
+ if (!check_free_space(acct, file))
return;
/*
spin_unlock_irq(¤t->sighand->siglock);
}
-/**
- * acct_process - now just a wrapper around do_acct_process
- * @exitcode: task exit code
- *
- * handles process accounting for an exiting task
- */
-void acct_process(void)
+static void acct_process_in_ns(struct pid_namespace *ns)
{
struct file *file = NULL;
- struct pid_namespace *ns;
+ struct bsd_acct_struct *acct;
+ acct = ns->bacct;
/*
* accelerate the common fastpath:
*/
- if (!acct_globals.file)
+ if (!acct || !acct->file)
return;
- spin_lock(&acct_globals.lock);
- file = acct_globals.file;
+ spin_lock(&acct_lock);
+ file = acct->file;
if (unlikely(!file)) {
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
return;
}
get_file(file);
- ns = get_pid_ns(acct_globals.ns);
- spin_unlock(&acct_globals.lock);
+ spin_unlock(&acct_lock);
- do_acct_process(ns, file);
+ do_acct_process(acct, ns, file);
fput(file);
- put_pid_ns(ns);
+}
+
+/**
+ * acct_process - now just a wrapper around acct_process_in_ns,
+ * which in turn is a wrapper around do_acct_process.
+ *
+ * handles process accounting for an exiting task
+ */
+void acct_process(void)
+{
+ struct pid_namespace *ns;
+
+ /*
+ * This loop is safe lockless, since current is still
+ * alive and holds its namespace, which in turn holds
+ * its parent.
+ */
+ for (ns = task_active_pid_ns(current); ns != NULL; ns = ns->parent)
+ acct_process_in_ns(ns);
}
/* Hierarchy-specific flags */
unsigned long flags;
- /* The path to use for release notifications. No locking
- * between setting and use - so if userspace updates this
- * while child cgroups exist, you could miss a
- * notification. We ensure that it's always a valid
- * NUL-terminated string */
+ /* The path to use for release notifications. */
char release_agent_path[PATH_MAX];
};
* extra work in the fork/exit path if none of the subsystems need to
* be called.
*/
-static int need_forkexit_callback;
+static int need_forkexit_callback __read_mostly;
static int need_mm_owner_callback __read_mostly;
/* convenient tests for these bits */
* task until after the first call to cgroup_iter_start(). This
* reduces the fork()/exit() overhead for people who have cgroups
* compiled into their kernel but not actually in use */
-static int use_task_css_set_links;
+static int use_task_css_set_links __read_mostly;
/* When we create or destroy a css_set, the operation simply
* takes/releases a reference count on all the cgroups referenced
*/
static void unlink_css_set(struct css_set *cg)
{
+ struct cg_cgroup_link *link;
+ struct cg_cgroup_link *saved_link;
+
write_lock(&css_set_lock);
hlist_del(&cg->hlist);
css_set_count--;
- while (!list_empty(&cg->cg_links)) {
- struct cg_cgroup_link *link;
- link = list_entry(cg->cg_links.next,
- struct cg_cgroup_link, cg_link_list);
+
+ list_for_each_entry_safe(link, saved_link, &cg->cg_links,
+ cg_link_list) {
list_del(&link->cg_link_list);
list_del(&link->cgrp_link_list);
kfree(link);
}
+
write_unlock(&css_set_lock);
}
static int allocate_cg_links(int count, struct list_head *tmp)
{
struct cg_cgroup_link *link;
+ struct cg_cgroup_link *saved_link;
int i;
INIT_LIST_HEAD(tmp);
for (i = 0; i < count; i++) {
link = kmalloc(sizeof(*link), GFP_KERNEL);
if (!link) {
- while (!list_empty(tmp)) {
- link = list_entry(tmp->next,
- struct cg_cgroup_link,
- cgrp_link_list);
+ list_for_each_entry_safe(link, saved_link, tmp,
+ cgrp_link_list) {
list_del(&link->cgrp_link_list);
kfree(link);
}
static void free_cg_links(struct list_head *tmp)
{
- while (!list_empty(tmp)) {
- struct cg_cgroup_link *link;
- link = list_entry(tmp->next,
- struct cg_cgroup_link,
- cgrp_link_list);
+ struct cg_cgroup_link *link;
+ struct cg_cgroup_link *saved_link;
+
+ list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
list_del(&link->cgrp_link_list);
kfree(link);
}
/* First see if we already have a cgroup group that matches
* the desired set */
- write_lock(&css_set_lock);
+ read_lock(&css_set_lock);
res = find_existing_css_set(oldcg, cgrp, template);
if (res)
get_css_set(res);
- write_unlock(&css_set_lock);
+ read_unlock(&css_set_lock);
if (res)
return res;
* knows that the cgroup won't be removed, as cgroup_rmdir()
* needs that mutex.
*
- * The cgroup_common_file_write handler for operations that modify
- * the cgroup hierarchy holds cgroup_mutex across the entire operation,
- * single threading all such cgroup modifications across the system.
- *
* The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
* (usually) take cgroup_mutex. These are the two most performance
* critical pieces of code here. The exception occurs on cgroup_exit(),
struct cgroupfs_root *root = sb->s_fs_info;
struct cgroup *cgrp = &root->top_cgroup;
int ret;
+ struct cg_cgroup_link *link;
+ struct cg_cgroup_link *saved_link;
BUG_ON(!root);
* root cgroup
*/
write_lock(&css_set_lock);
- while (!list_empty(&cgrp->css_sets)) {
- struct cg_cgroup_link *link;
- link = list_entry(cgrp->css_sets.next,
- struct cg_cgroup_link, cgrp_link_list);
+
+ list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
+ cgrp_link_list) {
list_del(&link->cg_link_list);
list_del(&link->cgrp_link_list);
kfree(link);
}
/*
- * Attach task with pid 'pid' to cgroup 'cgrp'. Call with
- * cgroup_mutex, may take task_lock of task
+ * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
+ * held. May take task_lock of task
*/
-static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
+static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
{
- pid_t pid;
struct task_struct *tsk;
int ret;
- if (sscanf(pidbuf, "%d", &pid) != 1)
- return -EIO;
-
if (pid) {
rcu_read_lock();
tsk = find_task_by_vpid(pid);
return ret;
}
+static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
+{
+ int ret;
+ if (!cgroup_lock_live_group(cgrp))
+ return -ENODEV;
+ ret = attach_task_by_pid(cgrp, pid);
+ cgroup_unlock();
+ return ret;
+}
+
/* The various types of files and directories in a cgroup file system */
enum cgroup_filetype {
FILE_ROOT,
FILE_RELEASE_AGENT,
};
+/**
+ * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
+ * @cgrp: the cgroup to be checked for liveness
+ *
+ * On success, returns true; the lock should be later released with
+ * cgroup_unlock(). On failure returns false with no lock held.
+ */
+bool cgroup_lock_live_group(struct cgroup *cgrp)
+{
+ mutex_lock(&cgroup_mutex);
+ if (cgroup_is_removed(cgrp)) {
+ mutex_unlock(&cgroup_mutex);
+ return false;
+ }
+ return true;
+}
+
+static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
+ const char *buffer)
+{
+ BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+ if (!cgroup_lock_live_group(cgrp))
+ return -ENODEV;
+ strcpy(cgrp->root->release_agent_path, buffer);
+ cgroup_unlock();
+ return 0;
+}
+
+static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
+ struct seq_file *seq)
+{
+ if (!cgroup_lock_live_group(cgrp))
+ return -ENODEV;
+ seq_puts(seq, cgrp->root->release_agent_path);
+ seq_putc(seq, '\n');
+ cgroup_unlock();
+ return 0;
+}
+
+/* A buffer size big enough for numbers or short strings */
+#define CGROUP_LOCAL_BUFFER_SIZE 64
+
static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
const char __user *userbuf,
size_t nbytes, loff_t *unused_ppos)
{
- char buffer[64];
+ char buffer[CGROUP_LOCAL_BUFFER_SIZE];
int retval = 0;
char *end;
return retval;
}
-static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
- struct cftype *cft,
- struct file *file,
- const char __user *userbuf,
- size_t nbytes, loff_t *unused_ppos)
+static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
+ struct file *file,
+ const char __user *userbuf,
+ size_t nbytes, loff_t *unused_ppos)
{
- enum cgroup_filetype type = cft->private;
- char *buffer;
+ char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
int retval = 0;
+ size_t max_bytes = cft->max_write_len;
+ char *buffer = local_buffer;
- if (nbytes >= PATH_MAX)
+ if (!max_bytes)
+ max_bytes = sizeof(local_buffer) - 1;
+ if (nbytes >= max_bytes)
return -E2BIG;
-
- /* +1 for nul-terminator */
- buffer = kmalloc(nbytes + 1, GFP_KERNEL);
- if (buffer == NULL)
- return -ENOMEM;
-
- if (copy_from_user(buffer, userbuf, nbytes)) {
- retval = -EFAULT;
- goto out1;
+ /* Allocate a dynamic buffer if we need one */
+ if (nbytes >= sizeof(local_buffer)) {
+ buffer = kmalloc(nbytes + 1, GFP_KERNEL);
+ if (buffer == NULL)
+ return -ENOMEM;
}
- buffer[nbytes] = 0; /* nul-terminate */
- strstrip(buffer); /* strip -just- trailing whitespace */
-
- mutex_lock(&cgroup_mutex);
+ if (nbytes && copy_from_user(buffer, userbuf, nbytes))
+ return -EFAULT;
- /*
- * This was already checked for in cgroup_file_write(), but
- * check again now we're holding cgroup_mutex.
- */
- if (cgroup_is_removed(cgrp)) {
- retval = -ENODEV;
- goto out2;
- }
-
- switch (type) {
- case FILE_TASKLIST:
- retval = attach_task_by_pid(cgrp, buffer);
- break;
- case FILE_NOTIFY_ON_RELEASE:
- clear_bit(CGRP_RELEASABLE, &cgrp->flags);
- if (simple_strtoul(buffer, NULL, 10) != 0)
- set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
- else
- clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
- break;
- case FILE_RELEASE_AGENT:
- BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
- strcpy(cgrp->root->release_agent_path, buffer);
- break;
- default:
- retval = -EINVAL;
- goto out2;
- }
-
- if (retval == 0)
+ buffer[nbytes] = 0; /* nul-terminate */
+ strstrip(buffer);
+ retval = cft->write_string(cgrp, cft, buffer);
+ if (!retval)
retval = nbytes;
-out2:
- mutex_unlock(&cgroup_mutex);
-out1:
- kfree(buffer);
+ if (buffer != local_buffer)
+ kfree(buffer);
return retval;
}
return cft->write(cgrp, cft, file, buf, nbytes, ppos);
if (cft->write_u64 || cft->write_s64)
return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
+ if (cft->write_string)
+ return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
if (cft->trigger) {
int ret = cft->trigger(cgrp, (unsigned int)cft->private);
return ret ? ret : nbytes;
char __user *buf, size_t nbytes,
loff_t *ppos)
{
- char tmp[64];
+ char tmp[CGROUP_LOCAL_BUFFER_SIZE];
u64 val = cft->read_u64(cgrp, cft);
int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
char __user *buf, size_t nbytes,
loff_t *ppos)
{
- char tmp[64];
+ char tmp[CGROUP_LOCAL_BUFFER_SIZE];
s64 val = cft->read_s64(cgrp, cft);
int len = sprintf(tmp, "%lld\n", (long long) val);
return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
}
-static ssize_t cgroup_common_file_read(struct cgroup *cgrp,
- struct cftype *cft,
- struct file *file,
- char __user *buf,
- size_t nbytes, loff_t *ppos)
-{
- enum cgroup_filetype type = cft->private;
- char *page;
- ssize_t retval = 0;
- char *s;
-
- if (!(page = (char *)__get_free_page(GFP_KERNEL)))
- return -ENOMEM;
-
- s = page;
-
- switch (type) {
- case FILE_RELEASE_AGENT:
- {
- struct cgroupfs_root *root;
- size_t n;
- mutex_lock(&cgroup_mutex);
- root = cgrp->root;
- n = strnlen(root->release_agent_path,
- sizeof(root->release_agent_path));
- n = min(n, (size_t) PAGE_SIZE);
- strncpy(s, root->release_agent_path, n);
- mutex_unlock(&cgroup_mutex);
- s += n;
- break;
- }
- default:
- retval = -EINVAL;
- goto out;
- }
- *s++ = '\n';
-
- retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
-out:
- free_page((unsigned long)page);
- return retval;
-}
-
static ssize_t cgroup_file_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
static struct file_operations cgroup_seqfile_operations = {
.read = seq_read,
+ .write = cgroup_file_write,
.llseek = seq_lseek,
.release = cgroup_seqfile_release,
};
int cgroup_task_count(const struct cgroup *cgrp)
{
int count = 0;
- struct list_head *l;
+ struct cg_cgroup_link *link;
read_lock(&css_set_lock);
- l = cgrp->css_sets.next;
- while (l != &cgrp->css_sets) {
- struct cg_cgroup_link *link =
- list_entry(l, struct cg_cgroup_link, cgrp_link_list);
+ list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
count += atomic_read(&link->cg->ref.refcount);
- l = l->next;
}
read_unlock(&css_set_lock);
return count;
return notify_on_release(cgrp);
}
+static int cgroup_write_notify_on_release(struct cgroup *cgrp,
+ struct cftype *cft,
+ u64 val)
+{
+ clear_bit(CGRP_RELEASABLE, &cgrp->flags);
+ if (val)
+ set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+ else
+ clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+ return 0;
+}
+
/*
* for the common functions, 'private' gives the type of file
*/
.name = "tasks",
.open = cgroup_tasks_open,
.read = cgroup_tasks_read,
- .write = cgroup_common_file_write,
+ .write_u64 = cgroup_tasks_write,
.release = cgroup_tasks_release,
.private = FILE_TASKLIST,
},
{
.name = "notify_on_release",
.read_u64 = cgroup_read_notify_on_release,
- .write = cgroup_common_file_write,
+ .write_u64 = cgroup_write_notify_on_release,
.private = FILE_NOTIFY_ON_RELEASE,
},
};
static struct cftype cft_release_agent = {
.name = "release_agent",
- .read = cgroup_common_file_read,
- .write = cgroup_common_file_write,
+ .read_seq_string = cgroup_release_agent_show,
+ .write_string = cgroup_release_agent_write,
+ .max_write_len = PATH_MAX,
.private = FILE_RELEASE_AGENT,
};
* cgroup_clone - clone the cgroup the given subsystem is attached to
* @tsk: the task to be moved
* @subsys: the given subsystem
+ * @nodename: the name for the new cgroup
*
* Duplicate the current cgroup in the hierarchy that the given
* subsystem is attached to, and move this task into the new
* child.
*/
-int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
+ char *nodename)
{
struct dentry *dentry;
int ret = 0;
- char nodename[MAX_CGROUP_TYPE_NAMELEN];
struct cgroup *parent, *child;
struct inode *inode;
struct css_set *cg;
cg = tsk->cgroups;
parent = task_cgroup(tsk, subsys->subsys_id);
- snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "%d", tsk->pid);
-
/* Pin the hierarchy */
atomic_inc(&parent->root->sb->s_active);
while (!list_empty(&release_list)) {
char *argv[3], *envp[3];
int i;
- char *pathbuf;
+ char *pathbuf = NULL, *agentbuf = NULL;
struct cgroup *cgrp = list_entry(release_list.next,
struct cgroup,
release_list);
list_del_init(&cgrp->release_list);
spin_unlock(&release_list_lock);
pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!pathbuf) {
- spin_lock(&release_list_lock);
- continue;
- }
-
- if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) {
- kfree(pathbuf);
- spin_lock(&release_list_lock);
- continue;
- }
+ if (!pathbuf)
+ goto continue_free;
+ if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
+ goto continue_free;
+ agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
+ if (!agentbuf)
+ goto continue_free;
i = 0;
- argv[i++] = cgrp->root->release_agent_path;
- argv[i++] = (char *)pathbuf;
+ argv[i++] = agentbuf;
+ argv[i++] = pathbuf;
argv[i] = NULL;
i = 0;
* be a slow process */
mutex_unlock(&cgroup_mutex);
call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
- kfree(pathbuf);
mutex_lock(&cgroup_mutex);
+ continue_free:
+ kfree(pathbuf);
+ kfree(agentbuf);
spin_lock(&release_list_lock);
}
spin_unlock(&release_list_lock);
set_cpus_allowed_ptr(current, &old_allowed);
out_release:
cpu_hotplug_done();
+ if (!err) {
+ if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
+ hcpu) == NOTIFY_BAD)
+ BUG();
+ }
return err;
}
* The task_struct fields mems_allowed and mems_generation may only
* be accessed in the context of that task, so require no locks.
*
- * The cpuset_common_file_write handler for operations that modify
- * the cpuset hierarchy holds cgroup_mutex across the entire operation,
- * single threading all such cpuset modifications across the system.
- *
* The cpuset_common_file_read() handlers only hold callback_mutex across
* small pieces of code, such as when reading out possibly multi-word
* cpumasks and nodemasks.
my_cpusets_mem_gen = top_cpuset.mems_generation;
} else {
rcu_read_lock();
- my_cpusets_mem_gen = task_cs(current)->mems_generation;
+ my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
rcu_read_unlock();
}
/*
* rebuild_sched_domains()
*
- * If the flag 'sched_load_balance' of any cpuset with non-empty
- * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
- * which has that flag enabled, or if any cpuset with a non-empty
- * 'cpus' is removed, then call this routine to rebuild the
- * scheduler's dynamic sched domains.
+ * This routine will be called to rebuild the scheduler's dynamic
+ * sched domains:
+ * - if the flag 'sched_load_balance' of any cpuset with non-empty
+ * 'cpus' changes,
+ * - or if the 'cpus' allowed changes in any cpuset which has that
+ * flag enabled,
+ * - or if the 'sched_relax_domain_level' of any cpuset which has
+ * that flag enabled and with non-empty 'cpus' changes,
+ * - or if any cpuset with non-empty 'cpus' is removed,
+ * - or if a cpu gets offlined.
*
* This routine builds a partial partition of the systems CPUs
* (the set of non-overlappping cpumask_t's in the array 'part'
while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
struct cgroup *cont;
struct cpuset *child; /* scans child cpusets of cp */
+
+ if (cpus_empty(cp->cpus_allowed))
+ continue;
+
if (is_sched_load_balance(cp))
csa[csn++] = cp;
+
list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
child = cgroup_cs(cont);
__kfifo_put(q, (void *)&child, sizeof(cp));
/* Don't kfree(dattr) -- partition_sched_domains() does that. */
}
-static inline int started_after_time(struct task_struct *t1,
- struct timespec *time,
- struct task_struct *t2)
-{
- int start_diff = timespec_compare(&t1->start_time, time);
- if (start_diff > 0) {
- return 1;
- } else if (start_diff < 0) {
- return 0;
- } else {
- /*
- * Arbitrarily, if two processes started at the same
- * time, we'll say that the lower pointer value
- * started first. Note that t2 may have exited by now
- * so this may not be a valid pointer any longer, but
- * that's fine - it still serves to distinguish
- * between two tasks started (effectively)
- * simultaneously.
- */
- return t1 > t2;
- }
-}
-
-static inline int started_after(void *p1, void *p2)
-{
- struct task_struct *t1 = p1;
- struct task_struct *t2 = p2;
- return started_after_time(t1, &t2->start_time, t2);
-}
-
/**
* cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's
* @tsk: task to test
set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed));
}
+/**
+ * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
+ * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
+ *
+ * Called with cgroup_mutex held
+ *
+ * The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
+ * calling callback functions for each.
+ *
+ * Return 0 if successful, -errno if not.
+ */
+static int update_tasks_cpumask(struct cpuset *cs)
+{
+ struct cgroup_scanner scan;
+ struct ptr_heap heap;
+ int retval;
+
+ /*
+ * cgroup_scan_tasks() will initialize heap->gt for us.
+ * heap_init() is still needed here for we should not change
+ * cs->cpus_allowed when heap_init() fails.
+ */
+ retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
+ if (retval)
+ return retval;
+
+ scan.cg = cs->css.cgroup;
+ scan.test_task = cpuset_test_cpumask;
+ scan.process_task = cpuset_change_cpumask;
+ scan.heap = &heap;
+ retval = cgroup_scan_tasks(&scan);
+
+ heap_free(&heap);
+ return retval;
+}
+
/**
* update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
* @cs: the cpuset to consider
* @buf: buffer of cpu numbers written to this cpuset
*/
-static int update_cpumask(struct cpuset *cs, char *buf)
+static int update_cpumask(struct cpuset *cs, const char *buf)
{
struct cpuset trialcs;
- struct cgroup_scanner scan;
- struct ptr_heap heap;
int retval;
int is_load_balanced;
* that parsing. The validate_change() call ensures that cpusets
* with tasks have cpus.
*/
- buf = strstrip(buf);
if (!*buf) {
cpus_clear(trialcs.cpus_allowed);
} else {
if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
return 0;
- retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
- if (retval)
- return retval;
-
is_load_balanced = is_sched_load_balance(&trialcs);
mutex_lock(&callback_mutex);
* Scan tasks in the cpuset, and update the cpumasks of any
* that need an update.
*/
- scan.cg = cs->css.cgroup;
- scan.test_task = cpuset_test_cpumask;
- scan.process_task = cpuset_change_cpumask;
- scan.heap = &heap;
- cgroup_scan_tasks(&scan);
- heap_free(&heap);
+ retval = update_tasks_cpumask(cs);
+ if (retval < 0)
+ return retval;
if (is_load_balanced)
rebuild_sched_domains();
mutex_unlock(&callback_mutex);
}
-/*
- * Handle user request to change the 'mems' memory placement
- * of a cpuset. Needs to validate the request, update the
- * cpusets mems_allowed and mems_generation, and for each
- * task in the cpuset, rebind any vma mempolicies and if
- * the cpuset is marked 'memory_migrate', migrate the tasks
- * pages to the new memory.
- *
- * Call with cgroup_mutex held. May take callback_mutex during call.
- * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
- * lock each such tasks mm->mmap_sem, scan its vma's and rebind
- * their mempolicies to the cpusets new mems_allowed.
- */
-
static void *cpuset_being_rebound;
-static int update_nodemask(struct cpuset *cs, char *buf)
+/**
+ * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
+ * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
+ * @oldmem: old mems_allowed of cpuset cs
+ *
+ * Called with cgroup_mutex held
+ * Return 0 if successful, -errno if not.
+ */
+static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
{
- struct cpuset trialcs;
- nodemask_t oldmem;
struct task_struct *p;
struct mm_struct **mmarray;
int i, n, ntasks;
int migrate;
int fudge;
- int retval;
struct cgroup_iter it;
-
- /*
- * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
- * it's read-only
- */
- if (cs == &top_cpuset)
- return -EACCES;
-
- trialcs = *cs;
-
- /*
- * An empty mems_allowed is ok iff there are no tasks in the cpuset.
- * Since nodelist_parse() fails on an empty mask, we special case
- * that parsing. The validate_change() call ensures that cpusets
- * with tasks have memory.
- */
- buf = strstrip(buf);
- if (!*buf) {
- nodes_clear(trialcs.mems_allowed);
- } else {
- retval = nodelist_parse(buf, trialcs.mems_allowed);
- if (retval < 0)
- goto done;
-
- if (!nodes_subset(trialcs.mems_allowed,
- node_states[N_HIGH_MEMORY]))
- return -EINVAL;
- }
- oldmem = cs->mems_allowed;
- if (nodes_equal(oldmem, trialcs.mems_allowed)) {
- retval = 0; /* Too easy - nothing to do */
- goto done;
- }
- retval = validate_change(cs, &trialcs);
- if (retval < 0)
- goto done;
-
- mutex_lock(&callback_mutex);
- cs->mems_allowed = trialcs.mems_allowed;
- cs->mems_generation = cpuset_mems_generation++;
- mutex_unlock(&callback_mutex);
+ int retval;
cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
mpol_rebind_mm(mm, &cs->mems_allowed);
if (migrate)
- cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed);
+ cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
mmput(mm);
}
return retval;
}
+/*
+ * Handle user request to change the 'mems' memory placement
+ * of a cpuset. Needs to validate the request, update the
+ * cpusets mems_allowed and mems_generation, and for each
+ * task in the cpuset, rebind any vma mempolicies and if
+ * the cpuset is marked 'memory_migrate', migrate the tasks
+ * pages to the new memory.
+ *
+ * Call with cgroup_mutex held. May take callback_mutex during call.
+ * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
+ * lock each such tasks mm->mmap_sem, scan its vma's and rebind
+ * their mempolicies to the cpusets new mems_allowed.
+ */
+static int update_nodemask(struct cpuset *cs, const char *buf)
+{
+ struct cpuset trialcs;
+ nodemask_t oldmem;
+ int retval;
+
+ /*
+ * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
+ * it's read-only
+ */
+ if (cs == &top_cpuset)
+ return -EACCES;
+
+ trialcs = *cs;
+
+ /*
+ * An empty mems_allowed is ok iff there are no tasks in the cpuset.
+ * Since nodelist_parse() fails on an empty mask, we special case
+ * that parsing. The validate_change() call ensures that cpusets
+ * with tasks have memory.
+ */
+ if (!*buf) {
+ nodes_clear(trialcs.mems_allowed);
+ } else {
+ retval = nodelist_parse(buf, trialcs.mems_allowed);
+ if (retval < 0)
+ goto done;
+
+ if (!nodes_subset(trialcs.mems_allowed,
+ node_states[N_HIGH_MEMORY]))
+ return -EINVAL;
+ }
+ oldmem = cs->mems_allowed;
+ if (nodes_equal(oldmem, trialcs.mems_allowed)) {
+ retval = 0; /* Too easy - nothing to do */
+ goto done;
+ }
+ retval = validate_change(cs, &trialcs);
+ if (retval < 0)
+ goto done;
+
+ mutex_lock(&callback_mutex);
+ cs->mems_allowed = trialcs.mems_allowed;
+ cs->mems_generation = cpuset_mems_generation++;
+ mutex_unlock(&callback_mutex);
+
+ retval = update_tasks_nodemask(cs, &oldmem);
+done:
+ return retval;
+}
+
int current_cpuset_is_being_rebound(void)
{
return task_cs(current) == cpuset_being_rebound;
if (val != cs->relax_domain_level) {
cs->relax_domain_level = val;
- rebuild_sched_domains();
+ if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
+ rebuild_sched_domains();
}
return 0;
FILE_SPREAD_SLAB,
} cpuset_filetype_t;
-static ssize_t cpuset_common_file_write(struct cgroup *cont,
- struct cftype *cft,
- struct file *file,
- const char __user *userbuf,
- size_t nbytes, loff_t *unused_ppos)
-{
- struct cpuset *cs = cgroup_cs(cont);
- cpuset_filetype_t type = cft->private;
- char *buffer;
- int retval = 0;
-
- /* Crude upper limit on largest legitimate cpulist user might write. */
- if (nbytes > 100U + 6 * max(NR_CPUS, MAX_NUMNODES))
- return -E2BIG;
-
- /* +1 for nul-terminator */
- buffer = kmalloc(nbytes + 1, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
- if (copy_from_user(buffer, userbuf, nbytes)) {
- retval = -EFAULT;
- goto out1;
- }
- buffer[nbytes] = 0; /* nul-terminate */
-
- cgroup_lock();
-
- if (cgroup_is_removed(cont)) {
- retval = -ENODEV;
- goto out2;
- }
-
- switch (type) {
- case FILE_CPULIST:
- retval = update_cpumask(cs, buffer);
- break;
- case FILE_MEMLIST:
- retval = update_nodemask(cs, buffer);
- break;
- default:
- retval = -EINVAL;
- goto out2;
- }
-
- if (retval == 0)
- retval = nbytes;
-out2:
- cgroup_unlock();
-out1:
- kfree(buffer);
- return retval;
-}
-
static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
{
int retval = 0;
struct cpuset *cs = cgroup_cs(cgrp);
cpuset_filetype_t type = cft->private;
- cgroup_lock();
-
- if (cgroup_is_removed(cgrp)) {
- cgroup_unlock();
+ if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
- }
switch (type) {
case FILE_CPU_EXCLUSIVE:
struct cpuset *cs = cgroup_cs(cgrp);
cpuset_filetype_t type = cft->private;
- cgroup_lock();
-
- if (cgroup_is_removed(cgrp)) {
- cgroup_unlock();
+ if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
- }
+
switch (type) {
case FILE_SCHED_RELAX_DOMAIN_LEVEL:
retval = update_relax_domain_level(cs, val);
return retval;
}
+/*
+ * Common handling for a write to a "cpus" or "mems" file.
+ */
+static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
+ const char *buf)
+{
+ int retval = 0;
+
+ if (!cgroup_lock_live_group(cgrp))
+ return -ENODEV;
+
+ switch (cft->private) {
+ case FILE_CPULIST:
+ retval = update_cpumask(cgroup_cs(cgrp), buf);
+ break;
+ case FILE_MEMLIST:
+ retval = update_nodemask(cgroup_cs(cgrp), buf);
+ break;
+ default:
+ retval = -EINVAL;
+ break;
+ }
+ cgroup_unlock();
+ return retval;
+}
+
/*
* These ascii lists should be read in a single call, by using a user
* buffer large enough to hold the entire map. If read in smaller
{
.name = "cpus",
.read = cpuset_common_file_read,
- .write = cpuset_common_file_write,
+ .write_string = cpuset_write_resmask,
+ .max_write_len = (100U + 6 * NR_CPUS),
.private = FILE_CPULIST,
},
{
.name = "mems",
.read = cpuset_common_file_read,
- .write = cpuset_common_file_write,
+ .write_string = cpuset_write_resmask,
+ .max_write_len = (100U + 6 * MAX_NUMNODES),
.private = FILE_MEMLIST,
},
scan.scan.heap = NULL;
scan.to = to->css.cgroup;
- if (cgroup_scan_tasks((struct cgroup_scanner *)&scan))
+ if (cgroup_scan_tasks(&scan.scan))
printk(KERN_ERR "move_member_tasks_to_cpuset: "
"cgroup_scan_tasks failed\n");
}
struct cpuset *child; /* scans child cpusets of cp */
struct list_head queue;
struct cgroup *cont;
+ nodemask_t oldmems;
INIT_LIST_HEAD(&queue);
nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
continue;
+ oldmems = cp->mems_allowed;
+
/* Remove offline cpus and mems from this cpuset. */
mutex_lock(&callback_mutex);
cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map);
if (cpus_empty(cp->cpus_allowed) ||
nodes_empty(cp->mems_allowed))
remove_tasks_in_empty_cpuset(cp);
+ else {
+ update_tasks_cpumask(cp);
+ update_tasks_nodemask(cp, &oldmems);
+ }
}
}
}
/**
-
* cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
* @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
* @pmask: pointer to cpumask_t variable to receive cpus_allowed set.
d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
+ tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
+ d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
d->blkio_count += tsk->delays->blkio_count;
d->swapin_count += tsk->delays->swapin_count;
+ d->freepages_count += tsk->delays->freepages_count;
spin_unlock_irqrestore(&tsk->delays->lock, flags);
done:
return ret;
}
+void __delayacct_freepages_start(void)
+{
+ delayacct_start(¤t->delays->freepages_start);
+}
+
+void __delayacct_freepages_end(void)
+{
+ delayacct_end(¤t->delays->freepages_start,
+ ¤t->delays->freepages_end,
+ ¤t->delays->freepages_delay,
+ ¤t->delays->freepages_count);
+}
+
BUG_ON(!sig);
BUG_ON(!atomic_read(&sig->count));
- rcu_read_lock();
sighand = rcu_dereference(tsk->sighand);
spin_lock(&sighand->siglock);
sig->nivcsw += tsk->nivcsw;
sig->inblock += task_io_get_inblock(tsk);
sig->oublock += task_io_get_oublock(tsk);
+#ifdef CONFIG_TASK_XACCT
+ sig->rchar += tsk->rchar;
+ sig->wchar += tsk->wchar;
+ sig->syscr += tsk->syscr;
+ sig->syscw += tsk->syscw;
+#endif /* CONFIG_TASK_XACCT */
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+ sig->ioac.read_bytes += tsk->ioac.read_bytes;
+ sig->ioac.write_bytes += tsk->ioac.write_bytes;
+ sig->ioac.cancelled_write_bytes +=
+ tsk->ioac.cancelled_write_bytes;
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
sig = NULL; /* Marker for below. */
}
tsk->signal = NULL;
tsk->sighand = NULL;
spin_unlock(&sighand->siglock);
- rcu_read_unlock();
__cleanup_sighand(sighand);
clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
* We don't want to have TIF_FREEZE set if the system-wide hibernation
* or suspend transition begins right now.
*/
- current->flags |= PF_NOFREEZE;
+ current->flags |= (PF_NOFREEZE | PF_KTHREAD);
if (current->nsproxy != &init_nsproxy) {
get_nsproxy(&init_nsproxy);
static void exit_mm(struct task_struct * tsk)
{
struct mm_struct *mm = tsk->mm;
+ struct core_state *core_state;
mm_release(tsk, mm);
if (!mm)
return;
/*
* Serialize with any possible pending coredump.
- * We must hold mmap_sem around checking core_waiters
+ * We must hold mmap_sem around checking core_state
* and clearing tsk->mm. The core-inducing thread
- * will increment core_waiters for each thread in the
+ * will increment ->nr_threads for each thread in the
* group with ->mm != NULL.
*/
down_read(&mm->mmap_sem);
- if (mm->core_waiters) {
+ core_state = mm->core_state;
+ if (core_state) {
+ struct core_thread self;
up_read(&mm->mmap_sem);
- down_write(&mm->mmap_sem);
- if (!--mm->core_waiters)
- complete(mm->core_startup_done);
- up_write(&mm->mmap_sem);
- wait_for_completion(&mm->core_done);
+ self.task = tsk;
+ self.next = xchg(&core_state->dumper.next, &self);
+ /*
+ * Implies mb(), the result of xchg() must be visible
+ * to core_state->dumper.
+ */
+ if (atomic_dec_and_test(&core_state->nr_threads))
+ complete(&core_state->startup);
+
+ for (;;) {
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ if (!self.task) /* see coredump_finish() */
+ break;
+ schedule();
+ }
+ __set_task_state(tsk, TASK_RUNNING);
down_read(&mm->mmap_sem);
}
atomic_inc(&mm->mm_count);
psig->coublock +=
task_io_get_oublock(p) +
sig->oublock + sig->coublock;
+#ifdef CONFIG_TASK_XACCT
+ psig->rchar += p->rchar + sig->rchar;
+ psig->wchar += p->wchar + sig->wchar;
+ psig->syscr += p->syscr + sig->syscr;
+ psig->syscw += p->syscw + sig->syscw;
+#endif /* CONFIG_TASK_XACCT */
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+ psig->ioac.read_bytes +=
+ p->ioac.read_bytes + sig->ioac.read_bytes;
+ psig->ioac.write_bytes +=
+ p->ioac.write_bytes + sig->ioac.write_bytes;
+ psig->ioac.cancelled_write_bytes +=
+ p->ioac.cancelled_write_bytes +
+ sig->ioac.cancelled_write_bytes;
+#endif /* CONFIG_TASK_IO_ACCOUNTING */
spin_unlock_irq(&p->parent->sighand->siglock);
}
static struct kmem_cache *task_struct_cachep;
#endif
+#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
+{
+#ifdef CONFIG_DEBUG_STACK_USAGE
+ gfp_t mask = GFP_KERNEL | __GFP_ZERO;
+#else
+ gfp_t mask = GFP_KERNEL;
+#endif
+ return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
+}
+
+static inline void free_thread_info(struct thread_info *ti)
+{
+ free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+}
+#endif
+
/* SLAB cache for signal_struct structures (tsk->signal) */
static struct kmem_cache *signal_cachep;
INIT_LIST_HEAD(&mm->mmlist);
mm->flags = (current->mm) ? current->mm->flags
: MMF_DUMP_FILTER_DEFAULT;
- mm->core_waiters = 0;
+ mm->core_state = NULL;
mm->nr_ptes = 0;
set_mm_counter(mm, file_rss, 0);
set_mm_counter(mm, anon_rss, 0);
/**
* get_task_mm - acquire a reference to the task's mm
*
- * Returns %NULL if the task has no mm. Checks PF_BORROWED_MM (meaning
+ * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning
* this kernel workthread has transiently adopted a user mm with use_mm,
* to do its AIO) is not set and if so returns a reference to it, after
* bumping up the use count. User must release the mm via mmput()
task_lock(task);
mm = task->mm;
if (mm) {
- if (task->flags & PF_BORROWED_MM)
+ if (task->flags & PF_KTHREAD)
mm = NULL;
else
atomic_inc(&mm->mm_users);
sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
+#ifdef CONFIG_TASK_XACCT
+ sig->rchar = sig->wchar = sig->syscr = sig->syscw = 0;
+#endif
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+ memset(&sig->ioac, 0, sizeof(sig->ioac));
+#endif
sig->sum_sched_runtime = 0;
INIT_LIST_HEAD(&sig->cpu_timers[0]);
INIT_LIST_HEAD(&sig->cpu_timers[1]);
if (clone_flags & CLONE_THREAD)
p->tgid = current->tgid;
+ if (current->nsproxy != p->nsproxy) {
+ retval = ns_cgroup_clone(p, pid);
+ if (retval)
+ goto bad_fork_free_pid;
+ }
+
p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
/*
* Clear TID on mm_release()?
}
} else {
if (desc->wake_depth == 0) {
- printk(KERN_WARNING "Unbalanced IRQ %d "
- "wake disable\n", irq);
- WARN_ON(1);
+ WARN(1, "Unbalanced IRQ %d wake disable\n", irq);
} else if (--desc->wake_depth == 0) {
ret = set_irq_wake_real(irq, on);
if (ret)
high = kallsyms_num_syms;
while (high - low > 1) {
- mid = (low + high) / 2;
+ mid = low + (high - low) / 2;
if (kallsyms_addresses[mid] <= addr)
low = mid;
else
* @path: path to usermode executable
* @argv: arg vector for process
* @envp: environment for process
+ * @gfp_mask: gfp mask for memory allocation
*
* Returns either %NULL on allocation failure, or a subprocess_info
* structure. This should be passed to call_usermodehelper_exec to
* exec the process and free the structure.
*/
-struct subprocess_info *call_usermodehelper_setup(char *path,
- char **argv, char **envp)
+struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
+ char **envp, gfp_t gfp_mask)
{
struct subprocess_info *sub_info;
- sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC);
+ sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask);
if (!sub_info)
goto out;
struct subprocess_info *sub_info;
int ret;
- sub_info = call_usermodehelper_setup(path, argv, envp);
+ sub_info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL);
if (sub_info == NULL)
return -ENOMEM;
addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name)))
#endif
+static int kprobes_initialized;
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
static bool kprobe_enabled;
DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
-DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
+static struct {
+ spinlock_t lock ____cacheline_aligned;
+} kretprobe_table_locks[KPROBE_TABLE_SIZE];
+
+static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
+{
+ return &(kretprobe_table_locks[hash].lock);
+}
/*
* Normally, functions that we'd want to prohibit kprobes in, are marked
return;
}
-/* Called with kretprobe_lock held */
void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
struct hlist_head *head)
{
+ struct kretprobe *rp = ri->rp;
+
/* remove rp inst off the rprobe_inst_table */
hlist_del(&ri->hlist);
- if (ri->rp) {
- /* remove rp inst off the used list */
- hlist_del(&ri->uflist);
- /* put rp inst back onto the free list */
- INIT_HLIST_NODE(&ri->uflist);
- hlist_add_head(&ri->uflist, &ri->rp->free_instances);
+ INIT_HLIST_NODE(&ri->hlist);
+ if (likely(rp)) {
+ spin_lock(&rp->lock);
+ hlist_add_head(&ri->hlist, &rp->free_instances);
+ spin_unlock(&rp->lock);
} else
/* Unregistering */
hlist_add_head(&ri->hlist, head);
}
-struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
+void kretprobe_hash_lock(struct task_struct *tsk,
+ struct hlist_head **head, unsigned long *flags)
{
- return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
+ unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
+ spinlock_t *hlist_lock;
+
+ *head = &kretprobe_inst_table[hash];
+ hlist_lock = kretprobe_table_lock_ptr(hash);
+ spin_lock_irqsave(hlist_lock, *flags);
+}
+
+void kretprobe_table_lock(unsigned long hash, unsigned long *flags)
+{
+ spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
+ spin_lock_irqsave(hlist_lock, *flags);
+}
+
+void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags)
+{
+ unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
+ spinlock_t *hlist_lock;
+
+ hlist_lock = kretprobe_table_lock_ptr(hash);
+ spin_unlock_irqrestore(hlist_lock, *flags);
+}
+
+void kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
+{
+ spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
+ spin_unlock_irqrestore(hlist_lock, *flags);
}
/*
struct kretprobe_instance *ri;
struct hlist_head *head, empty_rp;
struct hlist_node *node, *tmp;
- unsigned long flags = 0;
+ unsigned long hash, flags = 0;
- INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(tk);
+ if (unlikely(!kprobes_initialized))
+ /* Early boot. kretprobe_table_locks not yet initialized. */
+ return;
+
+ hash = hash_ptr(tk, KPROBE_HASH_BITS);
+ head = &kretprobe_inst_table[hash];
+ kretprobe_table_lock(hash, &flags);
hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
if (ri->task == tk)
recycle_rp_inst(ri, &empty_rp);
}
- spin_unlock_irqrestore(&kretprobe_lock, flags);
-
+ kretprobe_table_unlock(hash, &flags);
+ INIT_HLIST_HEAD(&empty_rp);
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
kfree(ri);
struct kretprobe_instance *ri;
struct hlist_node *pos, *next;
- hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, uflist) {
- hlist_del(&ri->uflist);
+ hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) {
+ hlist_del(&ri->hlist);
kfree(ri);
}
}
static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
{
- unsigned long flags;
+ unsigned long flags, hash;
struct kretprobe_instance *ri;
struct hlist_node *pos, *next;
+ struct hlist_head *head;
+
/* No race here */
- spin_lock_irqsave(&kretprobe_lock, flags);
- hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) {
- ri->rp = NULL;
- hlist_del(&ri->uflist);
+ for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
+ kretprobe_table_lock(hash, &flags);
+ head = &kretprobe_inst_table[hash];
+ hlist_for_each_entry_safe(ri, pos, next, head, hlist) {
+ if (ri->rp == rp)
+ ri->rp = NULL;
+ }
+ kretprobe_table_unlock(hash, &flags);
}
- spin_unlock_irqrestore(&kretprobe_lock, flags);
free_rp_inst(rp);
}
struct pt_regs *regs)
{
struct kretprobe *rp = container_of(p, struct kretprobe, kp);
- unsigned long flags = 0;
+ unsigned long hash, flags = 0;
+ struct kretprobe_instance *ri;
/*TODO: consider to only swap the RA after the last pre_handler fired */
- spin_lock_irqsave(&kretprobe_lock, flags);
+ hash = hash_ptr(current, KPROBE_HASH_BITS);
+ spin_lock_irqsave(&rp->lock, flags);
if (!hlist_empty(&rp->free_instances)) {
- struct kretprobe_instance *ri;
-
ri = hlist_entry(rp->free_instances.first,
- struct kretprobe_instance, uflist);
+ struct kretprobe_instance, hlist);
+ hlist_del(&ri->hlist);
+ spin_unlock_irqrestore(&rp->lock, flags);
+
ri->rp = rp;
ri->task = current;
if (rp->entry_handler && rp->entry_handler(ri, regs)) {
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ spin_unlock_irqrestore(&rp->lock, flags);
return 0;
}
arch_prepare_kretprobe(ri, regs);
/* XXX(hch): why is there no hlist_move_head? */
- hlist_del(&ri->uflist);
- hlist_add_head(&ri->uflist, &ri->rp->used_instances);
- hlist_add_head(&ri->hlist, kretprobe_inst_table_head(ri->task));
- } else
+ INIT_HLIST_NODE(&ri->hlist);
+ kretprobe_table_lock(hash, &flags);
+ hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
+ kretprobe_table_unlock(hash, &flags);
+ } else {
rp->nmissed++;
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ spin_unlock_irqrestore(&rp->lock, flags);
+ }
return 0;
}
rp->maxactive = NR_CPUS;
#endif
}
- INIT_HLIST_HEAD(&rp->used_instances);
+ spin_lock_init(&rp->lock);
INIT_HLIST_HEAD(&rp->free_instances);
for (i = 0; i < rp->maxactive; i++) {
inst = kmalloc(sizeof(struct kretprobe_instance) +
free_rp_inst(rp);
return -ENOMEM;
}
- INIT_HLIST_NODE(&inst->uflist);
- hlist_add_head(&inst->uflist, &rp->free_instances);
+ INIT_HLIST_NODE(&inst->hlist);
+ hlist_add_head(&inst->hlist, &rp->free_instances);
}
rp->nmissed = 0;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
INIT_HLIST_HEAD(&kprobe_table[i]);
INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
+ spin_lock_init(&(kretprobe_table_locks[i].lock));
}
/*
err = arch_init_kprobes();
if (!err)
err = register_die_notifier(&kprobe_exceptions_nb);
+ kprobes_initialized = (err == 0);
if (!err)
init_test_probes();
EXPORT_SYMBOL_GPL(unregister_jprobe);
EXPORT_SYMBOL_GPL(register_jprobes);
EXPORT_SYMBOL_GPL(unregister_jprobes);
-#ifdef CONFIG_KPROBES
EXPORT_SYMBOL_GPL(jprobe_return);
-#endif
-
-#ifdef CONFIG_KPROBES
EXPORT_SYMBOL_GPL(register_kretprobe);
EXPORT_SYMBOL_GPL(unregister_kretprobe);
EXPORT_SYMBOL_GPL(register_kretprobes);
EXPORT_SYMBOL_GPL(unregister_kretprobes);
-#endif
hlist_del(&e->hlist);
/* Make sure the call_rcu has been executed */
if (e->rcu_pending)
- rcu_barrier();
+ rcu_barrier_sched();
kfree(e);
return 0;
}
hlist_del(&(*entry)->hlist);
/* Make sure the call_rcu has been executed */
if ((*entry)->rcu_pending)
- rcu_barrier();
+ rcu_barrier_sched();
kfree(*entry);
*entry = e;
trace_mark(core_marker_format, "name %s format %s",
* make sure it's executed now.
*/
if (entry->rcu_pending)
- rcu_barrier();
+ rcu_barrier_sched();
old = marker_entry_add_probe(entry, probe, probe_private);
if (IS_ERR(old)) {
ret = PTR_ERR(old);
entry->rcu_pending = 1;
/* write rcu_pending before calling the RCU callback */
smp_wmb();
-#ifdef CONFIG_PREEMPT_RCU
- synchronize_sched(); /* Until we have the call_rcu_sched() */
-#endif
- call_rcu(&entry->rcu, free_old_closure);
+ call_rcu_sched(&entry->rcu, free_old_closure);
end:
mutex_unlock(&markers_mutex);
return ret;
if (!entry)
goto end;
if (entry->rcu_pending)
- rcu_barrier();
+ rcu_barrier_sched();
old = marker_entry_remove_probe(entry, probe, probe_private);
mutex_unlock(&markers_mutex);
marker_update_probes(); /* may update entry */
entry->rcu_pending = 1;
/* write rcu_pending before calling the RCU callback */
smp_wmb();
-#ifdef CONFIG_PREEMPT_RCU
- synchronize_sched(); /* Until we have the call_rcu_sched() */
-#endif
- call_rcu(&entry->rcu, free_old_closure);
+ call_rcu_sched(&entry->rcu, free_old_closure);
remove_marker(name); /* Ignore busy error message */
ret = 0;
end:
goto end;
}
if (entry->rcu_pending)
- rcu_barrier();
+ rcu_barrier_sched();
old = marker_entry_remove_probe(entry, NULL, probe_private);
mutex_unlock(&markers_mutex);
marker_update_probes(); /* may update entry */
entry->rcu_pending = 1;
/* write rcu_pending before calling the RCU callback */
smp_wmb();
-#ifdef CONFIG_PREEMPT_RCU
- synchronize_sched(); /* Until we have the call_rcu_sched() */
-#endif
- call_rcu(&entry->rcu, free_old_closure);
+ call_rcu_sched(&entry->rcu, free_old_closure);
remove_marker(entry->name); /* Ignore busy error message */
end:
mutex_unlock(&markers_mutex);
#include <linux/module.h>
#include <linux/cgroup.h>
#include <linux/fs.h>
+#include <linux/proc_fs.h>
#include <linux/slab.h>
#include <linux/nsproxy.h>
struct ns_cgroup, css);
}
-int ns_cgroup_clone(struct task_struct *task)
+int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
{
- return cgroup_clone(task, &ns_subsys);
+ char name[PROC_NUMBUF];
+
+ snprintf(name, PROC_NUMBUF, "%d", pid_vnr(pid));
+ return cgroup_clone(task, &ns_subsys, name);
}
/*
goto out;
}
- err = ns_cgroup_clone(tsk);
- if (err) {
- put_nsproxy(new_ns);
- goto out;
- }
-
tsk->nsproxy = new_ns;
out:
goto out;
}
- err = ns_cgroup_clone(current);
+ err = ns_cgroup_clone(current, task_pid(current));
if (err)
put_nsproxy(*new_nsp);
add_taint(TAINT_WARN);
}
EXPORT_SYMBOL(warn_on_slowpath);
+
+
+void warn_slowpath(const char *file, int line, const char *fmt, ...)
+{
+ va_list args;
+ char function[KSYM_SYMBOL_LEN];
+ unsigned long caller = (unsigned long)__builtin_return_address(0);
+ sprint_symbol(function, caller);
+
+ printk(KERN_WARNING "------------[ cut here ]------------\n");
+ printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file,
+ line, function);
+ va_start(args, fmt);
+ vprintk(fmt, args);
+ va_end(args);
+
+ print_modules();
+ dump_stack();
+ print_oops_end_marker();
+ add_taint(TAINT_WARN);
+}
+EXPORT_SYMBOL(warn_slowpath);
#endif
#ifdef CONFIG_CC_STACKPROTECTOR
}
EXPORT_SYMBOL_GPL(find_vpid);
-struct pid *find_pid(int nr)
-{
- return find_pid_ns(nr, &init_pid_ns);
-}
-EXPORT_SYMBOL_GPL(find_pid);
-
/*
* attach_pid() must be called with the tasklist_lock write-held.
*/
return pid;
}
+EXPORT_SYMBOL_GPL(find_get_pid);
pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
{
/*
* Used by proc to find the first pid that is greater then or equal to nr.
*
- * If there is a pid at nr this function is exactly the same as find_pid.
+ * If there is a pid at nr this function is exactly the same as find_pid_ns.
*/
struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
{
return pid;
}
-EXPORT_SYMBOL_GPL(find_get_pid);
/*
* The pid hash table is scaled according to the amount of memory in the
#include <linux/pid_namespace.h>
#include <linux/syscalls.h>
#include <linux/err.h>
+#include <linux/acct.h>
#define BITS_PER_PAGE (PAGE_SIZE*8)
struct pid_namespace *ns;
int i;
- ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
+ ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
if (ns == NULL)
goto out;
goto out_free_map;
kref_init(&ns->kref);
- ns->last_pid = 0;
- ns->child_reaper = NULL;
ns->level = level;
set_bit(0, ns->pidmap[0].page);
atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
- for (i = 1; i < PIDMAP_ENTRIES; i++) {
- ns->pidmap[i].page = NULL;
+ for (i = 1; i < PIDMAP_ENTRIES; i++)
atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
- }
return ns;
/* Child reaper for the pid namespace is going away */
pid_ns->child_reaper = NULL;
+ acct_exit_ns(pid_ns);
return;
}
spin_unlock_irqrestore(&idr_lock, flags);
}
sigqueue_free(tmr->sigq);
- if (unlikely(tmr->it_process) &&
- tmr->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
- put_task_struct(tmr->it_process);
kmem_cache_free(posix_timers_cache, tmr);
}
* This keeps any tasks waiting on the spin lock from thinking
* they got something (see the lock code above).
*/
- if (timer->it_process) {
- if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
- put_task_struct(timer->it_process);
- timer->it_process = NULL;
- }
+ if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
+ put_task_struct(timer->it_process);
+ timer->it_process = NULL;
+
unlock_timer(timer, flags);
release_posix_timer(timer, IT_ID_SET);
return 0;
* This keeps any tasks waiting on the spin lock from thinking
* they got something (see the lock code above).
*/
- if (timer->it_process) {
- if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
- put_task_struct(timer->it_process);
- timer->it_process = NULL;
- }
+ if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
+ put_task_struct(timer->it_process);
+ timer->it_process = NULL;
+
unlock_timer(timer, flags);
release_posix_timer(timer, IT_ID_SET);
}
}
#if defined CONFIG_PRINTK
+
+DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
/*
* printk rate limiting, lifted from the networking subsystem.
*
* every printk_ratelimit_jiffies to make a denial-of-service
* attack impossible.
*/
-int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst)
-{
- return __ratelimit(ratelimit_jiffies, ratelimit_burst);
-}
-EXPORT_SYMBOL(__printk_ratelimit);
-
-/* minimum time in jiffies between messages */
-int printk_ratelimit_jiffies = 5 * HZ;
-
-/* number of messages we send before ratelimiting */
-int printk_ratelimit_burst = 10;
-
int printk_ratelimit(void)
{
- return __printk_ratelimit(printk_ratelimit_jiffies,
- printk_ratelimit_burst);
+ return __ratelimit(&printk_ratelimit_state);
}
EXPORT_SYMBOL(printk_ratelimit);
/* Profile event notifications */
-#ifdef CONFIG_PROFILING
-
static BLOCKING_NOTIFIER_HEAD(task_exit_notifier);
static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
static BLOCKING_NOTIFIER_HEAD(munmap_notifier);
}
EXPORT_SYMBOL_GPL(unregister_timer_hook);
-#endif /* CONFIG_PROFILING */
-
#ifdef CONFIG_SMP
/*
#include <linux/slab.h>
#include <linux/res_counter.h>
#include <linux/uaccess.h>
+#include <linux/mm.h>
void res_counter_init(struct res_counter *counter)
{
return *res_counter_member(counter, member);
}
-ssize_t res_counter_write(struct res_counter *counter, int member,
- const char __user *userbuf, size_t nbytes, loff_t *pos,
- int (*write_strategy)(char *st_buf, unsigned long long *val))
+int res_counter_memparse_write_strategy(const char *buf,
+ unsigned long long *res)
{
- int ret;
- char *buf, *end;
- unsigned long flags;
- unsigned long long tmp, *val;
-
- buf = kmalloc(nbytes + 1, GFP_KERNEL);
- ret = -ENOMEM;
- if (buf == NULL)
- goto out;
+ char *end;
+ /* FIXME - make memparse() take const char* args */
+ *res = memparse((char *)buf, &end);
+ if (*end != '\0')
+ return -EINVAL;
- buf[nbytes] = '\0';
- ret = -EFAULT;
- if (copy_from_user(buf, userbuf, nbytes))
- goto out_free;
+ *res = PAGE_ALIGN(*res);
+ return 0;
+}
- ret = -EINVAL;
+int res_counter_write(struct res_counter *counter, int member,
+ const char *buf, write_strategy_fn write_strategy)
+{
+ char *end;
+ unsigned long flags;
+ unsigned long long tmp, *val;
- strstrip(buf);
if (write_strategy) {
- if (write_strategy(buf, &tmp)) {
- goto out_free;
- }
+ if (write_strategy(buf, &tmp))
+ return -EINVAL;
} else {
tmp = simple_strtoull(buf, &end, 10);
if (*end != '\0')
- goto out_free;
+ return -EINVAL;
}
spin_lock_irqsave(&counter->lock, flags);
val = res_counter_member(counter, member);
*val = tmp;
spin_unlock_irqrestore(&counter->lock, flags);
- ret = nbytes;
-out_free:
- kfree(buf);
-out:
- return ret;
+ return 0;
}
cpustat->nice = cputime64_add(cpustat->nice, tmp);
else
cpustat->user = cputime64_add(cpustat->user, tmp);
+ /* Account for user time used */
+ acct_update_integrals(p);
}
/*
spin_unlock_irqrestore(¤t->sighand->siglock, flags);
}
-static int collect_signal(int sig, struct sigpending *list, siginfo_t *info)
+static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
{
struct sigqueue *q, *first = NULL;
- int still_pending = 0;
-
- if (unlikely(!sigismember(&list->signal, sig)))
- return 0;
/*
* Collect the siginfo appropriate to this signal. Check if
*/
list_for_each_entry(q, &list->list, list) {
if (q->info.si_signo == sig) {
- if (first) {
- still_pending = 1;
- break;
- }
+ if (first)
+ goto still_pending;
first = q;
}
}
+
+ sigdelset(&list->signal, sig);
+
if (first) {
+still_pending:
list_del_init(&first->list);
copy_siginfo(info, &first->info);
__sigqueue_free(first);
- if (!still_pending)
- sigdelset(&list->signal, sig);
} else {
-
/* Ok, it wasn't in the queue. This must be
a fast-pathed signal or we must have been
out of queue space. So zero out the info.
*/
- sigdelset(&list->signal, sig);
info->si_signo = sig;
info->si_errno = 0;
info->si_code = 0;
info->si_pid = 0;
info->si_uid = 0;
}
- return 1;
}
static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
}
}
- if (!collect_signal(sig, pending, info))
- sig = 0;
+ collect_signal(sig, pending, info);
}
return sig;
* is to alert stop-signal processing code when another
* processor has come along and cleared the flag.
*/
- if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT))
- tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
+ tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
}
if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) {
/*
* is probably wrong. Should make it like BSD or SYSV.
*/
-static int kill_something_info(int sig, struct siginfo *info, int pid)
+static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
{
int ret;
}
EXPORT_SYMBOL(kill_pid);
-int
-kill_proc(pid_t pid, int sig, int priv)
-{
- int ret;
-
- rcu_read_lock();
- ret = kill_pid_info(sig, __si_special(priv), find_pid(pid));
- rcu_read_unlock();
- return ret;
-}
-
/*
* These functions support sending signals using preallocated sigqueue
* structures. This is needed "because realtime applications cannot
info.si_uid = tsk->uid;
- /* FIXME: find out whether or not this is supposed to be c*time. */
- info.si_utime = cputime_to_jiffies(cputime_add(tsk->utime,
+ info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
tsk->signal->utime));
- info.si_stime = cputime_to_jiffies(cputime_add(tsk->stime,
+ info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
tsk->signal->stime));
info.si_status = tsk->exit_code & 0x7f;
info.si_uid = tsk->uid;
- /* FIXME: find out whether or not this is supposed to be c*time. */
- info.si_utime = cputime_to_jiffies(tsk->utime);
- info.si_stime = cputime_to_jiffies(tsk->stime);
+ info.si_utime = cputime_to_clock_t(tsk->utime);
+ info.si_stime = cputime_to_clock_t(tsk->stime);
info.si_code = why;
switch (why) {
* is a deadlock situation, and pointless because our tracer
* is dead so don't allow us to stop.
* If SIGKILL was already sent before the caller unlocked
- * ->siglock we must see ->core_waiters != 0. Otherwise it
+ * ->siglock we must see ->core_state != NULL. Otherwise it
* is safe to enter schedule().
*/
- if (unlikely(current->mm->core_waiters) &&
+ if (unlikely(current->mm->core_state) &&
unlikely(current->mm == current->parent->mm))
return 0;
*/
static int sigkill_pending(struct task_struct *tsk)
{
- return ((sigismember(&tsk->pending.signal, SIGKILL) ||
- sigismember(&tsk->signal->shared_pending.signal, SIGKILL)) &&
- !unlikely(sigismember(&tsk->blocked, SIGKILL)));
+ return sigismember(&tsk->pending.signal, SIGKILL) ||
+ sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
}
/*
*/
static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
{
- int killed = 0;
-
if (arch_ptrace_stop_needed(exit_code, info)) {
/*
* The arch code has something special to do before a
spin_unlock_irq(¤t->sighand->siglock);
arch_ptrace_stop(exit_code, info);
spin_lock_irq(¤t->sighand->siglock);
- killed = sigkill_pending(current);
+ if (sigkill_pending(current))
+ return;
}
/*
__set_current_state(TASK_TRACED);
spin_unlock_irq(¤t->sighand->siglock);
read_lock(&tasklist_lock);
- if (!unlikely(killed) && may_ptrace_stop()) {
+ if (may_ptrace_stop()) {
do_notify_parent_cldstop(current, CLD_TRAPPED);
read_unlock(&tasklist_lock);
schedule();
} else {
struct task_struct *t;
- if (unlikely((sig->flags & (SIGNAL_STOP_DEQUEUED | SIGNAL_UNKILLABLE))
- != SIGNAL_STOP_DEQUEUED) ||
+ if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
unlikely(signal_group_exit(sig)))
return 0;
/*
EXPORT_SYMBOL_GPL(dequeue_signal);
EXPORT_SYMBOL(flush_signals);
EXPORT_SYMBOL(force_sig);
-EXPORT_SYMBOL(kill_proc);
EXPORT_SYMBOL(ptrace_notify);
EXPORT_SYMBOL(send_sig);
EXPORT_SYMBOL(send_sig_info);
}
asmlinkage long
-sys_kill(int pid, int sig)
+sys_kill(pid_t pid, int sig)
{
struct siginfo info;
return kill_something_info(sig, &info, pid);
}
-static int do_tkill(int tgid, int pid, int sig)
+static int do_tkill(pid_t tgid, pid_t pid, int sig)
{
int error;
struct siginfo info;
* exists but it's not belonging to the target process anymore. This
* method solves the problem of threads exiting and PIDs getting reused.
*/
-asmlinkage long sys_tgkill(int tgid, int pid, int sig)
+asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig)
{
/* This is only valid for single tasks */
if (pid <= 0 || tgid <= 0)
* Send a signal to only one task, even if it's a CLONE_THREAD task.
*/
asmlinkage long
-sys_tkill(int pid, int sig)
+sys_tkill(pid_t pid, int sig)
{
/* This is only valid for single tasks */
if (pid <= 0)
}
asmlinkage long
-sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo)
+sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo)
{
siginfo_t info;
DECLARE_RWSEM(uts_sem);
-EXPORT_SYMBOL(uts_sem);
-
asmlinkage long sys_newuname(struct new_utsname __user * name)
{
int errno = 0;
goto out;
}
- info = call_usermodehelper_setup(argv[0], argv, envp);
+ info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
if (info == NULL) {
argv_free(argv);
goto out;
cond_syscall(sys_get_robust_list);
cond_syscall(compat_sys_get_robust_list);
cond_syscall(sys_epoll_create);
+cond_syscall(sys_epoll_create1);
cond_syscall(sys_epoll_ctl);
cond_syscall(sys_epoll_wait);
cond_syscall(sys_epoll_pwait);
cond_syscall(sys_signalfd);
cond_syscall(sys_signalfd4);
cond_syscall(compat_sys_signalfd);
+cond_syscall(compat_sys_signalfd4);
cond_syscall(sys_timerfd_create);
cond_syscall(sys_timerfd_settime);
cond_syscall(sys_timerfd_gettime);
{
.ctl_name = KERN_PRINTK_RATELIMIT,
.procname = "printk_ratelimit",
- .data = &printk_ratelimit_jiffies,
+ .data = &printk_ratelimit_state.interval,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
{
.ctl_name = KERN_PRINTK_RATELIMIT_BURST,
.procname = "printk_ratelimit_burst",
- .data = &printk_ratelimit_burst,
+ .data = &printk_ratelimit_state.burst,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
sysctl_check_leaf(namespaces, table, &fail);
}
sysctl_check_bin_path(table, &fail);
+ if (table->mode > 0777)
+ set_fail(&fail, table, "bogus .mode");
if (fail) {
set_fail(&fail, table, NULL);
error = -EINVAL;
*/
#define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS)
-static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
+static DEFINE_PER_CPU(__u32, taskstats_seqnum);
static int family_registered;
struct kmem_cache *taskstats_cache;
__trace_special(tr, data, 2, regs->ip, 0);
while (i < sample_max_depth) {
- frame.next_fp = 0;
+ frame.next_fp = NULL;
frame.return_address = 0;
if (!copy_stack_frame(fp, &frame))
break;
void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
{
struct timespec uptime, ts;
- s64 ac_etime;
+ u64 ac_etime;
BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
/* calculate task elapsed time in timespec */
do_posix_clock_monotonic_gettime(&uptime);
ts = timespec_sub(uptime, tsk->start_time);
- /* rebase elapsed time to usec */
+ /* rebase elapsed time to usec (should never be negative) */
ac_etime = timespec_to_ns(&ts);
do_div(ac_etime, NSEC_PER_USEC);
stats->ac_etime = ac_etime;
{
struct mm_struct *mm;
- /* convert pages-jiffies to Mbyte-usec */
- stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB;
- stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB;
+ /* convert pages-usec to Mbyte-usec */
+ stats->coremem = p->acct_rss_mem1 * PAGE_SIZE / MB;
+ stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE / MB;
mm = get_task_mm(p);
if (mm) {
/* adjust to KB unit */
void acct_update_integrals(struct task_struct *tsk)
{
if (likely(tsk->mm)) {
- long delta = cputime_to_jiffies(
- cputime_sub(tsk->stime, tsk->acct_stimexpd));
+ cputime_t time, dtime;
+ struct timeval value;
+ u64 delta;
+
+ time = tsk->stime + tsk->utime;
+ dtime = cputime_sub(time, tsk->acct_timexpd);
+ jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
+ delta = value.tv_sec;
+ delta = delta * USEC_PER_SEC + value.tv_usec;
if (delta == 0)
return;
- tsk->acct_stimexpd = tsk->stime;
+ tsk->acct_timexpd = time;
tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
}
*/
void acct_clear_integrals(struct task_struct *tsk)
{
- tsk->acct_stimexpd = 0;
+ tsk->acct_timexpd = 0;
tsk->acct_rss_mem1 = 0;
tsk->acct_vm_mem1 = 0;
}
}
static void insert_work(struct cpu_workqueue_struct *cwq,
- struct work_struct *work, int tail)
+ struct work_struct *work, struct list_head *head)
{
set_wq_data(work, cwq);
/*
* result of list_add() below, see try_to_grab_pending().
*/
smp_wmb();
- if (tail)
- list_add_tail(&work->entry, &cwq->worklist);
- else
- list_add(&work->entry, &cwq->worklist);
+ list_add_tail(&work->entry, head);
wake_up(&cwq->more_work);
}
unsigned long flags;
spin_lock_irqsave(&cwq->lock, flags);
- insert_work(cwq, work, 1);
+ insert_work(cwq, work, &cwq->worklist);
spin_unlock_irqrestore(&cwq->lock, flags);
}
*/
int queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
- int ret = 0;
+ int ret;
+
+ ret = queue_work_on(get_cpu(), wq, work);
+ put_cpu();
- if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
- BUG_ON(!list_empty(&work->entry));
- __queue_work(wq_per_cpu(wq, get_cpu()), work);
- put_cpu();
- ret = 1;
- }
return ret;
}
EXPORT_SYMBOL_GPL(queue_work);
}
static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
- struct wq_barrier *barr, int tail)
+ struct wq_barrier *barr, struct list_head *head)
{
INIT_WORK(&barr->work, wq_barrier_func);
__set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work));
init_completion(&barr->done);
- insert_work(cwq, &barr->work, tail);
+ insert_work(cwq, &barr->work, head);
}
static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
active = 0;
spin_lock_irq(&cwq->lock);
if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) {
- insert_wq_barrier(cwq, &barr, 1);
+ insert_wq_barrier(cwq, &barr, &cwq->worklist);
active = 1;
}
spin_unlock_irq(&cwq->lock);
}
EXPORT_SYMBOL_GPL(flush_workqueue);
+/**
+ * flush_work - block until a work_struct's callback has terminated
+ * @work: the work which is to be flushed
+ *
+ * Returns false if @work has already terminated.
+ *
+ * It is expected that, prior to calling flush_work(), the caller has
+ * arranged for the work to not be requeued, otherwise it doesn't make
+ * sense to use this function.
+ */
+int flush_work(struct work_struct *work)
+{
+ struct cpu_workqueue_struct *cwq;
+ struct list_head *prev;
+ struct wq_barrier barr;
+
+ might_sleep();
+ cwq = get_wq_data(work);
+ if (!cwq)
+ return 0;
+
+ lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+ lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
+
+ prev = NULL;
+ spin_lock_irq(&cwq->lock);
+ if (!list_empty(&work->entry)) {
+ /*
+ * See the comment near try_to_grab_pending()->smp_rmb().
+ * If it was re-queued under us we are not going to wait.
+ */
+ smp_rmb();
+ if (unlikely(cwq != get_wq_data(work)))
+ goto out;
+ prev = &work->entry;
+ } else {
+ if (cwq->current_work != work)
+ goto out;
+ prev = &cwq->worklist;
+ }
+ insert_wq_barrier(cwq, &barr, prev->next);
+out:
+ spin_unlock_irq(&cwq->lock);
+ if (!prev)
+ return 0;
+
+ wait_for_completion(&barr.done);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(flush_work);
+
/*
* Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
* so this work can't be re-armed in any way.
spin_lock_irq(&cwq->lock);
if (unlikely(cwq->current_work == work)) {
- insert_wq_barrier(cwq, &barr, 0);
+ insert_wq_barrier(cwq, &barr, cwq->worklist.next);
running = 1;
}
spin_unlock_irq(&cwq->lock);
struct work_struct *work = per_cpu_ptr(works, cpu);
INIT_WORK(work, func);
- set_bit(WORK_STRUCT_PENDING, work_data_bits(work));
- __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work);
+ schedule_work_on(cpu, work);
}
- flush_workqueue(keventd_wq);
+ for_each_online_cpu(cpu)
+ flush_work(per_cpu_ptr(works, cpu));
put_online_cpus();
free_percpu(works);
return 0;
err = create_workqueue_thread(cwq, singlethread_cpu);
start_workqueue_thread(cwq, -1);
} else {
- get_online_cpus();
+ cpu_maps_update_begin();
spin_lock(&workqueue_lock);
list_add(&wq->list, &workqueues);
spin_unlock(&workqueue_lock);
err = create_workqueue_thread(cwq, cpu);
start_workqueue_thread(cwq, cpu);
}
- put_online_cpus();
+ cpu_maps_update_done();
}
if (err) {
static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
{
/*
- * Our caller is either destroy_workqueue() or CPU_DEAD,
- * get_online_cpus() protects cwq->thread.
+ * Our caller is either destroy_workqueue() or CPU_POST_DEAD,
+ * cpu_add_remove_lock protects cwq->thread.
*/
if (cwq->thread == NULL)
return;
flush_cpu_workqueue(cwq);
/*
- * If the caller is CPU_DEAD and cwq->worklist was not empty,
+ * If the caller is CPU_POST_DEAD and cwq->worklist was not empty,
* a concurrent flush_workqueue() can insert a barrier after us.
* However, in that case run_workqueue() won't return and check
* kthread_should_stop() until it flushes all work_struct's.
const cpumask_t *cpu_map = wq_cpu_map(wq);
int cpu;
- get_online_cpus();
+ cpu_maps_update_begin();
spin_lock(&workqueue_lock);
list_del(&wq->list);
spin_unlock(&workqueue_lock);
for_each_cpu_mask_nr(cpu, *cpu_map)
cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu));
- put_online_cpus();
+ cpu_maps_update_done();
free_percpu(wq->cpu_wq);
kfree(wq);
unsigned int cpu = (unsigned long)hcpu;
struct cpu_workqueue_struct *cwq;
struct workqueue_struct *wq;
+ int ret = NOTIFY_OK;
action &= ~CPU_TASKS_FROZEN;
case CPU_UP_PREPARE:
cpu_set(cpu, cpu_populated_map);
}
-
+undo:
list_for_each_entry(wq, &workqueues, list) {
cwq = per_cpu_ptr(wq->cpu_wq, cpu);
break;
printk(KERN_ERR "workqueue [%s] for %i failed\n",
wq->name, cpu);
- return NOTIFY_BAD;
+ action = CPU_UP_CANCELED;
+ ret = NOTIFY_BAD;
+ goto undo;
case CPU_ONLINE:
start_workqueue_thread(cwq, cpu);
case CPU_UP_CANCELED:
start_workqueue_thread(cwq, -1);
- case CPU_DEAD:
+ case CPU_POST_DEAD:
cleanup_workqueue_thread(cwq);
break;
}
switch (action) {
case CPU_UP_CANCELED:
- case CPU_DEAD:
+ case CPU_POST_DEAD:
cpu_clear(cpu, cpu_populated_map);
}
- return NOTIFY_OK;
+ return ret;
}
void __init init_workqueues(void)
/**
* memparse - parse a string with mem suffixes into a number
* @ptr: Where parse begins
- * @retptr: (output) Pointer to next char after parse completes
+ * @retptr: (output) Optional pointer to next char after parse completes
*
* Parses a string into a number. The number stored at @ptr is
* potentially suffixed with %K (for kilobytes, or 1024 bytes),
* megabyte, or one gigabyte, respectively.
*/
-unsigned long long memparse (char *ptr, char **retptr)
+unsigned long long memparse(char *ptr, char **retptr)
{
- unsigned long long ret = simple_strtoull (ptr, retptr, 0);
+ char *endptr; /* local pointer to end of parsed string */
- switch (**retptr) {
+ unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
+
+ switch (*endptr) {
case 'G':
case 'g':
ret <<= 10;
case 'K':
case 'k':
ret <<= 10;
- (*retptr)++;
+ endptr++;
default:
break;
}
+
+ if (retptr)
+ *retptr = endptr;
+
return ret;
}
* Modified by George Anzinger to reuse immediately and to use
* find bit instructions. Also removed _irq on spinlocks.
*
+ * Modified by Nadia Derbey to make it RCU safe.
+ *
* Small id to pointer translation service.
*
* It uses a radix tree like structure as a sparse array indexed
static struct kmem_cache *idr_layer_cache;
-static struct idr_layer *alloc_layer(struct idr *idp)
+static struct idr_layer *get_from_free_list(struct idr *idp)
{
struct idr_layer *p;
unsigned long flags;
return(p);
}
+static void idr_layer_rcu_free(struct rcu_head *head)
+{
+ struct idr_layer *layer;
+
+ layer = container_of(head, struct idr_layer, rcu_head);
+ kmem_cache_free(idr_layer_cache, layer);
+}
+
+static inline void free_layer(struct idr_layer *p)
+{
+ call_rcu(&p->rcu_head, idr_layer_rcu_free);
+}
+
/* only called when idp->lock is held */
-static void __free_layer(struct idr *idp, struct idr_layer *p)
+static void __move_to_free_list(struct idr *idp, struct idr_layer *p)
{
p->ary[0] = idp->id_free;
idp->id_free = p;
idp->id_free_cnt++;
}
-static void free_layer(struct idr *idp, struct idr_layer *p)
+static void move_to_free_list(struct idr *idp, struct idr_layer *p)
{
unsigned long flags;
* Depends on the return element being zeroed.
*/
spin_lock_irqsave(&idp->lock, flags);
- __free_layer(idp, p);
+ __move_to_free_list(idp, p);
spin_unlock_irqrestore(&idp->lock, flags);
}
* @gfp_mask: memory allocation flags
*
* This function should be called prior to locking and calling the
- * following function. It preallocates enough memory to satisfy
+ * idr_get_new* functions. It preallocates enough memory to satisfy
* the worst possible allocation.
*
* If the system is REALLY out of memory this function returns 0,
new = kmem_cache_alloc(idr_layer_cache, gfp_mask);
if (new == NULL)
return (0);
- free_layer(idp, new);
+ move_to_free_list(idp, new);
}
return 1;
}
/* if already at the top layer, we need to grow */
if (!(p = pa[l])) {
*starting_id = id;
- return -2;
+ return IDR_NEED_TO_GROW;
}
/* If we need to go up one layer, continue the
id = ((id >> sh) ^ n ^ m) << sh;
}
if ((id >= MAX_ID_BIT) || (id < 0))
- return -3;
+ return IDR_NOMORE_SPACE;
if (l == 0)
break;
/*
* Create the layer below if it is missing.
*/
if (!p->ary[m]) {
- if (!(new = alloc_layer(idp)))
+ new = get_from_free_list(idp);
+ if (!new)
return -1;
- p->ary[m] = new;
+ rcu_assign_pointer(p->ary[m], new);
p->count++;
}
pa[l--] = p;
p = idp->top;
layers = idp->layers;
if (unlikely(!p)) {
- if (!(p = alloc_layer(idp)))
+ if (!(p = get_from_free_list(idp)))
return -1;
layers = 1;
}
layers++;
if (!p->count)
continue;
- if (!(new = alloc_layer(idp))) {
+ if (!(new = get_from_free_list(idp))) {
/*
* The allocation failed. If we built part of
* the structure tear it down.
p = p->ary[0];
new->ary[0] = NULL;
new->bitmap = new->count = 0;
- __free_layer(idp, new);
+ __move_to_free_list(idp, new);
}
spin_unlock_irqrestore(&idp->lock, flags);
return -1;
__set_bit(0, &new->bitmap);
p = new;
}
- idp->top = p;
+ rcu_assign_pointer(idp->top, p);
idp->layers = layers;
v = sub_alloc(idp, &id, pa);
- if (v == -2)
+ if (v == IDR_NEED_TO_GROW)
goto build_up;
return(v);
}
* Successfully found an empty slot. Install the user
* pointer and mark the slot full.
*/
- pa[0]->ary[id & IDR_MASK] = (struct idr_layer *)ptr;
+ rcu_assign_pointer(pa[0]->ary[id & IDR_MASK],
+ (struct idr_layer *)ptr);
pa[0]->count++;
idr_mark_full(pa, id);
}
* This is a cheap hack until the IDR code can be fixed to
* return proper error values.
*/
- if (rv < 0) {
- if (rv == -1)
- return -EAGAIN;
- else /* Will be -3 */
- return -ENOSPC;
- }
+ if (rv < 0)
+ return _idr_rc_to_errno(rv);
*id = rv;
return 0;
}
* This is a cheap hack until the IDR code can be fixed to
* return proper error values.
*/
- if (rv < 0) {
- if (rv == -1)
- return -EAGAIN;
- else /* Will be -3 */
- return -ENOSPC;
- }
+ if (rv < 0)
+ return _idr_rc_to_errno(rv);
*id = rv;
return 0;
}
static void idr_remove_warning(int id)
{
- printk("idr_remove called for id=%d which is not allocated.\n", id);
+ printk(KERN_WARNING
+ "idr_remove called for id=%d which is not allocated.\n", id);
dump_stack();
}
struct idr_layer *p = idp->top;
struct idr_layer **pa[MAX_LEVEL];
struct idr_layer ***paa = &pa[0];
+ struct idr_layer *to_free;
int n;
*paa = NULL;
n = id & IDR_MASK;
if (likely(p != NULL && test_bit(n, &p->bitmap))){
__clear_bit(n, &p->bitmap);
- p->ary[n] = NULL;
+ rcu_assign_pointer(p->ary[n], NULL);
+ to_free = NULL;
while(*paa && ! --((**paa)->count)){
- free_layer(idp, **paa);
+ if (to_free)
+ free_layer(to_free);
+ to_free = **paa;
**paa-- = NULL;
}
if (!*paa)
idp->layers = 0;
+ if (to_free)
+ free_layer(to_free);
} else
idr_remove_warning(id);
}
void idr_remove(struct idr *idp, int id)
{
struct idr_layer *p;
+ struct idr_layer *to_free;
/* Mask off upper bits we don't use for the search. */
id &= MAX_ID_MASK;
sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
if (idp->top && idp->top->count == 1 && (idp->layers > 1) &&
- idp->top->ary[0]) { // We can drop a layer
-
+ idp->top->ary[0]) {
+ /*
+ * Single child at leftmost slot: we can shrink the tree.
+ * This level is not needed anymore since when layers are
+ * inserted, they are inserted at the top of the existing
+ * tree.
+ */
+ to_free = idp->top;
p = idp->top->ary[0];
- idp->top->bitmap = idp->top->count = 0;
- free_layer(idp, idp->top);
- idp->top = p;
+ rcu_assign_pointer(idp->top, p);
--idp->layers;
+ to_free->bitmap = to_free->count = 0;
+ free_layer(to_free);
}
while (idp->id_free_cnt >= IDR_FREE_MAX) {
- p = alloc_layer(idp);
+ p = get_from_free_list(idp);
+ /*
+ * Note: we don't call the rcu callback here, since the only
+ * layers that fall into the freelist are those that have been
+ * preallocated.
+ */
kmem_cache_free(idr_layer_cache, p);
}
return;
id += 1 << n;
while (n < fls(id)) {
- if (p) {
- memset(p, 0, sizeof *p);
- free_layer(idp, p);
- }
+ if (p)
+ free_layer(p);
n += IDR_BITS;
p = *--paa;
}
}
- idp->top = NULL;
+ rcu_assign_pointer(idp->top, NULL);
idp->layers = 0;
}
EXPORT_SYMBOL(idr_remove_all);
void idr_destroy(struct idr *idp)
{
while (idp->id_free_cnt) {
- struct idr_layer *p = alloc_layer(idp);
+ struct idr_layer *p = get_from_free_list(idp);
kmem_cache_free(idr_layer_cache, p);
}
}
* return indicates that @id is not valid or you passed %NULL in
* idr_get_new().
*
- * The caller must serialize idr_find() vs idr_get_new() and idr_remove().
+ * This function can be called under rcu_read_lock(), given that the leaf
+ * pointers lifetimes are correctly managed.
*/
void *idr_find(struct idr *idp, int id)
{
struct idr_layer *p;
n = idp->layers * IDR_BITS;
- p = idp->top;
+ p = rcu_dereference(idp->top);
/* Mask off upper bits we don't use for the search. */
id &= MAX_ID_MASK;
while (n > 0 && p) {
n -= IDR_BITS;
- p = p->ary[(id >> n) & IDR_MASK];
+ p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
}
return((void *)p);
}
struct idr_layer **paa = &pa[0];
n = idp->layers * IDR_BITS;
- p = idp->top;
+ p = rcu_dereference(idp->top);
max = 1 << n;
id = 0;
while (n > 0 && p) {
n -= IDR_BITS;
*paa++ = p;
- p = p->ary[(id >> n) & IDR_MASK];
+ p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
}
if (p) {
* A -ENOENT return indicates that @id was not found.
* A -EINVAL return indicates that @id was not within valid constraints.
*
- * The caller must serialize vs idr_find(), idr_get_new(), and idr_remove().
+ * The caller must serialize with writers.
*/
void *idr_replace(struct idr *idp, void *ptr, int id)
{
return ERR_PTR(-ENOENT);
old_p = p->ary[n];
- p->ary[n] = ptr;
+ rcu_assign_pointer(p->ary[n], ptr);
return old_p;
}
restart:
/* get vacant slot */
t = idr_get_empty_slot(&ida->idr, idr_id, pa);
- if (t < 0) {
- if (t == -1)
- return -EAGAIN;
- else /* will be -3 */
- return -ENOSPC;
- }
+ if (t < 0)
+ return _idr_rc_to_errno(t);
if (t * IDA_BITMAP_BITS >= MAX_ID_BIT)
return -ENOSPC;
return -EAGAIN;
memset(bitmap, 0, sizeof(struct ida_bitmap));
- pa[0]->ary[idr_id & IDR_MASK] = (void *)bitmap;
+ rcu_assign_pointer(pa[0]->ary[idr_id & IDR_MASK],
+ (void *)bitmap);
pa[0]->count++;
}
* allocation.
*/
if (ida->idr.id_free_cnt || ida->free_bitmap) {
- struct idr_layer *p = alloc_layer(&ida->idr);
+ struct idr_layer *p = get_from_free_list(&ida->idr);
if (p)
kmem_cache_free(idr_layer_cache, p);
}
#define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<<k;k+=8;}}
#define DUMPBITS(n) {b>>=(n);k-=(n);}
+#ifndef NO_INFLATE_MALLOC
+/* A trivial malloc implementation, adapted from
+ * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
+ */
+
+static unsigned long malloc_ptr;
+static int malloc_count;
+
+static void *malloc(int size)
+{
+ void *p;
+
+ if (size < 0)
+ error("Malloc error");
+ if (!malloc_ptr)
+ malloc_ptr = free_mem_ptr;
+
+ malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */
+
+ p = (void *)malloc_ptr;
+ malloc_ptr += size;
+
+ if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr)
+ error("Out of memory");
+
+ malloc_count++;
+ return p;
+}
+
+static void free(void *where)
+{
+ malloc_count--;
+ if (!malloc_count)
+ malloc_ptr = free_mem_ptr;
+}
+#else
+#define malloc(a) kmalloc(a, GFP_KERNEL)
+#define free(a) kfree(a)
+#endif
/*
Huffman code decoding is performed using a multi-level table lookup.
int e; /* last block flag */
int r; /* result code */
unsigned h; /* maximum struct huft's malloc'ed */
- void *ptr;
/* initialize window, bit buffer */
wp = 0;
h = 0;
do {
hufts = 0;
- gzip_mark(&ptr);
- if ((r = inflate_block(&e)) != 0) {
- gzip_release(&ptr);
- return r;
- }
- gzip_release(&ptr);
+#ifdef ARCH_HAS_DECOMP_WDOG
+ arch_decomp_wdog();
+#endif
+ r = inflate_block(&e);
+ if (r)
+ return r;
if (hufts > h)
h = hufts;
} while (!e);
return -ENOENT;
if (!kobj->name || !kobj->name[0]) {
- pr_debug("kobject: (%p): attempted to be registered with empty "
+ WARN(1, "kobject: (%p): attempted to be registered with empty "
"name!\n", kobj);
- WARN_ON(1);
return -EINVAL;
}
void kobject_put(struct kobject *kobj)
{
if (kobj) {
- if (!kobj->state_initialized) {
- printk(KERN_WARNING "kobject: '%s' (%p): is not "
+ if (!kobj->state_initialized)
+ WARN(1, KERN_WARNING "kobject: '%s' (%p): is not "
"initialized, yet kobject_put() is being "
"called.\n", kobject_name(kobj), kobj);
- WARN_ON(1);
- }
kref_put(&kobj->kref, kobject_release);
}
}
struct list_head *prev,
struct list_head *next)
{
- if (unlikely(next->prev != prev)) {
- printk(KERN_ERR "list_add corruption. next->prev should be "
- "prev (%p), but was %p. (next=%p).\n",
- prev, next->prev, next);
- BUG();
- }
- if (unlikely(prev->next != next)) {
- printk(KERN_ERR "list_add corruption. prev->next should be "
- "next (%p), but was %p. (prev=%p).\n",
- next, prev->next, prev);
- BUG();
- }
+ WARN(next->prev != prev,
+ "list_add corruption. next->prev should be "
+ "prev (%p), but was %p. (next=%p).\n",
+ prev, next->prev, next);
+ WARN(prev->next != next,
+ "list_add corruption. prev->next should be "
+ "next (%p), but was %p. (prev=%p).\n",
+ next, prev->next, prev);
next->prev = new;
new->next = next;
new->prev = prev;
}
EXPORT_SYMBOL(__list_add);
-/**
- * list_add - add a new entry
- * @new: new entry to be added
- * @head: list head to add it after
- *
- * Insert a new entry after the specified head.
- * This is good for implementing stacks.
- */
-void list_add(struct list_head *new, struct list_head *head)
-{
- __list_add(new, head, head->next);
-}
-EXPORT_SYMBOL(list_add);
-
/**
* list_del - deletes entry from list.
* @entry: the element to delete from the list.
*/
void list_del(struct list_head *entry)
{
- if (unlikely(entry->prev->next != entry)) {
- printk(KERN_ERR "list_del corruption. prev->next should be %p, "
- "but was %p\n", entry, entry->prev->next);
- BUG();
- }
- if (unlikely(entry->next->prev != entry)) {
- printk(KERN_ERR "list_del corruption. next->prev should be %p, "
- "but was %p\n", entry, entry->next->prev);
- BUG();
- }
+ WARN(entry->prev->next != entry,
+ "list_del corruption. prev->next should be %p, "
+ "but was %p\n", entry, entry->prev->next);
+ WARN(entry->next->prev != entry,
+ "list_del corruption. next->prev should be %p, "
+ "but was %p\n", entry, entry->next->prev);
__list_del(entry->prev, entry->next);
entry->next = LIST_POISON1;
entry->prev = LIST_POISON2;
t += 31 + *ip++;
}
m_pos = op - 1;
- m_pos -= le16_to_cpu(get_unaligned(
- (const unsigned short *)ip)) >> 2;
+ m_pos -= get_unaligned_le16(ip) >> 2;
ip += 2;
} else if (t >= 16) {
m_pos = op;
}
t += 7 + *ip++;
}
- m_pos -= le16_to_cpu(get_unaligned(
- (const unsigned short *)ip)) >> 2;
+ m_pos -= get_unaligned_le16(ip) >> 2;
ip += 2;
if (m_pos == op)
goto eof_found;
*
* Isolated from kernel/printk.c by Dave Young <hidave.darkstar@gmail.com>
*
+ * 2008-05-01 rewrite the function and use a ratelimit_state data struct as
+ * parameter. Now every user can use their own standalone ratelimit_state.
+ *
* This file is released under the GPLv2.
*
*/
#include <linux/jiffies.h>
#include <linux/module.h>
+static DEFINE_SPINLOCK(ratelimit_lock);
+static unsigned long flags;
+
/*
* __ratelimit - rate limiting
- * @ratelimit_jiffies: minimum time in jiffies between two callbacks
- * @ratelimit_burst: number of callbacks we do before ratelimiting
+ * @rs: ratelimit_state data
*
- * This enforces a rate limit: not more than @ratelimit_burst callbacks
- * in every ratelimit_jiffies
+ * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks
+ * in every @rs->ratelimit_jiffies
*/
-int __ratelimit(int ratelimit_jiffies, int ratelimit_burst)
+int __ratelimit(struct ratelimit_state *rs)
{
- static DEFINE_SPINLOCK(ratelimit_lock);
- static unsigned toks = 10 * 5 * HZ;
- static unsigned long last_msg;
- static int missed;
- unsigned long flags;
- unsigned long now = jiffies;
+ if (!rs->interval)
+ return 1;
spin_lock_irqsave(&ratelimit_lock, flags);
- toks += now - last_msg;
- last_msg = now;
- if (toks > (ratelimit_burst * ratelimit_jiffies))
- toks = ratelimit_burst * ratelimit_jiffies;
- if (toks >= ratelimit_jiffies) {
- int lost = missed;
+ if (!rs->begin)
+ rs->begin = jiffies;
- missed = 0;
- toks -= ratelimit_jiffies;
- spin_unlock_irqrestore(&ratelimit_lock, flags);
- if (lost)
- printk(KERN_WARNING "%s: %d messages suppressed\n",
- __func__, lost);
- return 1;
+ if (time_is_before_jiffies(rs->begin + rs->interval)) {
+ if (rs->missed)
+ printk(KERN_WARNING "%s: %d callbacks suppressed\n",
+ __func__, rs->missed);
+ rs->begin = 0;
+ rs->printed = 0;
+ rs->missed = 0;
}
- missed++;
+ if (rs->burst && rs->burst > rs->printed)
+ goto print;
+
+ rs->missed++;
spin_unlock_irqrestore(&ratelimit_lock, flags);
return 0;
+
+print:
+ rs->printed++;
+ spin_unlock_irqrestore(&ratelimit_lock, flags);
+ return 1;
}
EXPORT_SYMBOL(__ratelimit);
{
struct address_space *mapping = page->mapping;
- mem_cgroup_uncharge_page(page);
+ mem_cgroup_uncharge_cache_page(page);
radix_tree_delete(&mapping->page_tree, page->index);
page->mapping = NULL;
mapping->nrpages--;
mapping->nrpages++;
__inc_zone_page_state(page, NR_FILE_PAGES);
} else
- mem_cgroup_uncharge_page(page);
+ mem_cgroup_uncharge_cache_page(page);
write_unlock_irq(&mapping->tree_lock);
radix_tree_preload_end();
} else
- mem_cgroup_uncharge_page(page);
+ mem_cgroup_uncharge_cache_page(page);
out:
return error;
}
* Otherwise return zero.
*
* The @gfp_mask argument specifies whether I/O may be performed to release
- * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
+ * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS).
*
- * NOTE: @gfp_mask may go away, and this function may become non-blocking.
*/
int try_to_release_page(struct page *page, gfp_t gfp_mask)
{
}
}
+static unsigned int cpuset_mems_nr(unsigned int *array)
+{
+ int node;
+ unsigned int nr = 0;
+
+ for_each_node_mask(node, cpuset_current_mems_allowed)
+ nr += array[node];
+
+ return nr;
+}
+
#ifdef CONFIG_SYSCTL
#ifdef CONFIG_HIGHMEM
static void try_to_free_low(struct hstate *h, unsigned long count)
}
__setup("default_hugepagesz=", hugetlb_default_setup);
-static unsigned int cpuset_mems_nr(unsigned int *array)
-{
- int node;
- unsigned int nr = 0;
-
- for_each_node_mask(node, cpuset_current_mems_allowed)
- nr += array[node];
-
- return nr;
-}
-
int hugetlb_sysctl_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer,
size_t *length, loff_t *ppos)
#include <asm/uaccess.h>
-struct cgroup_subsys mem_cgroup_subsys;
-static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
-static struct kmem_cache *page_cgroup_cache;
+struct cgroup_subsys mem_cgroup_subsys __read_mostly;
+static struct kmem_cache *page_cgroup_cache __read_mostly;
+#define MEM_CGROUP_RECLAIM_RETRIES 5
/*
* Statistics for memory cgroup.
struct list_head lru; /* per cgroup LRU list */
struct page *page;
struct mem_cgroup *mem_cgroup;
- int ref_cnt; /* cached, mapped, migrating */
int flags;
};
#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
enum charge_type {
MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
MEM_CGROUP_CHARGE_TYPE_MAPPED,
+ MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */
};
/*
MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
- list_del_init(&pc->lru);
+ list_del(&pc->lru);
}
static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
struct mem_cgroup_per_zone *mz;
unsigned long flags;
+ if (mem_cgroup_subsys.disabled)
+ return;
+
/*
* We cannot lock_page_cgroup while holding zone's lru_lock,
* because other holders of lock_page_cgroup can be interrupted
* < 0 if the cgroup is over its limit
*/
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask, enum charge_type ctype)
+ gfp_t gfp_mask, enum charge_type ctype,
+ struct mem_cgroup *memcg)
{
struct mem_cgroup *mem;
struct page_cgroup *pc;
unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct mem_cgroup_per_zone *mz;
- if (mem_cgroup_subsys.disabled)
- return 0;
-
- /*
- * Should page_cgroup's go to their own slab?
- * One could optimize the performance of the charging routine
- * by saving a bit in the page_flags and using it as a lock
- * to see if the cgroup page already has a page_cgroup associated
- * with it
- */
-retry:
- lock_page_cgroup(page);
- pc = page_get_page_cgroup(page);
- /*
- * The page_cgroup exists and
- * the page has already been accounted.
- */
- if (pc) {
- VM_BUG_ON(pc->page != page);
- VM_BUG_ON(pc->ref_cnt <= 0);
-
- pc->ref_cnt++;
- unlock_page_cgroup(page);
- goto done;
- }
- unlock_page_cgroup(page);
-
- pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask);
- if (pc == NULL)
+ pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
+ if (unlikely(pc == NULL))
goto err;
/*
* thread group leader migrates. It's possible that mm is not
* set, if so charge the init_mm (happens for pagecache usage).
*/
- if (!mm)
- mm = &init_mm;
-
- rcu_read_lock();
- mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
- /*
- * For every charge from the cgroup, increment reference count
- */
- css_get(&mem->css);
- rcu_read_unlock();
+ if (likely(!memcg)) {
+ rcu_read_lock();
+ mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+ /*
+ * For every charge from the cgroup, increment reference count
+ */
+ css_get(&mem->css);
+ rcu_read_unlock();
+ } else {
+ mem = memcg;
+ css_get(&memcg->css);
+ }
while (res_counter_charge(&mem->res, PAGE_SIZE)) {
if (!(gfp_mask & __GFP_WAIT))
}
}
- pc->ref_cnt = 1;
pc->mem_cgroup = mem;
pc->page = page;
- pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
+ /*
+ * If a page is accounted as a page cache, insert to inactive list.
+ * If anon, insert to active list.
+ */
if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
pc->flags = PAGE_CGROUP_FLAG_CACHE;
+ else
+ pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
lock_page_cgroup(page);
- if (page_get_page_cgroup(page)) {
+ if (unlikely(page_get_page_cgroup(page))) {
unlock_page_cgroup(page);
- /*
- * Another charge has been added to this page already.
- * We take lock_page_cgroup(page) again and read
- * page->cgroup, increment refcnt.... just retry is OK.
- */
res_counter_uncharge(&mem->res, PAGE_SIZE);
css_put(&mem->css);
kmem_cache_free(page_cgroup_cache, pc);
- goto retry;
+ goto done;
}
page_assign_page_cgroup(page, pc);
int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
{
+ if (mem_cgroup_subsys.disabled)
+ return 0;
+
+ /*
+ * If already mapped, we don't have to account.
+ * If page cache, page->mapping has address_space.
+ * But page->mapping may have out-of-use anon_vma pointer,
+ * detecit it by PageAnon() check. newly-mapped-anon's page->mapping
+ * is NULL.
+ */
+ if (page_mapped(page) || (page->mapping && !PageAnon(page)))
+ return 0;
+ if (unlikely(!mm))
+ mm = &init_mm;
return mem_cgroup_charge_common(page, mm, gfp_mask,
- MEM_CGROUP_CHARGE_TYPE_MAPPED);
+ MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
}
int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask)
{
- if (!mm)
+ if (mem_cgroup_subsys.disabled)
+ return 0;
+
+ /*
+ * Corner case handling. This is called from add_to_page_cache()
+ * in usual. But some FS (shmem) precharges this page before calling it
+ * and call add_to_page_cache() with GFP_NOWAIT.
+ *
+ * For GFP_NOWAIT case, the page may be pre-charged before calling
+ * add_to_page_cache(). (See shmem.c) check it here and avoid to call
+ * charge twice. (It works but has to pay a bit larger cost.)
+ */
+ if (!(gfp_mask & __GFP_WAIT)) {
+ struct page_cgroup *pc;
+
+ lock_page_cgroup(page);
+ pc = page_get_page_cgroup(page);
+ if (pc) {
+ VM_BUG_ON(pc->page != page);
+ VM_BUG_ON(!pc->mem_cgroup);
+ unlock_page_cgroup(page);
+ return 0;
+ }
+ unlock_page_cgroup(page);
+ }
+
+ if (unlikely(!mm))
mm = &init_mm;
+
return mem_cgroup_charge_common(page, mm, gfp_mask,
- MEM_CGROUP_CHARGE_TYPE_CACHE);
+ MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
}
/*
- * Uncharging is always a welcome operation, we never complain, simply
- * uncharge.
+ * uncharge if !page_mapped(page)
*/
-void mem_cgroup_uncharge_page(struct page *page)
+static void
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
{
struct page_cgroup *pc;
struct mem_cgroup *mem;
*/
lock_page_cgroup(page);
pc = page_get_page_cgroup(page);
- if (!pc)
+ if (unlikely(!pc))
goto unlock;
VM_BUG_ON(pc->page != page);
- VM_BUG_ON(pc->ref_cnt <= 0);
- if (--(pc->ref_cnt) == 0) {
- mz = page_cgroup_zoneinfo(pc);
- spin_lock_irqsave(&mz->lru_lock, flags);
- __mem_cgroup_remove_list(mz, pc);
- spin_unlock_irqrestore(&mz->lru_lock, flags);
+ if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+ && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
+ || page_mapped(page)))
+ goto unlock;
- page_assign_page_cgroup(page, NULL);
- unlock_page_cgroup(page);
+ mz = page_cgroup_zoneinfo(pc);
+ spin_lock_irqsave(&mz->lru_lock, flags);
+ __mem_cgroup_remove_list(mz, pc);
+ spin_unlock_irqrestore(&mz->lru_lock, flags);
- mem = pc->mem_cgroup;
- res_counter_uncharge(&mem->res, PAGE_SIZE);
- css_put(&mem->css);
+ page_assign_page_cgroup(page, NULL);
+ unlock_page_cgroup(page);
- kmem_cache_free(page_cgroup_cache, pc);
- return;
- }
+ mem = pc->mem_cgroup;
+ res_counter_uncharge(&mem->res, PAGE_SIZE);
+ css_put(&mem->css);
+ kmem_cache_free(page_cgroup_cache, pc);
+ return;
unlock:
unlock_page_cgroup(page);
}
+void mem_cgroup_uncharge_page(struct page *page)
+{
+ __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+}
+
+void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+ VM_BUG_ON(page_mapped(page));
+ __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+}
+
/*
- * Returns non-zero if a page (under migration) has valid page_cgroup member.
- * Refcnt of page_cgroup is incremented.
+ * Before starting migration, account against new page.
*/
-int mem_cgroup_prepare_migration(struct page *page)
+int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
{
struct page_cgroup *pc;
+ struct mem_cgroup *mem = NULL;
+ enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+ int ret = 0;
if (mem_cgroup_subsys.disabled)
return 0;
lock_page_cgroup(page);
pc = page_get_page_cgroup(page);
- if (pc)
- pc->ref_cnt++;
+ if (pc) {
+ mem = pc->mem_cgroup;
+ css_get(&mem->css);
+ if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
+ ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+ }
unlock_page_cgroup(page);
- return pc != NULL;
+ if (mem) {
+ ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
+ ctype, mem);
+ css_put(&mem->css);
+ }
+ return ret;
}
-void mem_cgroup_end_migration(struct page *page)
+/* remove redundant charge if migration failed*/
+void mem_cgroup_end_migration(struct page *newpage)
{
- mem_cgroup_uncharge_page(page);
+ /*
+ * At success, page->mapping is not NULL.
+ * special rollback care is necessary when
+ * 1. at migration failure. (newpage->mapping is cleared in this case)
+ * 2. the newpage was moved but not remapped again because the task
+ * exits and the newpage is obsolete. In this case, the new page
+ * may be a swapcache. So, we just call mem_cgroup_uncharge_page()
+ * always for avoiding mess. The page_cgroup will be removed if
+ * unnecessary. File cache pages is still on radix-tree. Don't
+ * care it.
+ */
+ if (!newpage->mapping)
+ __mem_cgroup_uncharge_common(newpage,
+ MEM_CGROUP_CHARGE_TYPE_FORCE);
+ else if (PageAnon(newpage))
+ mem_cgroup_uncharge_page(newpage);
}
/*
- * We know both *page* and *newpage* are now not-on-LRU and PG_locked.
- * And no race with uncharge() routines because page_cgroup for *page*
- * has extra one reference by mem_cgroup_prepare_migration.
+ * A call to try to shrink memory usage under specified resource controller.
+ * This is typically used for page reclaiming for shmem for reducing side
+ * effect of page allocation from shmem, which is used by some mem_cgroup.
*/
-void mem_cgroup_page_migration(struct page *page, struct page *newpage)
+int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
{
- struct page_cgroup *pc;
- struct mem_cgroup_per_zone *mz;
- unsigned long flags;
+ struct mem_cgroup *mem;
+ int progress = 0;
+ int retry = MEM_CGROUP_RECLAIM_RETRIES;
- lock_page_cgroup(page);
- pc = page_get_page_cgroup(page);
- if (!pc) {
- unlock_page_cgroup(page);
- return;
- }
+ if (mem_cgroup_subsys.disabled)
+ return 0;
- mz = page_cgroup_zoneinfo(pc);
- spin_lock_irqsave(&mz->lru_lock, flags);
- __mem_cgroup_remove_list(mz, pc);
- spin_unlock_irqrestore(&mz->lru_lock, flags);
+ rcu_read_lock();
+ mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+ css_get(&mem->css);
+ rcu_read_unlock();
- page_assign_page_cgroup(page, NULL);
- unlock_page_cgroup(page);
+ do {
+ progress = try_to_free_mem_cgroup_pages(mem, gfp_mask);
+ } while (!progress && --retry);
- pc->page = newpage;
- lock_page_cgroup(newpage);
- page_assign_page_cgroup(newpage, pc);
+ css_put(&mem->css);
+ if (!retry)
+ return -ENOMEM;
+ return 0;
+}
- mz = page_cgroup_zoneinfo(pc);
- spin_lock_irqsave(&mz->lru_lock, flags);
- __mem_cgroup_add_list(mz, pc);
- spin_unlock_irqrestore(&mz->lru_lock, flags);
+int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
+{
+
+ int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
+ int progress;
+ int ret = 0;
- unlock_page_cgroup(newpage);
+ while (res_counter_set_limit(&memcg->res, val)) {
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ break;
+ }
+ if (!retry_count) {
+ ret = -EBUSY;
+ break;
+ }
+ progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL);
+ if (!progress)
+ retry_count--;
+ }
+ return ret;
}
+
/*
* This routine traverse page_cgroup in given list and drop them all.
- * This routine ignores page_cgroup->ref_cnt.
* *And* this routine doesn't reclaim page itself, just removes page_cgroup.
*/
#define FORCE_UNCHARGE_BATCH (128)
page = pc->page;
get_page(page);
spin_unlock_irqrestore(&mz->lru_lock, flags);
- mem_cgroup_uncharge_page(page);
- put_page(page);
- if (--count <= 0) {
- count = FORCE_UNCHARGE_BATCH;
+ /*
+ * Check if this page is on LRU. !LRU page can be found
+ * if it's under page migration.
+ */
+ if (PageLRU(page)) {
+ __mem_cgroup_uncharge_common(page,
+ MEM_CGROUP_CHARGE_TYPE_FORCE);
+ put_page(page);
+ if (--count <= 0) {
+ count = FORCE_UNCHARGE_BATCH;
+ cond_resched();
+ }
+ } else
cond_resched();
- }
spin_lock_irqsave(&mz->lru_lock, flags);
}
spin_unlock_irqrestore(&mz->lru_lock, flags);
int ret = -EBUSY;
int node, zid;
- if (mem_cgroup_subsys.disabled)
- return 0;
-
css_get(&mem->css);
/*
* page reclaim code (kswapd etc..) will move pages between
return ret;
}
-static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
-{
- *tmp = memparse(buf, &buf);
- if (*buf != '\0')
- return -EINVAL;
-
- /*
- * Round up the value to the closest page size
- */
- *tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT;
- return 0;
-}
-
static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
{
return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
cft->private);
}
-
-static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
- struct file *file, const char __user *userbuf,
- size_t nbytes, loff_t *ppos)
+/*
+ * The user of this function is...
+ * RES_LIMIT.
+ */
+static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
+ const char *buffer)
{
- return res_counter_write(&mem_cgroup_from_cont(cont)->res,
- cft->private, userbuf, nbytes, ppos,
- mem_cgroup_write_strategy);
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ unsigned long long val;
+ int ret;
+
+ switch (cft->private) {
+ case RES_LIMIT:
+ /* This function does all necessary parse...reuse it */
+ ret = res_counter_memparse_write_strategy(buffer, &val);
+ if (!ret)
+ ret = mem_cgroup_resize_limit(memcg, val);
+ break;
+ default:
+ ret = -EINVAL; /* should be BUG() ? */
+ break;
+ }
+ return ret;
}
static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
{
.name = "limit_in_bytes",
.private = RES_LIMIT,
- .write = mem_cgroup_write,
+ .write_string = mem_cgroup_write,
.read_u64 = mem_cgroup_read,
},
{
static int mem_cgroup_populate(struct cgroup_subsys *ss,
struct cgroup *cont)
{
- if (mem_cgroup_subsys.disabled)
- return 0;
return cgroup_add_files(cont, ss, mem_cgroup_files,
ARRAY_SIZE(mem_cgroup_files));
}
struct mm_struct *mm;
struct mem_cgroup *mem, *old_mem;
- if (mem_cgroup_subsys.disabled)
- return;
-
mm = get_task_mm(p);
if (mm == NULL)
return;
__inc_zone_page_state(newpage, NR_FILE_PAGES);
write_unlock_irq(&mapping->tree_lock);
+ if (!PageSwapCache(newpage)) {
+ mem_cgroup_uncharge_cache_page(page);
+ }
return 0;
}
rc = fallback_migrate_page(mapping, newpage, page);
if (!rc) {
- mem_cgroup_page_migration(page, newpage);
remove_migration_ptes(page, newpage);
} else
newpage->mapping = NULL;
/* page was freed from under us. So we are done. */
goto move_newpage;
+ charge = mem_cgroup_prepare_migration(page, newpage);
+ if (charge == -ENOMEM) {
+ rc = -ENOMEM;
+ goto move_newpage;
+ }
+ /* prepare cgroup just returns 0 or -ENOMEM */
+ BUG_ON(charge);
+
rc = -EAGAIN;
if (TestSetPageLocked(page)) {
if (!force)
goto rcu_unlock;
}
- charge = mem_cgroup_prepare_migration(page);
/* Establish migration ptes or remove ptes */
try_to_unmap(page, 1);
if (!page_mapped(page))
rc = move_to_new_page(newpage, page);
- if (rc) {
+ if (rc)
remove_migration_ptes(page, page);
- if (charge)
- mem_cgroup_end_migration(page);
- } else if (charge)
- mem_cgroup_end_migration(newpage);
rcu_unlock:
if (rcu_locked)
rcu_read_unlock();
}
move_newpage:
+ if (!charge)
+ mem_cgroup_end_migration(newpage);
/*
* Move the new page to the LRU. If migration was not successful
* then this will free the page.
* Thread creation: For how long have there been zero
* available threads?
*/
- if (jiffies - last_empty_jifs > 1 * HZ) {
+ if (time_after(jiffies, last_empty_jifs + 1 * HZ)) {
/* unlocked list_empty() test is OK here */
if (list_empty(&pdflush_list)) {
/* unlocked test is OK here */
if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS)
continue;
pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
- if (jiffies - pdf->when_i_went_to_sleep > 1 * HZ) {
+ if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) {
/* Limit exit rate */
pdf->when_i_went_to_sleep = jiffies;
break; /* exeunt */
VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
if (atomic_inc_and_test(&page->_mapcount))
__page_set_anon_rmap(page, vma, address);
- else {
+ else
__page_check_anon_rmap(page, vma, address);
- /*
- * We unconditionally charged during prepare, we uncharge here
- * This takes care of balancing the reference counts
- */
- mem_cgroup_uncharge_page(page);
- }
}
/**
{
if (atomic_inc_and_test(&page->_mapcount))
__inc_zone_page_state(page, NR_FILE_MAPPED);
- else
- /*
- * We unconditionally charged during prepare, we uncharge here
- * This takes care of balancing the reference counts
- */
- mem_cgroup_uncharge_page(page);
}
#ifdef CONFIG_DEBUG_VM
error = 1;
if (!inode)
goto out;
- /* Precharge page while we can wait, compensate afterwards */
+ /* Precharge page using GFP_KERNEL while we can wait */
error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
if (error)
goto out;
error = radix_tree_preload(GFP_KERNEL);
- if (error)
- goto uncharge;
+ if (error) {
+ mem_cgroup_uncharge_cache_page(page);
+ goto out;
+ }
error = 1;
spin_lock(&info->lock);
ptr = shmem_swp_entry(info, idx, NULL);
- if (ptr && ptr->val == entry.val)
+ if (ptr && ptr->val == entry.val) {
error = add_to_page_cache(page, inode->i_mapping,
idx, GFP_NOWAIT);
+ /* does mem_cgroup_uncharge_cache_page on error */
+ } else /* we must compensate for our precharge above */
+ mem_cgroup_uncharge_cache_page(page);
+
if (error == -EEXIST) {
struct page *filepage = find_get_page(inode->i_mapping, idx);
error = 1;
shmem_swp_unmap(ptr);
spin_unlock(&info->lock);
radix_tree_preload_end();
-uncharge:
- mem_cgroup_uncharge_page(page);
out:
unlock_page(page);
page_cache_release(page);
shmem_swp_unmap(entry);
spin_unlock(&info->lock);
unlock_page(swappage);
+ page_cache_release(swappage);
if (error == -ENOMEM) {
/* allow reclaim from this memory cgroup */
- error = mem_cgroup_cache_charge(swappage,
- current->mm, gfp & ~__GFP_HIGHMEM);
- if (error) {
- page_cache_release(swappage);
+ error = mem_cgroup_shrink_usage(current->mm,
+ gfp);
+ if (error)
goto failed;
- }
- mem_cgroup_uncharge_page(swappage);
}
- page_cache_release(swappage);
goto repeat;
}
} else if (sgp == SGP_READ && !filepage) {
}
if (!filepage) {
+ int ret;
+
spin_unlock(&info->lock);
filepage = shmem_alloc_page(gfp, info, idx);
if (!filepage) {
swap = *entry;
shmem_swp_unmap(entry);
}
- if (error || swap.val || 0 != add_to_page_cache_lru(
- filepage, mapping, idx, GFP_NOWAIT)) {
+ ret = error || swap.val;
+ if (ret)
+ mem_cgroup_uncharge_cache_page(filepage);
+ else
+ ret = add_to_page_cache_lru(filepage, mapping,
+ idx, GFP_NOWAIT);
+ /*
+ * At add_to_page_cache_lru() failure, uncharge will
+ * be done automatically.
+ */
+ if (ret) {
spin_unlock(&info->lock);
- mem_cgroup_uncharge_page(filepage);
page_cache_release(filepage);
shmem_unacct_blocks(info->flags, 1);
shmem_free_blocks(inode, 1);
goto failed;
goto repeat;
}
- mem_cgroup_uncharge_page(filepage);
info->flags |= SHMEM_PAGEIN;
}
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/memcontrol.h>
+#include <linux/delayacct.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
struct zone *zone;
enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
+ delayacct_freepages_start();
+
if (scan_global_lru(sc))
count_vm_event(ALLOCSTALL);
/*
} else
mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority);
+ delayacct_freepages_end();
+
return ret;
}
*/
static struct datalink_proto *find_snap_client(unsigned char *desc)
{
- struct list_head *entry;
struct datalink_proto *proto = NULL, *p;
- list_for_each_rcu(entry, &snap_list) {
- p = list_entry(entry, struct datalink_proto, node);
+ list_for_each_entry_rcu(p, &snap_list, node) {
if (!memcmp(p->type, desc, 5)) {
proto = p;
break;
{
.ctl_name = NET_CORE_MSG_COST,
.procname = "message_cost",
- .data = &net_msg_cost,
+ .data = &net_ratelimit_state.interval,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
{
.ctl_name = NET_CORE_MSG_BURST,
.procname = "message_burst",
- .data = &net_msg_burst,
+ .data = &net_ratelimit_state.burst,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
#include <asm/system.h>
#include <asm/uaccess.h>
-int net_msg_cost __read_mostly = 5*HZ;
-int net_msg_burst __read_mostly = 10;
int net_msg_warn __read_mostly = 1;
EXPORT_SYMBOL(net_msg_warn);
+DEFINE_RATELIMIT_STATE(net_ratelimit_state, 5 * HZ, 10);
/*
* All net warning printk()s should be guarded by this function.
*/
int net_ratelimit(void)
{
- return __printk_ratelimit(net_msg_cost, net_msg_burst);
+ return __ratelimit(&net_ratelimit_state);
}
EXPORT_SYMBOL(net_ratelimit);
static int inet_create(struct net *net, struct socket *sock, int protocol)
{
struct sock *sk;
- struct list_head *p;
struct inet_protosw *answer;
struct inet_sock *inet;
struct proto *answer_prot;
sock->state = SS_UNCONNECTED;
/* Look for the requested type/protocol pair. */
- answer = NULL;
lookup_protocol:
err = -ESOCKTNOSUPPORT;
rcu_read_lock();
- list_for_each_rcu(p, &inetsw[sock->type]) {
- answer = list_entry(p, struct inet_protosw, list);
+ list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
+ err = 0;
/* Check the non-wild match. */
if (protocol == answer->protocol) {
if (protocol != IPPROTO_IP)
break;
}
err = -EPROTONOSUPPORT;
- answer = NULL;
}
- if (unlikely(answer == NULL)) {
+ if (unlikely(err)) {
if (try_loading_module < 2) {
rcu_read_unlock();
/*
struct inet_sock *inet;
struct ipv6_pinfo *np;
struct sock *sk;
- struct list_head *p;
struct inet_protosw *answer;
struct proto *answer_prot;
unsigned char answer_flags;
build_ehash_secret();
/* Look for the requested type/protocol pair. */
- answer = NULL;
lookup_protocol:
err = -ESOCKTNOSUPPORT;
rcu_read_lock();
- list_for_each_rcu(p, &inetsw6[sock->type]) {
- answer = list_entry(p, struct inet_protosw, list);
+ list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) {
+ err = 0;
/* Check the non-wild match. */
if (protocol == answer->protocol) {
if (protocol != IPPROTO_IP)
break;
}
err = -EPROTONOSUPPORT;
- answer = NULL;
}
- if (!answer) {
+ if (err) {
if (try_loading_module < 2) {
rcu_read_unlock();
/*
return &namespaces->net_ns->sysctl_table_headers;
}
+/* Return standard mode bits for table entry. */
+static int net_ctl_permissions(struct ctl_table_root *root,
+ struct nsproxy *nsproxy,
+ struct ctl_table *table)
+{
+ /* Allow network administrator to have same access as root. */
+ if (capable(CAP_NET_ADMIN)) {
+ int mode = (table->mode >> 6) & 7;
+ return (mode << 6) | (mode << 3) | mode;
+ }
+ return table->mode;
+}
+
static struct ctl_table_root net_sysctl_root = {
.lookup = net_ctl_header_lookup,
+ .permissions = net_ctl_permissions,
};
static LIST_HEAD(net_sysctl_ro_tables);
include scripts/Makefile.host
-mod-fw := $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-m))
-
+mod-fw := $(fw-shipped-m)
# If CONFIG_FIRMWARE_IN_KERNEL isn't set, then install the
# firmware for in-kernel drivers too.
ifndef CONFIG_FIRMWARE_IN_KERNEL
-mod-fw += $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-y))
+mod-fw += $(fw-shipped-y)
endif
+installed-mod-fw := $(addprefix $(INSTALL_FW_PATH)/,$(mod-fw))
+
installed-fw := $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-all))
installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/.
.PHONY: $(PHONY)
__fw_install: $(installed-fw)
-__fw_modinst: $(mod-fw)
+__fw_modinst: $(installed-mod-fw)
+__fw_modbuild: $(addprefix $(obj)/,$(mod-fw))
FORCE:
# $& (whole re) matches the complete objdump line with the stack growth
# $1 (first bracket) matches the size of the stack growth
#
+# $dre is similar, but for dynamic stack redutions:
+# $& (whole re) matches the complete objdump line with the stack growth
+# $1 (first bracket) matches the dynamic amount of the stack growth
+#
# use anything else and feel the pain ;)
-my (@stack, $re, $x, $xs);
+my (@stack, $re, $dre, $x, $xs);
{
my $arch = shift;
if ($arch eq "") {
$arch = `uname -m`;
+ chomp($arch);
}
$x = "[0-9a-f]"; # hex character
} elsif ($arch =~ /^i[3456]86$/) {
#c0105234: 81 ec ac 05 00 00 sub $0x5ac,%esp
$re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%esp$/o;
+ $dre = qr/^.*[as][du][db] (%.*),\%esp$/o;
} elsif ($arch eq 'x86_64') {
# 2f60: 48 81 ec e8 05 00 00 sub $0x5e8,%rsp
$re = qr/^.*[as][du][db] \$(0x$x{1,8}),\%rsp$/o;
+ $dre = qr/^.*[as][du][db] (\%.*),\%rsp$/o;
} elsif ($arch eq 'ia64') {
#e0000000044011fc: 01 0f fc 8c adds r12=-384,r12
$re = qr/.*adds.*r12=-(([0-9]{2}|[3-9])[0-9]{2}),r12/o;
# 0: 00 e8 38 01 LINK 0x4e0;
$re = qr/.*[[:space:]]LINK[[:space:]]*(0x$x{1,8})/o;
} else {
- print("wrong or unknown architecture\n");
+ print("wrong or unknown architecture \"$arch\"\n");
exit
}
}
next if ($size < 100);
push @stack, "$intro$size\n";
}
+ elsif (defined $dre && $line =~ m/$dre/) {
+ my $size = "Dynamic ($1)";
+
+ next if $line !~ m/^($xs*)/;
+ my $addr = $1;
+ $addr =~ s/ /0/g;
+ $addr = "0x$addr";
+
+ my $intro = "$addr $func [$file]:";
+ my $padlen = 56 - length($intro);
+ while ($padlen > 0) {
+ $intro .= ' ';
+ $padlen -= 8;
+ }
+ push @stack, "$intro$size\n";
+ }
}
print sort bysize @stack;
short type;
short access;
struct list_head list;
+ struct rcu_head rcu;
};
struct dev_cgroup {
return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
}
+static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
+{
+ return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
+}
+
struct cgroup_subsys devices_subsys;
static int devcgroup_can_attach(struct cgroup_subsys *ss,
}
if (whcopy != NULL)
- list_add_tail(&whcopy->list, &dev_cgroup->whitelist);
+ list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist);
spin_unlock(&dev_cgroup->lock);
return 0;
}
+static void whitelist_item_free(struct rcu_head *rcu)
+{
+ struct dev_whitelist_item *item;
+
+ item = container_of(rcu, struct dev_whitelist_item, rcu);
+ kfree(item);
+}
+
/*
* called under cgroup_lock()
* since the list is visible to other tasks, we need the spinlock also
remove:
walk->access &= ~wh->access;
if (!walk->access) {
- list_del(&walk->list);
- kfree(walk);
+ list_del_rcu(&walk->list);
+ call_rcu(&walk->rcu, whitelist_item_free);
}
}
spin_unlock(&dev_cgroup->lock);
}
wh->minor = wh->major = ~0;
wh->type = DEV_ALL;
- wh->access = ACC_MKNOD | ACC_READ | ACC_WRITE;
+ wh->access = ACC_MASK;
list_add(&wh->list, &dev_cgroup->whitelist);
} else {
parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
static void set_majmin(char *str, unsigned m)
{
- memset(str, 0, MAJMINLEN);
if (m == ~0)
- sprintf(str, "*");
+ strcpy(str, "*");
else
- snprintf(str, MAJMINLEN, "%u", m);
+ sprintf(str, "%u", m);
}
static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
struct dev_whitelist_item *wh;
char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
- spin_lock(&devcgroup->lock);
- list_for_each_entry(wh, &devcgroup->whitelist, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) {
set_access(acc, wh->access);
set_majmin(maj, wh->major);
set_majmin(min, wh->minor);
seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type),
maj, min, acc);
}
- spin_unlock(&devcgroup->lock);
+ rcu_read_unlock();
return 0;
}
* when adding a new allow rule to a device whitelist, the rule
* must be allowed in the parent device
*/
-static int parent_has_perm(struct cgroup *childcg,
+static int parent_has_perm(struct dev_cgroup *childcg,
struct dev_whitelist_item *wh)
{
- struct cgroup *pcg = childcg->parent;
+ struct cgroup *pcg = childcg->css.cgroup->parent;
struct dev_cgroup *parent;
int ret;
* new access is only allowed if you're in the top-level cgroup, or your
* parent cgroup has the access you're asking for.
*/
-static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
- struct file *file, const char __user *userbuf,
- size_t nbytes, loff_t *ppos)
+static int devcgroup_update_access(struct dev_cgroup *devcgroup,
+ int filetype, const char *buffer)
{
- struct cgroup *cur_cgroup;
- struct dev_cgroup *devcgroup, *cur_devcgroup;
- int filetype = cft->private;
- char *buffer, *b;
+ struct dev_cgroup *cur_devcgroup;
+ const char *b;
+ char *endp;
int retval = 0, count;
struct dev_whitelist_item wh;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- devcgroup = cgroup_to_devcgroup(cgroup);
- cur_cgroup = task_cgroup(current, devices_subsys.subsys_id);
- cur_devcgroup = cgroup_to_devcgroup(cur_cgroup);
-
- buffer = kmalloc(nbytes+1, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
- if (copy_from_user(buffer, userbuf, nbytes)) {
- retval = -EFAULT;
- goto out1;
- }
- buffer[nbytes] = 0; /* nul-terminate */
-
- cgroup_lock();
- if (cgroup_is_removed(cgroup)) {
- retval = -ENODEV;
- goto out2;
- }
+ cur_devcgroup = task_devcgroup(current);
memset(&wh, 0, sizeof(wh));
b = buffer;
wh.type = DEV_CHAR;
break;
default:
- retval = -EINVAL;
- goto out2;
+ return -EINVAL;
}
b++;
- if (!isspace(*b)) {
- retval = -EINVAL;
- goto out2;
- }
+ if (!isspace(*b))
+ return -EINVAL;
b++;
if (*b == '*') {
wh.major = ~0;
b++;
} else if (isdigit(*b)) {
- wh.major = 0;
- while (isdigit(*b)) {
- wh.major = wh.major*10+(*b-'0');
- b++;
- }
+ wh.major = simple_strtoul(b, &endp, 10);
+ b = endp;
} else {
- retval = -EINVAL;
- goto out2;
- }
- if (*b != ':') {
- retval = -EINVAL;
- goto out2;
+ return -EINVAL;
}
+ if (*b != ':')
+ return -EINVAL;
b++;
/* read minor */
wh.minor = ~0;
b++;
} else if (isdigit(*b)) {
- wh.minor = 0;
- while (isdigit(*b)) {
- wh.minor = wh.minor*10+(*b-'0');
- b++;
- }
+ wh.minor = simple_strtoul(b, &endp, 10);
+ b = endp;
} else {
- retval = -EINVAL;
- goto out2;
- }
- if (!isspace(*b)) {
- retval = -EINVAL;
- goto out2;
+ return -EINVAL;
}
+ if (!isspace(*b))
+ return -EINVAL;
for (b++, count = 0; count < 3; count++, b++) {
switch (*b) {
case 'r':
count = 3;
break;
default:
- retval = -EINVAL;
- goto out2;
+ return -EINVAL;
}
}
retval = 0;
switch (filetype) {
case DEVCG_ALLOW:
- if (!parent_has_perm(cgroup, &wh))
- retval = -EPERM;
- else
- retval = dev_whitelist_add(devcgroup, &wh);
- break;
+ if (!parent_has_perm(devcgroup, &wh))
+ return -EPERM;
+ return dev_whitelist_add(devcgroup, &wh);
case DEVCG_DENY:
dev_whitelist_rm(devcgroup, &wh);
break;
default:
- retval = -EINVAL;
- goto out2;
+ return -EINVAL;
}
+ return 0;
+}
- if (retval == 0)
- retval = nbytes;
-
-out2:
+static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
+ const char *buffer)
+{
+ int retval;
+ if (!cgroup_lock_live_group(cgrp))
+ return -ENODEV;
+ retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
+ cft->private, buffer);
cgroup_unlock();
-out1:
- kfree(buffer);
return retval;
}
static struct cftype dev_cgroup_files[] = {
{
.name = "allow",
- .write = devcgroup_access_write,
+ .write_string = devcgroup_access_write,
.private = DEVCG_ALLOW,
},
{
.name = "deny",
- .write = devcgroup_access_write,
+ .write_string = devcgroup_access_write,
.private = DEVCG_DENY,
},
{
if (!dev_cgroup)
return 0;
- spin_lock(&dev_cgroup->lock);
- list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) {
if (wh->type & DEV_ALL)
goto acc_check;
if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode))
continue;
if ((mask & MAY_READ) && !(wh->access & ACC_READ))
continue;
- spin_unlock(&dev_cgroup->lock);
+ rcu_read_unlock();
return 0;
}
- spin_unlock(&dev_cgroup->lock);
+ rcu_read_unlock();
return -EPERM;
}
if (!dev_cgroup)
return 0;
- spin_lock(&dev_cgroup->lock);
+ rcu_read_lock();
list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
if (wh->type & DEV_ALL)
goto acc_check;
acc_check:
if (!(wh->access & ACC_MKNOD))
continue;
- spin_unlock(&dev_cgroup->lock);
+ rcu_read_unlock();
return 0;
}
- spin_unlock(&dev_cgroup->lock);
+ rcu_read_unlock();
return -EPERM;
}